MapComplete/scripts/generateStats.ts

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

217 lines
8.3 KiB
TypeScript
Raw Permalink Normal View History

import known_layers from "../src/assets/generated/known_layers.json"
import { LayerConfigJson } from "../src/Models/ThemeConfig/Json/LayerConfigJson"
import { TagUtils } from "../src/Logic/Tags/TagUtils"
import { Utils } from "../src/Utils"
2025-01-08 14:21:07 +01:00
import { copyFileSync, existsSync, readFileSync, writeFileSync } from "fs"
2022-03-13 01:27:19 +01:00
import ScriptUtils from "./ScriptUtils"
import TagRenderingConfig from "../src/Models/ThemeConfig/TagRenderingConfig"
import { And } from "../src/Logic/Tags/And"
2024-05-13 17:21:40 +02:00
import Script from "./Script"
import NameSuggestionIndex from "../src/Logic/Web/NameSuggestionIndex"
2024-06-01 12:48:22 +02:00
import TagInfo from "../src/Logic/Web/TagInfo"
2022-03-13 01:27:19 +01:00
2024-05-13 17:21:40 +02:00
/**
 * Small collection of generic helper functions used by this script.
 */
class Utilities {
    /**
     * Creates a new record which has the same keys as `record`,
     * but where every value has been transformed with `f`.
     * The given record is not modified.
     *
     * @param record the record whose enumerable properties are visited (with `for...in`)
     * @param f the conversion applied to every value
     * @returns a freshly created record containing the converted values
     */
    static mapValues<X extends string | number, T, TOut>(
        record: Record<X, T>,
        f: (t: T) => TOut
    ): Record<X, TOut> {
        const mapped = {} as Record<X, TOut>
        for (const key in record) {
            const original = record[key]
            mapped[key] = f(original)
        }
        return mapped
    }
}
2024-05-23 04:42:26 +02:00
2024-05-13 17:21:40 +02:00
/**
 * Script which downloads usage counts from TagInfo and stores them as JSON files.
 *
 * Two independent outputs are generated (see `main`):
 *  1. `./src/assets/key_totals.json`: totals for the keys and tags used by the known layers
 *  2. `./public/assets/data/nsi/stats/<type>*.json`: counts for the values listed by the Name Suggestion Index
 *
 * All paths are relative, so they are resolved against the current working directory.
 */
class GenerateStats extends Script {
    /**
     * Collects the keys (and optionally the tags) which are used by the `osmTags` of the source
     * and by the title of every known layer, asks TagInfo for the count of the `"all"` type
     * of each of them and writes the result to `./src/assets/key_totals.json`.
     *
     * Layers without a source, with a string as source, or with a `geoJson` source which is not
     * flagged as `isOsmCache` are skipped.
     *
     * Note that all requests are started at once; if a single request fails or if a response
     * has no entry of type `"all"`, the returned promise rejects and nothing is written.
     *
     * @param includeTags if false, only the totals per key are collected and `tags` will be empty in the output
     */
    async createOptimizationFile(includeTags = true) {
        ScriptUtils.fixUtils()
        const layers = <LayerConfigJson[]>known_layers["layers"]

        // Maps every used key onto the set of values for which a separate count is requested
        const keysAndTags = new Map<string, Set<string>>()

        for (const layer of layers) {
            // This check must come first: indexing a missing source (as done below) would throw
            if (layer.source == null || typeof layer.source === "string") {
                continue
            }
            if (layer.source["geoJson"] !== undefined && !layer.source["isOsmCache"]) {
                continue
            }

            const sourcesList = [TagUtils.Tag(layer.source["osmTags"])]
            if (layer.title) {
                sourcesList.push(...new TagRenderingConfig(layer.title).usedTags())
            }

            const sources = new And(sourcesList)
            for (const key of sources.usedKeys()) {
                if (!keysAndTags.has(key)) {
                    keysAndTags.set(key, new Set<string>())
                }
            }

            const allTags = includeTags ? sources.usedTags() : []
            for (const tag of allTags) {
                let values = keysAndTags.get(tag.key)
                if (values === undefined) {
                    values = new Set<string>()
                    keysAndTags.set(tag.key, values)
                }
                values.add(tag.value)
            }
        }

        const keyTotal = new Map<string, number>()
        const tagTotal = new Map<string, Map<string, number>>()
        await Promise.all(
            Array.from(keysAndTags.entries()).map(async ([key, values]) => {
                const data = await TagInfo.global.getStats(key)
                const keyCount = data.data.find((item) => item.type === "all").count
                keyTotal.set(key, keyCount)
                console.log(key, "-->", keyCount)

                if (values.size === 0) {
                    return
                }
                const perValue = new Map<string, number>()
                tagTotal.set(key, perValue)
                await Promise.all(
                    Array.from(values).map(async (value) => {
                        const tagData = await TagInfo.global.getStats(key, value)
                        const tagCount = tagData.data.find((item) => item.type === "all").count
                        perValue.set(value, tagCount)
                        console.log(key + "=" + value, "-->", tagCount)
                    })
                )
            })
        )

        writeFileSync(
            "./src/assets/key_totals.json",
            JSON.stringify(
                {
                    "#": "Generated with generateStats.ts",
                    date: new Date().toISOString(),
                    keys: Utils.MapToObj(keyTotal, (t) => t),
                    tags: Utils.MapToObj(tagTotal, (v) => Utils.MapToObj(v, (t) => t)),
                },
                null,
                " "
            )
        )
    }

    /**
     * Reads the nested counts from `sourcefile` (outer key: named `brand` below, inner key: named
     * `country` below), removes all counts which are exactly zero and writes two kinds of files:
     *
     *  - `<pathNoExtension>.summarized.json`: all the remaining counts
     *  - `<pathNoExtension>.<country>.json`: one file per encountered country, containing the
     *    count of every brand which has a count above 2 in that country
     *
     * A country file is also written for countries which only had zero counts; it contains `{}`.
     *
     * @param sourcefile path of the JSON file to summarize
     * @param pathNoExtension path prefix of the files to write
     */
    private summarizeNSI(sourcefile: string, pathNoExtension: string): void {
        const data = <Record<string, Record<string, number>>>(
            JSON.parse(readFileSync(sourcefile, "utf8"))
        )

        const allCountries: Set<string> = new Set()
        for (const brand in data) {
            const perCountry = data[brand]
            for (const country in perCountry) {
                // The country is registered before the zero-check, so it gets a file in any case
                allCountries.add(country)
                if (perCountry[country] === 0) {
                    delete perCountry[country]
                }
            }
        }

        const pathOut = pathNoExtension + ".summarized.json"
        writeFileSync(pathOut, JSON.stringify(data, null, " "), "utf8")
        console.log("Written", pathOut)

        const allBrands = Object.keys(data)
        allBrands.sort()
        for (const country of allCountries) {
            const summary = <Record<string, number>>{}
            for (const brand of allBrands) {
                const count = data[brand][country]
                if (count > 2) {
                    // "One is none" (Dutch saying: "Eéntje is geentje")
                    // Counts of 2 or lower (and missing counts) are rather exceptional and are left out
                    summary[brand] = count
                }
            }
            const countryPath = pathNoExtension + "." + country + ".json"
            writeFileSync(countryPath, JSON.stringify(summary), "utf8")
            console.log("Written", countryPath)
        }
    }

    /**
     * Downloads, for every value of `type` which is listed in the Name Suggestion Index,
     * the distribution via `TagInfo.getGlobalDistributionsFor` and saves everything into
     * `<basepath><type>.json`.
     *
     * If this file already exists, it is loaded first and the new data is written into the
     * already loaded entries. The values are handled in batches of 50 parallel downloads; the
     * file is rewritten after every batch so that an interrupted run does not lose everything.
     *
     * @param basepath directory (including the trailing slash) in which the file is saved
     * @param type the key for which the values are looked up in the Name Suggestion Index, e.g. `brand` or `operator`
     */
    async createNameSuggestionIndexFile(basepath: string, type: "brand" | "operator" | string) {
        const path = basepath + type + ".json"
        let allBrands: Record<string, Record<string, number>> = {}
        if (existsSync(path)) {
            allBrands = JSON.parse(readFileSync(path, "utf8"))
            console.log(
                "Loaded",
                Object.keys(allBrands).length,
                " previously loaded " + type,
                "from",
                path
            )
        }

        const nsi = await NameSuggestionIndex.getNsiIndex()
        const allBrandNames: string[] = Utils.Dedup(
            nsi.allPossible(<any>type).map((item) => item.tags[type])
        )

        const batchSize = 50
        for (let i = 0; i < allBrandNames.length; i += batchSize) {
            // `slice` stops at the end of the list, so the last batch never contains `undefined`
            const batch = allBrandNames.slice(i, i + batchSize)
            console.warn(
                "Downloading ",
                batch.length,
                "occurence counts, items: ",
                i + "/" + allBrandNames.length
            )
            let downloaded = 0
            await Promise.all(
                batch.map(async (brand) => {
                    if (!allBrands[brand]) {
                        allBrands[brand] = {}
                    }
                    const writeInto = allBrands[brand]
                    const dloaded = await TagInfo.getGlobalDistributionsFor(
                        writeInto,
                        (stats) => stats.data.find((t) => t.type === "all").count,
                        type,
                        brand
                    )
                    downloaded += dloaded
                })
            )
            console.log("Downloaded ", downloaded, " values this batch")
            writeFileSync(path, JSON.stringify(allBrands), "utf8")
            console.log("Checkpointed", path)
        }

        // Also covers the case where there was not a single batch
        writeFileSync(path, JSON.stringify(allBrands), "utf8")
        console.log("Written:", path)
    }

    constructor() {
        super(
            "Downloads stats on osmSource-tags and keys from tagInfo. There are two usecases with separate outputs:\n 1. To optimize the query before sending it to overpass (generates ./src/assets/key_totals.json) \n 2. To amend the Name Suggestion Index "
        )
    }

    /**
     * Runs all the steps of this script, in this order:
     *  1. copies the distribution files of the `name-suggestion-index` package into `./public/assets/data/nsi/`
     *  2. downloads and summarizes the counts for `operator` and for `brand`
     *  3. generates `./src/assets/key_totals.json`
     *
     * @param _ the arguments of the script, which are not used
     */
    async main(_: string[]) {
        const target = "./public/assets/data/nsi/"
        const basepath = target + "stats/"
        {
            const src = "./node_modules/name-suggestion-index/dist/"
            const files = ["featureCollection.min.json", "nsi.min.json", "wikidata.min.json"]
            console.log(process.cwd())
            for (const file of files) {
                const destination = target + file
                console.log("Copying ", src + file, destination)
                copyFileSync(src + file, destination)
            }
        }

        for (const type of ["operator", "brand"]) {
            await this.createNameSuggestionIndexFile(basepath, type)
            this.summarizeNSI(basepath + type + ".json", basepath + type)
        }

        await this.createOptimizationFile()
    }
}
2024-05-13 17:21:40 +02:00
// Entry point of the script: creates the script object and starts it.
// NOTE(review): `run` is not declared in `GenerateStats` itself; presumably it is inherited from `Script` and ends up calling `main` - confirm in ./Script
new GenerateStats().run()