2023-07-15 18:04:30 +02:00
import known_layers from "../src/assets/generated/known_layers.json"
import { LayerConfigJson } from "../src/Models/ThemeConfig/Json/LayerConfigJson"
import { TagUtils } from "../src/Logic/Tags/TagUtils"
import { Utils } from "../src/Utils"
2024-05-13 17:21:40 +02:00
import { existsSync , readFileSync , writeFileSync } from "fs"
2022-03-13 01:27:19 +01:00
import ScriptUtils from "./ScriptUtils"
2023-12-04 16:10:05 +01:00
import TagRenderingConfig from "../src/Models/ThemeConfig/TagRenderingConfig"
import { And } from "../src/Logic/Tags/And"
2024-05-13 17:21:40 +02:00
import Script from "./Script"
2024-05-16 00:12:50 +02:00
import NameSuggestionIndex from "../src/Logic/Web/NameSuggestionIndex"
2024-05-13 17:21:40 +02:00
import TagInfo , { TagInfoStats } from "../src/Logic/Web/TagInfo"
2022-03-13 01:27:19 +01:00
2024-05-13 17:21:40 +02:00
/**
 * Small collection of generic helpers used by this script.
 */
class Utilities {
    /**
     * Builds a new record which has the same keys as `record`, where every value was transformed by `f`.
     * Only own enumerable properties are copied; properties inherited through the prototype chain are ignored.
     *
     * @param record the record whose values are transformed; it is not modified
     * @param f the transformation which is applied onto every value
     * @returns a fresh record mapping every own key of `record` onto `f(record[key])`
     */
    static mapValues<X extends string | number, T, TOut>(
        record: Record<X, T>,
        f: (t: T) => TOut
    ): Record<X, TOut> {
        const newR = <Record<X, TOut>>{}
        for (const x in record) {
            // `for..in` also visits inherited enumerable properties; those are not part of the record
            if (!Object.prototype.hasOwnProperty.call(record, x)) {
                continue
            }
            newR[x] = f(record[x])
        }
        return newR
    }
}
2024-05-23 04:42:26 +02:00
2024-05-13 17:21:40 +02:00
/**
 * Script which downloads usage statistics from taginfo.
 * It has two outputs:
 *  1. `./src/assets/key_totals.json`: how often the keys and tags used by the known layers occur
 *  2. per-type (`operator`, `brand`) statistics which amend the Name Suggestion Index
 */
class GenerateStats extends Script {
    /**
     * Extracts the count of the entry with type `all` from a taginfo statistics response.
     *
     * @param stats the statistics as returned by taginfo
     * @returns the count of the `all` entry
     * @throws Error if the response does not contain an entry of type `all`
     */
    private static totalCount(stats: { data: { type: string; count: number }[] }): number {
        const all = stats.data.find((item) => item.type === "all")
        if (all === undefined) {
            throw new Error("The taginfo statistics do not contain an entry with type 'all'")
        }
        return all.count
    }

    /**
     * Collects all keys (and, if requested, all tags) which are used by the source and the title
     * of the known layers, downloads how often they occur and writes the result
     * to `./src/assets/key_totals.json`.
     *
     * @param includeTags if false, only keys are counted and no `key=value` combinations are collected
     */
    async createOptimizationFile(includeTags = true): Promise<void> {
        ScriptUtils.fixUtils()
        const layers = <LayerConfigJson[]>known_layers.layers

        // Maps every used key onto the values it is used with (the set is empty if only the key is known)
        const keysAndTags = new Map<string, Set<string>>()

        for (const layer of layers) {
            // This guard must come first: indexing into a missing source would throw
            if (layer.source == null || typeof layer.source === "string") {
                continue
            }
            if (layer.source["geoJson"] !== undefined && !layer.source["isOsmCache"]) {
                continue
            }

            const sourcesList = [TagUtils.Tag(layer.source["osmTags"])]
            if (layer.title) {
                sourcesList.push(...new TagRenderingConfig(layer.title).usedTags())
            }

            const sources = new And(sourcesList)
            for (const key of sources.usedKeys()) {
                if (!keysAndTags.has(key)) {
                    keysAndTags.set(key, new Set<string>())
                }
            }
            const allTags = includeTags ? sources.usedTags() : []
            for (const tag of allTags) {
                if (!keysAndTags.has(tag.key)) {
                    keysAndTags.set(tag.key, new Set<string>())
                }
                keysAndTags.get(tag.key).add(tag.value)
            }
        }

        const keyTotal = new Map<string, number>()
        const tagTotal = new Map<string, Map<string, number>>()
        // All keys, and all values of a key, are requested concurrently
        await Promise.all(
            Array.from(keysAndTags).map(async ([key, values]) => {
                const count = GenerateStats.totalCount(await TagInfo.global.getStats(key))
                keyTotal.set(key, count)
                console.log(key, "-->", count)
                if (values.size > 0) {
                    const perValue = new Map<string, number>()
                    tagTotal.set(key, perValue)
                    await Promise.all(
                        Array.from(values).map(async (value) => {
                            const tagData: TagInfoStats = await TagInfo.global.getStats(key, value)
                            const count = GenerateStats.totalCount(tagData)
                            perValue.set(value, count)
                            console.log(key + "=" + value, "-->", count)
                        })
                    )
                }
            })
        )
        writeFileSync(
            "./src/assets/key_totals.json",
            JSON.stringify(
                {
                    "#": "Generated with generateStats.ts",
                    date: new Date().toISOString(),
                    keys: Utils.MapToObj(keyTotal, (t) => t),
                    tags: Utils.MapToObj(tagTotal, (v) => Utils.MapToObj(v, (t) => t)),
                },
                null,
                " "
            )
        )
    }

    /**
     * Reads the statistics file (brand --> country --> count) and writes condensed versions of it:
     *  - `<pathNoExtension>.summarized.json`: the same data without the entries which have a count of zero
     *  - `<pathNoExtension>.<country>.json`: for every country, the brands which occur more than twice
     *
     * @param sourcefile path of the JSON file to read
     * @param pathNoExtension prefix of the paths of the files which are written
     */
    private summarizeNSI(sourcefile: string, pathNoExtension: string): void {
        const data = <Record<string, Record<string, number>>>(
            JSON.parse(readFileSync(sourcefile, "utf8"))
        )
        const allCountries = new Set<string>()
        for (const perCountry of Object.values(data)) {
            for (const country of Object.keys(perCountry)) {
                // The country is registered even if its count is zero
                allCountries.add(country)
                if (perCountry[country] === 0) {
                    delete perCountry[country]
                }
            }
        }

        const pathOut = pathNoExtension + ".summarized.json"
        writeFileSync(pathOut, JSON.stringify(data, null, " "), "utf8")
        console.log("Written", pathOut)

        const allBrands = Object.keys(data)
        allBrands.sort()
        for (const country of allCountries) {
            const summary = <Record<string, number>>{}
            for (const brand of allBrands) {
                const count = data[brand][country]
                // Brands which occur at most twice in a country are rather exceptional and are left out.
                // A brand without an entry for this country has an undefined count and is left out as well.
                if (count > 2) {
                    summary[brand] = count
                }
            }
            const countryPath = pathNoExtension + "." + country + ".json"
            writeFileSync(countryPath, JSON.stringify(summary), "utf8")
            console.log("Written", countryPath)
        }
    }

    /**
     * Downloads, for every value of `type` which is known in the Name Suggestion Index, the global
     * distribution and saves the counts into `<basepath><type>.json`.
     * Values which are already present in this file are not downloaded again, so an interrupted run can be resumed.
     * While downloading, the file is checkpointed at most once every five seconds.
     *
     * @param basepath directory (with trailing slash) in which the file is stored
     * @param type the key to inspect, e.g. `brand` or `operator`
     */
    async createNameSuggestionIndexFile(
        basepath: string,
        type: "brand" | "operator" | string
    ): Promise<void> {
        const path = basepath + type + ".json"
        let allBrands = <Record<string, Record<string, number>>>{}
        if (existsSync(path)) {
            allBrands = JSON.parse(readFileSync(path, "utf8"))
            console.log("Loaded", Object.keys(allBrands).length, " previously loaded brands")
        }
        let lastWrite = Date.now()
        let skipped = 0
        const allBrandNames: string[] = Utils.Dedup(
            NameSuggestionIndex.allPossible(type).map((item) => item.tags[type])
        )
        for (let i = 0; i < allBrandNames.length; i++) {
            if (i % 100 === 0) {
                console.log("Downloading ", i + "/" + allBrandNames.length, "; skipped", skipped)
            }
            const brand = allBrandNames[i]
            if (!!allBrands[brand] && Object.keys(allBrands[brand]).length === 0) {
                // An empty entry carries no information: drop it, it is downloaded again below
                delete allBrands[brand]
                console.log("Deleted", brand, "as no entries at all")
            }
            if (allBrands[brand] !== undefined) {
                const max = Math.max(...Object.values(allBrands[brand]))
                if (max < 0) {
                    // Negative counts indicate broken data: discard the entry and download it again
                    console.log("HMMMM:", allBrands[brand])
                    delete allBrands[brand]
                } else {
                    // Already downloaded during an earlier run
                    skipped++
                    continue
                }
            }
            const distribution: Record<string, number> = Utilities.mapValues(
                await TagInfo.getGlobalDistributionsFor(type, brand),
                (stats) => GenerateStats.totalCount(stats)
            )
            allBrands[brand] = distribution
            if ((Date.now() - lastWrite) / 1000 >= 5) {
                writeFileSync(path, JSON.stringify(allBrands), "utf8")
                console.log("Checkpointed", path)
                // Restart the interval; otherwise every following iteration would rewrite the entire file
                lastWrite = Date.now()
            }
        }
        writeFileSync(path, JSON.stringify(allBrands), "utf8")
    }

    constructor() {
        super("Downloads stats on osmSource-tags and keys from tagInfo. There are two usecases with separate outputs:\n 1. To optimize the query before sending it to overpass (generates ./src/assets/key_totals.json) \n 2. To amend the Name Suggestion Index ")
    }

    /**
     * Entry point of the script: generates the Name Suggestion Index statistics for `operator` and `brand`
     * (including their summaries) and afterwards the key totals file.
     *
     * @param _ the command line arguments, which are not used
     */
    async main(_: string[]): Promise<void> {
        const basepath = "./src/assets/generated/stats/"
        for (const type of ["operator", "brand"]) {
            await this.createNameSuggestionIndexFile(basepath, type)
            this.summarizeNSI(basepath + type + ".json", "./public/assets/data/nsi/stats/" + type)
        }
        await this.createOptimizationFile()
    }
}
2024-05-13 17:21:40 +02:00
// Instantiate the script and start it through `run()` (not defined in this file, presumably inherited from `Script`)
const generateStats = new GenerateStats()
generateStats.run()