NSI: add script to download logos and statistics, dynamically inject extra mappings, hide low-priority mappings if applicable

This commit is contained in:
Pieter Vander Vennet 2024-05-16 00:12:50 +02:00
parent 30d1f175c6
commit c5b4cdf450
18 changed files with 459 additions and 114 deletions

View file

@ -1,11 +1,10 @@
import * as fs from "fs"
import { existsSync, lstatSync, readdirSync, readFileSync } from "fs"
import { Utils } from "../src/Utils"
import * as https from "https"
import {https} from "follow-redirects"
import { LayoutConfigJson } from "../src/Models/ThemeConfig/Json/LayoutConfigJson"
import { LayerConfigJson } from "../src/Models/ThemeConfig/Json/LayerConfigJson"
import xml2js from "xml2js"
import { resolve } from "node:dns"
export default class ScriptUtils {
public static fixUtils() {

113
scripts/downloadNsiLogos.ts Normal file
View file

@ -0,0 +1,113 @@
import Script from "./Script"
import NameSuggestionIndex, { NSIItem } from "../src/Logic/Web/NameSuggestionIndex"
import * as nsiWD from "../node_modules/name-suggestion-index/dist/wikidata.min.json"
import { existsSync, writeFileSync } from "fs"
import ScriptUtils from "./ScriptUtils"
import { Utils } from "../src/Utils"
import { WikimediaImageProvider } from "../src/Logic/ImageProviders/WikimediaImageProvider"
import { renameSync } from "node:fs"
/**
 * Script which tries to download a logo for every "brand" item of the Name Suggestion Index (NSI)
 * into `./public/assets/data/nsi/logos/`, using the logo URLs listed in the NSI wikidata dump.
 * Items for which a file already exists at the target path are skipped.
 */
export default class DownloadNsiLogos extends Script {
    /** Maximum number of requests made for a single wikidata logo while chasing redirects. */
    private static readonly maxRequestsPerLogo = 10
    /** Number of items which are handled concurrently by `main`. */
    private static readonly batchSize = 100
    /** Value of the `User-Agent` header sent along when fetching a wikidata logo. */
    private static readonly userAgent =
        "MapComplete NSI scraper/0.1 (https://github.com/pietervdvn/MapComplete; pietervdvn@posteo.net)"

    constructor() {
        super("Downloads all images of the NSI")
    }

    /**
     * Unfinished placeholder which is not called from within this class.
     * A falsy `startUrl` is handed back unchanged; every other input yields `undefined`.
     */
    private async getWikimediaUrl(startUrl: string): Promise<string | undefined> {
        if (!startUrl) {
            return startUrl
        }
        return undefined
    }

    /**
     * Error-tolerant wrapper around `downloadLogoUnsafe`: a failure is logged and reported
     * as "nothing downloaded", so that one broken logo does not abort a whole batch.
     *
     * @param nsiItem the NSI item to fetch the logo for
     * @param type the NSI type (e.g. "brand"); `<type>:wikidata` is the tag used to look up the logos
     * @param basePath directory prefix (including trailing slash) to which the item id is appended
     * @returns true if a new file was written, false otherwise (including on error)
     */
    private async downloadLogo(nsiItem: NSIItem, type: string, basePath: string): Promise<boolean> {
        try {
            return await this.downloadLogoUnsafe(nsiItem, type, basePath)
        } catch (e) {
            console.error("Could not download", nsiItem?.displayName, "due to", e)
            return false
        }
    }

    /**
     * Downloads the logo of a single NSI item. The facebook logo is preferred; the wikidata logo
     * is the fallback.
     *
     * @param nsiItem the NSI item to fetch the logo for; `undefined` is tolerated and yields false
     * @param type the NSI type (e.g. "brand"); `<type>:wikidata` is the tag used to look up the logos
     * @param basePath directory prefix (including trailing slash) to which the item id is appended
     * @returns true if a new file was written; false if there is no logo, the file already exists
     *          or the redirect budget was exhausted
     * @throws Error if the wikidata logo is an SVG but the target path did not get the `.svg` extension;
     *         errors of the underlying download helpers propagate as well
     */
    private async downloadLogoUnsafe(nsiItem: NSIItem, type: string, basePath: string): Promise<boolean> {
        if (nsiItem === undefined) {
            return false
        }
        const logos = nsiWD["wikidata"][nsiItem?.tags?.[type + ":wikidata"]]?.logos
        if (!logos) {
            return false
        }

        let path = basePath + nsiItem.id
        if (NameSuggestionIndex.isSvg(nsiItem, type)) {
            path = path + ".svg"
        }
        if (existsSync(path)) {
            // Already present on disk: nothing new to fetch
            return false
        }

        if (logos.facebook) {
            // Facebook logos are generally better and square
            await ScriptUtils.DownloadFileTo(logos.facebook, path)
            return true
        }
        if (!logos.wikidata) {
            return false
        }

        const startUrl: string = logos.wikidata
        const isSvg = startUrl.toLowerCase().endsWith(".svg")
        if (isSvg && !path.endsWith(".svg")) {
            // Checked before any request is made: the outcome does not depend on the download
            throw new Error("Undetected svg path:" + logos.wikidata)
        }

        let url = startUrl
        console.log("Downloading", url)
        for (let attempt = 0; attempt < DownloadNsiLogos.maxRequestsPerLogo; attempt++) {
            const dloaded = await Utils.downloadAdvanced(url, {
                "User-Agent": DownloadNsiLogos.userAgent,
            })
            const redirect: string | undefined = dloaded["redirect"]
            if (redirect) {
                console.log("Got a redirect from", url, "to", redirect)
                url = redirect
                continue
            }
            if (isSvg) {
                writeFileSync(path, dloaded["content"], "utf8")
                console.log("Written SVG", path)
                return true
            }
            // NOTE(review): non-SVG files are fetched a second time by DownloadFileTo instead of
            // writing `content`; presumably because `content` is text and would corrupt binary data - confirm
            console.log("Got data from", url, "-->", path)
            await ScriptUtils.DownloadFileTo(url, path)
            return true
        }
        console.error("Gave up on", startUrl, "after", DownloadNsiLogos.maxRequestsPerLogo, "redirects")
        return false
    }

    /**
     * Walks over all NSI items of type "brand" in batches and downloads their logos;
     * the items within one batch are handled concurrently.
     *
     * @param args command line arguments; not used
     */
    async main(args: string[]): Promise<void> {
        const type = "brand"
        const items = NameSuggestionIndex.allPossible(type)
        const basePath = "./public/assets/data/nsi/logos/"
        const batchSize = DownloadNsiLogos.batchSize
        let downloadCount = 0
        for (let i = 0; i < items.length; i += batchSize) {
            console.log(i + "/" + items.length, "downloaded " + downloadCount)
            // slice() stops at the end of the list, so the last batch never contains undefined entries
            const batch = items.slice(i, i + batchSize)
            const results = await Promise.all(
                batch.map((item) => this.downloadLogo(item, type, basePath))
            )
            downloadCount += results.filter((downloaded) => downloaded).length
        }
        console.log(items.length + "/" + items.length, "downloaded " + downloadCount)
    }
}
// Script entry point: instantiate the downloader and start it when this file is executed.
// NOTE(review): run() is not defined on DownloadNsiLogos itself; presumably inherited from Script - confirm.
new DownloadNsiLogos().run()

View file

@ -7,7 +7,7 @@ import ScriptUtils from "./ScriptUtils"
import TagRenderingConfig from "../src/Models/ThemeConfig/TagRenderingConfig"
import { And } from "../src/Logic/Tags/And"
import Script from "./Script"
import NameSuggestionIndex, { NSIItem } from "../src/Logic/Web/NameSuggestionIndex"
import NameSuggestionIndex from "../src/Logic/Web/NameSuggestionIndex"
import TagInfo, { TagInfoStats } from "../src/Logic/Web/TagInfo"
class Utilities {
@ -18,6 +18,7 @@ class Utilities {
}
return newR
}
}
class GenerateStats extends Script {
@ -61,9 +62,7 @@ class GenerateStats extends Script {
await Promise.all(
Array.from(keysAndTags.keys()).map(async (key) => {
const values = keysAndTags.get(key)
const data = await Utils.downloadJson(
`https://taginfo.openstreetmap.org/api/4/key/stats?key=${key}`
)
const data = await TagInfo.global.getStats(key)
const count = data.data.find((item) => item.type === "all").count
keyTotal.set(key, count)
console.log(key, "-->", count)
@ -72,10 +71,8 @@ class GenerateStats extends Script {
tagTotal.set(key, new Map<string, number>())
await Promise.all(
Array.from(values).map(async (value) => {
const tagData = await Utils.downloadJson(
`https://taginfo.openstreetmap.org/api/4/tag/stats?key=${key}&value=${value}`
)
const count = tagData.data.find((item) => item.type === "all").count
const tagData: TagInfoStats= await TagInfo.global.getStats(key, value)
const count = tagData.data .find((item) => item.type === "all").count
tagTotal.get(key).set(value, count)
console.log(key + "=" + value, "-->", count)
})
@ -98,20 +95,74 @@ class GenerateStats extends Script {
)
}
async createNameSuggestionIndexFile() {
const type = "brand"
/**
 * Derives summary files from a JSON file which maps `brand -> country -> count`.
 *
 * Two kinds of files are written:
 * - `<pathNoExtension>.summarized.json`: the input data with all zero-counts removed
 * - `<pathNoExtension>.<country>.json`: for every country key encountered in the input,
 *   a `brand -> count` mapping containing only the brands with a count above two
 *
 * @param sourcefile path of the JSON file to read
 * @param pathNoExtension prefix of the paths of the files that are written
 */
private summarizeNSI(sourcefile: string, pathNoExtension: string): void {
    const countsPerBrand = <Record<string, Record<string, number>>>(
        JSON.parse(readFileSync(sourcefile, "utf8"))
    )

    // Collect every country key (also those with a zero count) and prune the zero entries in place
    const seenCountries = new Set<string>()
    for (const brandName in countsPerBrand) {
        const countryCounts = countsPerBrand[brandName]
        for (const countryCode in countryCounts) {
            seenCountries.add(countryCode)
            if (countryCounts[countryCode] !== 0) {
                continue
            }
            delete countryCounts[countryCode]
        }
    }

    const summarizedPath = pathNoExtension + ".summarized.json"
    const summarizedContents = JSON.stringify(countsPerBrand, null, " ")
    writeFileSync(summarizedPath, summarizedContents, "utf8")
    console.log("Written", summarizedPath)

    // Brands are sorted so that the keys of every per-country file appear in a stable order
    const sortedBrands = Object.keys(countsPerBrand)
    sortedBrands.sort()

    seenCountries.forEach((countryCode) => {
        const brandCounts = <Record<string, number>>{}
        sortedBrands.forEach((brandName) => {
            const occurrences = countsPerBrand[brandName][countryCode]
            // "One is none": a brand is only kept with more than two occurrences in this country;
            // lower counts (and brands without an entry for this country) are left out
            if (occurrences > 2) {
                brandCounts[brandName] = occurrences
            }
        })
        const perCountryPath = pathNoExtension + "." + countryCode + ".json"
        writeFileSync(perCountryPath, JSON.stringify(brandCounts), "utf8")
        console.log("Written", perCountryPath)
    })
}
async createNameSuggestionIndexFile(basepath: string,type: "brand" | "operator") {
const path = basepath+type+'.json'
let allBrands = <Record<string, Record<string, number>>>{}
const path = "./src/assets/generated/nsi_stats/" + type + ".json"
if (existsSync(path)) {
allBrands = JSON.parse(readFileSync(path, "utf8"))
console.log("Loaded",Object.keys(allBrands).length," previously loaded brands")
}
let lastWrite = new Date()
const allBrandNames: string[] = NameSuggestionIndex.allPossible(type)
for (const brand of allBrandNames) {
let skipped = 0
const allBrandNames: string[] = Utils.Dedup(NameSuggestionIndex.allPossible(type).map(item => item.tags[type]))
for (let i = 0; i < allBrandNames.length; i++){
if(i % 100 == 0){
console.log("Downloading ",i+"/"+allBrandNames.length,"; skipped",skipped)
}
const brand = allBrandNames[i]
if(!!allBrands[brand] && Object.keys(allBrands[brand]).length == 0){
delete allBrands[brand]
console.log("Deleted", brand, "as no entries at all")
}
if(allBrands[brand] !== undefined){
console.log("Skipping", brand,", already loaded")
continue
const max = Math.max(...Object.values(allBrands[brand]))
skipped++
if(max < 0){
console.log("HMMMM:", allBrands[brand])
delete allBrands[brand]
}else{
continue
}
}
const distribution: Record<string, number> = Utilities.mapValues(await TagInfo.getGlobalDistributionsFor(type, brand), s => s.data.find(t => t.type === "all").count)
allBrands[brand] = distribution
@ -128,8 +179,11 @@ class GenerateStats extends Script {
}
async main(_: string[]) {
// this.createOptimizationFile()
await this.createNameSuggestionIndexFile()
await this.createOptimizationFile()
const type = "brand"
const basepath = "./src/assets/generated/stats/"
await this.createNameSuggestionIndexFile(basepath, type)
this.summarizeNSI(basepath+type+".json", "./public/assets/data/stats/"+type)
}