forked from MapComplete/MapComplete
NSI: add script to download logos and statistics, dynamically inject extra mappings, hide low-priority mappings if applicable
This commit is contained in:
parent
30d1f175c6
commit
c5b4cdf450
18 changed files with 459 additions and 114 deletions
|
@ -1,11 +1,10 @@
|
|||
import * as fs from "fs"
|
||||
import { existsSync, lstatSync, readdirSync, readFileSync } from "fs"
|
||||
import { Utils } from "../src/Utils"
|
||||
import * as https from "https"
|
||||
import {https} from "follow-redirects"
|
||||
import { LayoutConfigJson } from "../src/Models/ThemeConfig/Json/LayoutConfigJson"
|
||||
import { LayerConfigJson } from "../src/Models/ThemeConfig/Json/LayerConfigJson"
|
||||
import xml2js from "xml2js"
|
||||
import { resolve } from "node:dns"
|
||||
|
||||
export default class ScriptUtils {
|
||||
public static fixUtils() {
|
||||
|
|
113
scripts/downloadNsiLogos.ts
Normal file
113
scripts/downloadNsiLogos.ts
Normal file
|
@ -0,0 +1,113 @@
|
|||
import Script from "./Script"
|
||||
import NameSuggestionIndex, { NSIItem } from "../src/Logic/Web/NameSuggestionIndex"
|
||||
import * as nsiWD from "../node_modules/name-suggestion-index/dist/wikidata.min.json"
|
||||
import { existsSync, writeFileSync } from "fs"
|
||||
import ScriptUtils from "./ScriptUtils"
|
||||
import { Utils } from "../src/Utils"
|
||||
import { WikimediaImageProvider } from "../src/Logic/ImageProviders/WikimediaImageProvider"
|
||||
import { renameSync } from "node:fs"
|
||||
|
||||
/**
 * Script which downloads a logo for every item of the Name Suggestion Index (NSI).
 *
 * The logo URLs are looked up in the `wikidata.min.json` file shipped with the
 * `name-suggestion-index` package. Every logo is written to
 * `<basePath><nsi item id>`, with a `.svg` suffix if `NameSuggestionIndex.isSvg` says so.
 * Files which already exist on disk are never downloaded again, so the script can be
 * interrupted and restarted.
 */
export default class DownloadNsiLogos extends Script {
    /** Identifies this scraper towards the servers hosting the wikidata logos */
    private static readonly userAgent =
        "MapComplete NSI scraper/0.1 (https://github.com/pietervdvn/MapComplete; pietervdvn@posteo.net)"

    /** Maximum number of requests (and thus redirects followed) for a single logo */
    private static readonly maxAttempts = 10

    constructor() {
        super("Downloads all images of the NSI")
    }

    /**
     * Downloads the logo of a single NSI item; never throws.
     *
     * @param nsiItem the item to fetch the logo for
     * @param type the NSI type (e.g. "brand"), used to find the `<type>:wikidata` tag
     * @param basePath directory prefix (including trailing slash) to write the logo into
     * @returns true if a new file was written; false if nothing was downloaded or the download failed
     */
    private async downloadLogo(nsiItem: NSIItem, type: string, basePath: string): Promise<boolean> {
        try {
            return await this.downloadLogoUnsafe(nsiItem, type, basePath)
        } catch (e) {
            // One failing logo must not abort the entire batch: log it and move on
            console.error("Could not download", nsiItem?.displayName, "due to", e)
            return false
        }
    }

    /**
     * Does the actual work for `downloadLogo`; errors are propagated to the caller.
     *
     * @returns true if a new file was written; false if the item is undefined, the target file
     * already exists, no logo is known or too many redirects were encountered
     * @throws Error if the wikidata logo is an SVG but the target path was not detected as SVG
     */
    private async downloadLogoUnsafe(nsiItem: NSIItem, type: string, basePath: string): Promise<boolean> {
        if (nsiItem === undefined) {
            return false
        }
        let path = basePath + nsiItem.id

        const logos = nsiWD["wikidata"][nsiItem?.tags?.[type + ":wikidata"]]?.logos

        if (NameSuggestionIndex.isSvg(nsiItem, type)) {
            path = path + ".svg"
        }

        if (existsSync(path)) {
            // Downloaded by an earlier run
            return false
        }

        if (!logos) {
            return false
        }

        if (logos.facebook) {
            // Facebook logo's are generally better and square
            await ScriptUtils.DownloadFileTo(logos.facebook, path)
            return true
        }

        if (!logos.wikidata) {
            return false
        }

        // Note: whether or not this is an SVG is decided on the _original_ URL, not on a redirect target
        const wikidataIsSvg = (<string>logos.wikidata).toLowerCase().endsWith(".svg")
        let url: string = logos.wikidata
        console.log("Downloading", url)

        for (let attempt = 0; attempt < DownloadNsiLogos.maxAttempts; attempt++) {
            // A fresh headers object is passed on every call, in case the download helper modifies it
            const dloaded = await Utils.downloadAdvanced(url, {
                "User-Agent": DownloadNsiLogos.userAgent,
            })
            const redirect: string | undefined = dloaded["redirect"]
            if (redirect) {
                console.log("Got a redirect from", url, "to", redirect)
                url = redirect
                continue
            }

            if (wikidataIsSvg) {
                if (!path.endsWith(".svg")) {
                    throw new Error("Undetected svg path:" + logos.wikidata)
                }
                // SVG is text, so the content which was already fetched can be written directly
                writeFileSync(path, dloaded["content"], "utf8")
                console.log("Written SVG", path)
                return true
            }

            // Not an SVG: fetch the final URL again with the file downloader
            console.log("Got data from", url, "-->", path)
            await ScriptUtils.DownloadFileTo(url, path)
            return true
        }

        console.error("Giving up on", logos.wikidata, "- too many redirects, last target was", url)
        return false
    }

    /**
     * Downloads the logos of all NSI items of type "brand", in batches of 100 parallel downloads,
     * and prints the progress before every batch.
     */
    async main(args: string[]): Promise<void> {
        const type = "brand"
        const items = NameSuggestionIndex.allPossible(type)
        const basePath = "./public/assets/data/nsi/logos/"
        const stepcount = 100
        let downloadCount = 0

        for (let i = 0; i < items.length; i += stepcount) {
            console.log(i + "/" + items.length, "downloaded " + downloadCount)
            const batch = items.slice(i, i + stepcount)
            const results = await Promise.all(
                batch.map((item) => this.downloadLogo(item, type, basePath))
            )
            downloadCount += results.filter((downloaded) => downloaded).length
        }
        console.log(items.length + "/" + items.length, "downloaded " + downloadCount)
    }
}
|
||||
|
||||
// Script entry point: instantiate the downloader and start it.
// NOTE(review): `run()` comes from the `Script` base class (./Script, not visible here); presumably it forwards the command line arguments to `main` - confirm
new DownloadNsiLogos().run()
|
|
@ -7,7 +7,7 @@ import ScriptUtils from "./ScriptUtils"
|
|||
import TagRenderingConfig from "../src/Models/ThemeConfig/TagRenderingConfig"
|
||||
import { And } from "../src/Logic/Tags/And"
|
||||
import Script from "./Script"
|
||||
import NameSuggestionIndex, { NSIItem } from "../src/Logic/Web/NameSuggestionIndex"
|
||||
import NameSuggestionIndex from "../src/Logic/Web/NameSuggestionIndex"
|
||||
import TagInfo, { TagInfoStats } from "../src/Logic/Web/TagInfo"
|
||||
|
||||
class Utilities {
|
||||
|
@ -18,6 +18,7 @@ class Utilities {
|
|||
}
|
||||
return newR
|
||||
}
|
||||
|
||||
}
|
||||
class GenerateStats extends Script {
|
||||
|
||||
|
@ -61,9 +62,7 @@ class GenerateStats extends Script {
|
|||
await Promise.all(
|
||||
Array.from(keysAndTags.keys()).map(async (key) => {
|
||||
const values = keysAndTags.get(key)
|
||||
const data = await Utils.downloadJson(
|
||||
`https://taginfo.openstreetmap.org/api/4/key/stats?key=${key}`
|
||||
)
|
||||
const data = await TagInfo.global.getStats(key)
|
||||
const count = data.data.find((item) => item.type === "all").count
|
||||
keyTotal.set(key, count)
|
||||
console.log(key, "-->", count)
|
||||
|
@ -72,10 +71,8 @@ class GenerateStats extends Script {
|
|||
tagTotal.set(key, new Map<string, number>())
|
||||
await Promise.all(
|
||||
Array.from(values).map(async (value) => {
|
||||
const tagData = await Utils.downloadJson(
|
||||
`https://taginfo.openstreetmap.org/api/4/tag/stats?key=${key}&value=${value}`
|
||||
)
|
||||
const count = tagData.data.find((item) => item.type === "all").count
|
||||
const tagData: TagInfoStats= await TagInfo.global.getStats(key, value)
|
||||
const count = tagData.data .find((item) => item.type === "all").count
|
||||
tagTotal.get(key).set(value, count)
|
||||
console.log(key + "=" + value, "-->", count)
|
||||
})
|
||||
|
@ -98,20 +95,74 @@ class GenerateStats extends Script {
|
|||
)
|
||||
}
|
||||
|
||||
async createNameSuggestionIndexFile() {
|
||||
const type = "brand"
|
||||
/**
 * Reads the raw NSI statistics (a JSON object mapping `brand -> country -> count`) and derives
 * smaller files from it:
 *
 * - `<pathNoExtension>.summarized.json`: the same data, with all zero counts removed
 * - `<pathNoExtension>.<country>.json`: one file per country seen in the input (even if all its
 *   counts were zero), mapping every brand with a count strictly greater than 2 in that country
 *   onto that count. Brands are added in sorted order.
 *
 * @param sourcefile path of the JSON file with the raw statistics
 * @param pathNoExtension path prefix of the files to write
 */
private summarizeNSI(sourcefile: string, pathNoExtension: string): void {
    const rawJson = readFileSync(sourcefile, "utf8")
    const countsPerBrand = <Record<string, Record<string, number>>>JSON.parse(rawJson)

    // First pass: remember every country we encounter and strip the zero counts
    const seenCountries = new Set<string>()
    for (const brandName in countsPerBrand) {
        const countryCounts = countsPerBrand[brandName]
        for (const countryCode in countryCounts) {
            seenCountries.add(countryCode)
            if (countryCounts[countryCode] !== 0) {
                continue
            }
            delete countryCounts[countryCode]
        }
    }

    const summarizedPath = pathNoExtension + ".summarized.json"
    const serialized = JSON.stringify(countsPerBrand, null, " ")
    writeFileSync(summarizedPath, serialized, "utf8")
    console.log("Written", summarizedPath)

    // Second pass: one compact file per country
    const sortedBrands = Object.keys(countsPerBrand)
    sortedBrands.sort()
    seenCountries.forEach((countryCode) => {
        const perCountrySummary = <Record<string, number>>{}
        for (const brandName of sortedBrands) {
            const occurrences = countsPerBrand[brandName][countryCode]
            // Brands which occur only once or twice in a country are rather exceptional and are left out
            if (!(occurrences > 2)) {
                continue
            }
            perCountrySummary[brandName] = occurrences
        }

        const target = `${pathNoExtension}.${countryCode}.json`
        writeFileSync(target, JSON.stringify(perCountrySummary), "utf8")
        console.log("Written", target)
    })
}
|
||||
|
||||
|
||||
async createNameSuggestionIndexFile(basepath: string,type: "brand" | "operator") {
|
||||
const path = basepath+type+'.json'
|
||||
let allBrands = <Record<string, Record<string, number>>>{}
|
||||
const path = "./src/assets/generated/nsi_stats/" + type + ".json"
|
||||
if (existsSync(path)) {
|
||||
allBrands = JSON.parse(readFileSync(path, "utf8"))
|
||||
console.log("Loaded",Object.keys(allBrands).length," previously loaded brands")
|
||||
}
|
||||
let lastWrite = new Date()
|
||||
const allBrandNames: string[] = NameSuggestionIndex.allPossible(type)
|
||||
for (const brand of allBrandNames) {
|
||||
let skipped = 0
|
||||
const allBrandNames: string[] = Utils.Dedup(NameSuggestionIndex.allPossible(type).map(item => item.tags[type]))
|
||||
for (let i = 0; i < allBrandNames.length; i++){
|
||||
if(i % 100 == 0){
|
||||
console.log("Downloading ",i+"/"+allBrandNames.length,"; skipped",skipped)
|
||||
}
|
||||
const brand = allBrandNames[i]
|
||||
if(!!allBrands[brand] && Object.keys(allBrands[brand]).length == 0){
|
||||
delete allBrands[brand]
|
||||
console.log("Deleted", brand, "as no entries at all")
|
||||
}
|
||||
if(allBrands[brand] !== undefined){
|
||||
console.log("Skipping", brand,", already loaded")
|
||||
continue
|
||||
const max = Math.max(...Object.values(allBrands[brand]))
|
||||
skipped++
|
||||
if(max < 0){
|
||||
console.log("HMMMM:", allBrands[brand])
|
||||
delete allBrands[brand]
|
||||
|
||||
}else{
|
||||
continue
|
||||
}
|
||||
}
|
||||
const distribution: Record<string, number> = Utilities.mapValues(await TagInfo.getGlobalDistributionsFor(type, brand), s => s.data.find(t => t.type === "all").count)
|
||||
allBrands[brand] = distribution
|
||||
|
@ -128,8 +179,11 @@ class GenerateStats extends Script {
|
|||
}
|
||||
|
||||
async main(_: string[]) {
|
||||
// this.createOptimizationFile()
|
||||
await this.createNameSuggestionIndexFile()
|
||||
await this.createOptimizationFile()
|
||||
const type = "brand"
|
||||
const basepath = "./src/assets/generated/stats/"
|
||||
await this.createNameSuggestionIndexFile(basepath, type)
|
||||
this.summarizeNSI(basepath+type+".json", "./public/assets/data/stats/"+type)
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue