diff --git a/assets/language_in_country.json b/assets/language_in_country.json index 847fa4da1..4e4970b55 100644 --- a/assets/language_in_country.json +++ b/assets/language_in_country.json @@ -132,6 +132,7 @@ "en" ], "CN": [ + "zh", "zh" ], "CO": [ @@ -148,6 +149,7 @@ ], "CY": [ "tr", + "el", "el" ], "CZ": [ @@ -247,6 +249,9 @@ "es", "pt" ], + "GR": [ + "el" + ], "GT": [ "es" ], @@ -453,8 +458,7 @@ "fr" ], "NG": [ - "en", - "yo" + "en" ], "NI": [ "es" @@ -502,9 +506,7 @@ "en" ], "PK": [ - "ur", - "en", - "ar" + "ur" ], "PL": [ "pl", @@ -559,7 +561,6 @@ "ar" ], "SE": [ - "sv", "sv" ], "SG": [ @@ -648,6 +649,9 @@ "en", "en" ], + "TW": [ + "zh" + ], "TZ": [ "en", "sw" @@ -693,16 +697,16 @@ "ar" ], "ZA": [ + "en", + "zu", + "xh", "af", "ve", "ss", "tn", "ts", "st", - "nr", - "en", - "zu", - "xh" + "nr" ], "ZM": [ "en" diff --git a/package.json b/package.json index 9375df9a7..b884d5a50 100644 --- a/package.json +++ b/package.json @@ -54,7 +54,7 @@ "weblate-fix-heavy": "git remote rm weblate-layers; git remote add weblate-layers https://hosted.weblate.org/git/mapcomplete/layers/; git remote update weblate-layers; git merge weblate-layers/master", "housekeeping": "npm run generate && npm run generate:docs && npm run generate:contributor-list && npm run format && git add assets/ langs/ Docs/ **/*.ts Docs/* && git commit -m 'Housekeeping...'", "parseSchools": "ts-node scripts/schools/amendSchoolData.ts", - "steal": "ts-node scripts/thieves/stealLanguages.ts" + "steal": "ts-node scripts/fetchLanguages.ts" }, "keywords": [ "OpenStreetMap", diff --git a/scripts/fetchLanguages.ts b/scripts/fetchLanguages.ts index e4641f279..4940a07aa 100644 --- a/scripts/fetchLanguages.ts +++ b/scripts/fetchLanguages.ts @@ -1,5 +1,6 @@ /** - * Fetches all 'modern languages' from wikidata, then exports their names in every language + * Fetches all 'modern languages' from wikidata, then exports their names in every language. + * Some meta-info (e.g. RTL) is exported too */ import * as wds from "wikidata-sdk" @@ -21,12 +22,15 @@ async function fetchRegularLanguages() { console.log("Fetching languages") const sparql = - "SELECT ?lang ?label ?code \n" + + "SELECT ?lang ?label ?code ?directionalityLabel \n" + "WHERE \n" + "{ \n" + " ?lang wdt:P31 wd:Q1288568. \n" + // language instanceOf (p31) modern language(Q1288568) " ?lang rdfs:label ?label. \n" + - " ?lang wdt:P424 ?code" + // Wikimedia language code seems to be close to the weblate entries + " ?lang wdt:P282 ?writing_system. \n"+ + " ?writing_system wdt:P1406 ?directionality. \n" + + " ?lang wdt:P424 ?code. \n" +// Wikimedia language code seems to be close to the weblate entries + " SERVICE wikibase:label { bd:serviceParam wikibase:language \"en\". } \n" + "} " const url = wds.sparqlQuery(sparql) @@ -67,16 +71,19 @@ async function fetchSpecial(id: number, code: string) { return bindings } -function getNativeList(langs: Map>) { +function getNativeList(langs: Map }>) { const native = {} const keys: string[] = Array.from(langs.keys()) keys.sort() for (const key of keys) { - const translations: Map = langs.get(key) + const translations: Map = langs.get(key).translations if (!LanguageUtils.usedLanguages.has(key)) { continue } native[key] = translations.get(key) + if(native[key] === undefined){ + console.log("No native translation found for "+key) + } } return native } @@ -108,33 +115,7 @@ async function getOfficialLanguagesPerCountry(): Promise> return lngs } -async function main(wipeCache = false) { - const cacheFile = "./assets/generated/languages-wd.json" - if (wipeCache || !existsSync(cacheFile)) { - console.log("Refreshing cache") - await fetch(cacheFile) - } else { - console.log("Reusing the cached file") - } - const data = JSON.parse(readFileSync(cacheFile, "UTF8")) - const perId = WikidataUtils.extractLanguageData(data, WikidataUtils.languageRemapping) - const nativeList = getNativeList(perId) - writeFileSync("./assets/language_native.json", JSON.stringify(nativeList, null, " ")) - - const translations = Utils.MapToObj(perId, (value, key) => { - if (!LanguageUtils.usedLanguages.has(key)) { - return undefined // Remove unused languages - } - return Utils.MapToObj(value, (v, k) => { - if (!LanguageUtils.usedLanguages.has(k)) { - return undefined - } - return v - }) - }) - - writeFileSync("./assets/language_translations.json", JSON.stringify(translations, null, " ")) - +async function getOfficialLanguagesPerCountryCached(wipeCache: boolean): Promise>{ let officialLanguages: Record const officialLanguagesPath = "./assets/language_in_country.json" if (existsSync("./assets/languages_in_country.json") && !wipeCache) { @@ -143,37 +124,48 @@ async function main(wipeCache = false) { officialLanguages = Utils.MapToObj(await getOfficialLanguagesPerCountry(), (t) => t) writeFileSync(officialLanguagesPath, JSON.stringify(officialLanguages, null, " ")) } + return officialLanguages +} - const perLanguage = Utils.TransposeMap(officialLanguages) - console.log(JSON.stringify(perLanguage, null, " ")) - const mappings: { if: string; then: Record; hideInAnswer: string }[] = [] - for (const language of Object.keys(perLanguage)) { - const countries = Utils.Dedup(perLanguage[language].map((c) => c.toLowerCase())) - mappings.push({ - if: "language=" + language, - then: translations[language], - hideInAnswer: "_country=" + countries.join("|"), +async function main(wipeCache = false) { + const cacheFile = "./assets/generated/languages-wd.json" + if (wipeCache || !existsSync(cacheFile)) { + console.log("Refreshing cache") + await fetch(cacheFile) + } else { + console.log("Reusing the cached file") + } + + + const data = JSON.parse(readFileSync(cacheFile, "UTF8")) + const perId = WikidataUtils.extractLanguageData(data, WikidataUtils.languageRemapping) + const nativeList = getNativeList(perId) + writeFileSync("./assets/language_native.json", JSON.stringify(nativeList, null, " ")) + const languagesPerCountry = Utils.TransposeMap(await getOfficialLanguagesPerCountryCached(wipeCache)) + const translations = Utils.MapToObj(perId, (value, key) => { + // We keep all language codes in the list... + const translatedForId : Record = Utils.MapToObj(value.translations, (v, k) => { + if (!LanguageUtils.usedLanguages.has(k)) { + // ... but don't keep translations if we don't have a displayed language for them + return undefined + } + return v }) - } - const tagRenderings = { - id: "official-language", - mappings, - question: "What languages are spoken here?", - } + translatedForId["_meta"] = { + countries : Utils.Dedup( languagesPerCountry[key]), + dir: value.directionality + } + + return translatedForId + }) + + writeFileSync("./assets/language_translations.json", JSON.stringify(translations, null, " ")) + + + + - writeFileSync( - "./assets/layers/language/language.json", - JSON.stringify( - { - id: "language", - description: "Various tagRenderings to help language tooling", - tagRenderings, - }, - null, - " " - ) - ) } const forceRefresh = process.argv[2] === "--force-refresh" diff --git a/scripts/thieves/stealLanguages.ts b/scripts/thieves/stealLanguages.ts deleted file mode 100644 index 1f6bda27f..000000000 --- a/scripts/thieves/stealLanguages.ts +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Uses the languages in and to every translation from wikidata to generate a language question in wikidata/wikidata - * */ - -import WikidataUtils from "../../Utils/WikidataUtils" -import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs" -import { LayerConfigJson } from "../../Models/ThemeConfig/Json/LayerConfigJson" -import { MappingConfigJson } from "../../Models/ThemeConfig/Json/QuestionableTagRenderingConfigJson" -import LanguageUtils from "../../Utils/LanguageUtils" -import * as perCountry from "../../assets/language_in_country.json" -import { Utils } from "../../Utils" -function main() { - const sourcepath = "assets/generated/languages-wd.json" - console.log(`Converting language data file '${sourcepath}' into a tagMapping`) - const languages = WikidataUtils.extractLanguageData( - JSON.parse(readFileSync(sourcepath, "utf8")), - {} - ) - const mappings: MappingConfigJson[] = [] - const schoolmappings: MappingConfigJson[] = [] - const brailemappings: MappingConfigJson[] = [] - - const countryToLanguage: Record = perCountry - const officialLanguagesPerCountry = Utils.TransposeMap(countryToLanguage) - - languages.forEach((l, code) => { - const then: Record = {} - l.forEach((tr, lng) => { - const languageCodeWeblate = WikidataUtils.languageRemapping[lng] ?? lng - if (!LanguageUtils.usedLanguages.has(languageCodeWeblate)) { - return - } - then[languageCodeWeblate] = tr - }) - - const officialCountries = Utils.Dedup( - officialLanguagesPerCountry[code]?.map((s) => s.toLowerCase()) ?? [] - ) - const prioritySearch = - officialCountries.length > 0 - ? "_country~" + officialCountries.map((c) => "((^|;)" + c + "($|;))").join("|") - : undefined - mappings.push({ - if: "language:" + code + "=yes", - ifnot: "language:" + code + "=", - searchTerms: { - "*": [code], - }, - then, - priorityIf: prioritySearch, - }) - - schoolmappings.push({ - if: "school:language=" + code, - then, - priorityIf: prioritySearch, - searchTerms: { - "*": [code], - }, - }) - - brailemappings.push({ - if: "tactile_writing:braille:" + code + "=yes", - ifnot: "tactile_writing:braille:" + code + "=", - searchTerms: { - "*": [code], - }, - then, - priorityIf: prioritySearch, - }) - }) - - const wikidataLayer = { - id: "wikidata", - description: { - en: "Various tagrenderings which are generated from Wikidata. Automatically generated with a script, don't edit manually", - }, - "#dont-translate": "*", - source: { - osmTags: "id~*", - }, - title: null, - mapRendering: null, - tagRenderings: [ - { - id: "language", - // @ts-ignore - description: "Enables to pick *a single* 'language:=yes' within the mappings", - mappings, - }, - { - builtin: "wikidata.language", - override: { - id: "language-multi", - // @ts-ignore - description: - "Enables to pick *multiple* 'language:=yes' within the mappings", - multiAnswer: true, - }, - }, - { - id: "school-language", - // @ts-ignore - description: "Enables to pick a single 'school:language=' within the mappings", - multiAnswer: true, - mappings: schoolmappings, - }, - { - id: "tactile_writing-braille", - // @ts-ignore - description: - "Enables to pick *multiple* 'tactile_writing:braille=' within the mappings", - multiAnswer: true, - mappings: brailemappings, - }, - ], - } - const dir = "./assets/layers/wikidata/" - if (!existsSync(dir)) { - mkdirSync(dir) - } - const path = dir + "wikidata.json" - writeFileSync(path, JSON.stringify(wikidataLayer, null, " ")) - console.log("Written " + path) -} - -main()