| 
									
										
										
										
											2022-02-24 02:33:20 +01:00
										 |  |  | /** | 
					
						
							| 
									
										
										
										
											2022-10-29 03:01:24 +02:00
										 |  |  |  * Fetches all 'modern languages' from wikidata, then exports their names in every language. | 
					
						
							|  |  |  |  * Some meta-info (e.g. RTL) is exported too | 
					
						
							| 
									
										
										
										
											2022-02-24 02:33:20 +01:00
										 |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import * as wds from "wikidata-sdk" | 
					
						
							| 
									
										
										
										
											2023-02-01 02:03:25 +01:00
										 |  |  | import { Utils } from "../Utils" | 
					
						
							| 
									
										
										
										
											2022-02-24 02:33:20 +01:00
										 |  |  | import ScriptUtils from "./ScriptUtils" | 
					
						
							| 
									
										
										
										
											2023-02-01 02:03:25 +01:00
										 |  |  | import { existsSync, readFileSync, writeFileSync } from "fs" | 
					
						
							| 
									
										
										
										
											2022-07-11 09:14:26 +02:00
										 |  |  | import WikidataUtils from "../Utils/WikidataUtils" | 
					
						
							|  |  |  | import LanguageUtils from "../Utils/LanguageUtils" | 
					
						
							| 
									
										
										
										
											2023-02-01 02:03:25 +01:00
										 |  |  | import Wikidata from "../Logic/Web/Wikidata" | 
					
						
							| 
									
										
										
										
											2022-11-14 00:46:04 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | interface value<T> { | 
					
						
							| 
									
										
										
										
											2023-02-01 02:03:25 +01:00
										 |  |  |     value: T | 
					
						
							| 
									
										
										
										
											2022-11-14 00:46:04 +01:00
										 |  |  |     type: "uri" | "literal" | string | 
					
						
							| 
									
										
										
										
											2023-02-01 02:03:25 +01:00
										 |  |  |     "xml:lang"?: string | 
					
						
							| 
									
										
										
										
											2022-11-14 00:46:04 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | interface LanguageSpecResult { | 
					
						
							|  |  |  |     directionalityLabel: value<string | "right-to-left" | "left-to-right"> | 
					
						
							|  |  |  |     lang: value<string> | 
					
						
							|  |  |  |     code: value<string> | 
					
						
							|  |  |  |     label: value<string> | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2022-09-08 21:40:48 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-25 01:15:16 +01:00
										 |  |  | async function fetch(target: string) { | 
					
						
							|  |  |  |     const regular = await fetchRegularLanguages() | 
					
						
							|  |  |  |     writeFileSync(target, JSON.stringify(regular, null, "  ")) | 
					
						
							|  |  |  |     console.log("Written to " + target) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | async function fetchRegularLanguages() { | 
					
						
							| 
									
										
										
										
											2022-02-24 02:33:20 +01:00
										 |  |  |     console.log("Fetching languages") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     const sparql = | 
					
						
							| 
									
										
										
										
											2022-10-29 03:01:24 +02:00
										 |  |  |         "SELECT ?lang ?label ?code ?directionalityLabel \n" + | 
					
						
							| 
									
										
										
										
											2022-02-24 02:33:20 +01:00
										 |  |  |         "WHERE \n" + | 
					
						
							|  |  |  |         "{ \n" + | 
					
						
							|  |  |  |         "  ?lang wdt:P31 wd:Q1288568. \n" + // language instanceOf (p31) modern language(Q1288568)
 | 
					
						
							|  |  |  |         "  ?lang rdfs:label ?label. \n" + | 
					
						
							| 
									
										
										
										
											2022-11-02 13:47:34 +01:00
										 |  |  |         " ?lang wdt:P282 ?writing_system. \n" + | 
					
						
							| 
									
										
										
										
											2022-10-29 03:01:24 +02:00
										 |  |  |         "  ?writing_system wdt:P1406 ?directionality. \n" + | 
					
						
							| 
									
										
										
										
											2022-11-02 13:47:34 +01:00
										 |  |  |         "  ?lang wdt:P424 ?code. \n" + // Wikimedia language code seems to be close to the weblate entries
 | 
					
						
							|  |  |  |         '  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } \n' + | 
					
						
							| 
									
										
										
										
											2022-02-24 02:33:20 +01:00
										 |  |  |         "} " | 
					
						
							|  |  |  |     const url = wds.sparqlQuery(sparql) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // request the generated URL with your favorite HTTP request library
 | 
					
						
							|  |  |  |     const result = await Utils.downloadJson(url, { "User-Agent": "MapComplete script" }) | 
					
						
							| 
									
										
										
										
											2023-02-01 02:03:25 +01:00
										 |  |  |     const bindings = <LanguageSpecResult[]>result.results.bindings | 
					
						
							| 
									
										
										
										
											2022-09-08 21:40:48 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-25 01:15:16 +01:00
										 |  |  |     const zh_hant = await fetchSpecial(18130932, "zh_Hant") | 
					
						
							|  |  |  |     const zh_hans = await fetchSpecial(13414913, "zh_Hant") | 
					
						
							|  |  |  |     const pt_br = await fetchSpecial(750553, "pt_BR") | 
					
						
							| 
									
										
										
										
											2022-11-14 00:46:04 +01:00
										 |  |  |     const punjabi = await fetchSpecial(58635, "pa_PK") | 
					
						
							|  |  |  |     const Shahmukhi = await Wikidata.LoadWikidataEntryAsync(133800) | 
					
						
							| 
									
										
										
										
											2023-02-01 02:03:25 +01:00
										 |  |  |     punjabi.forEach((item) => { | 
					
						
							|  |  |  |         const neededLanguage = item.label["xml:lang"] | 
					
						
							|  |  |  |         const native = Shahmukhi.labels.get(neededLanguage) ?? Shahmukhi.labels.get("en") | 
					
						
							|  |  |  |         item.label.value = item.label.value + " (" + native + ")" | 
					
						
							|  |  |  |     }) | 
					
						
							| 
									
										
										
										
											2022-11-14 00:46:04 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-04-22 16:51:49 +02:00
										 |  |  |     const fil = await fetchSpecial(33298, "fil") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-25 01:15:16 +01:00
										 |  |  |     bindings.push(...zh_hant) | 
					
						
							|  |  |  |     bindings.push(...zh_hans) | 
					
						
							|  |  |  |     bindings.push(...pt_br) | 
					
						
							| 
									
										
										
										
											2022-04-22 16:51:49 +02:00
										 |  |  |     bindings.push(...fil) | 
					
						
							| 
									
										
										
										
											2022-11-14 00:46:04 +01:00
										 |  |  |     bindings.push(...punjabi) | 
					
						
							| 
									
										
										
										
											2022-02-25 01:15:16 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return result.results.bindings | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-11-14 00:46:04 +01:00
										 |  |  | /** | 
					
						
							|  |  |  |  * Fetches the object as is. Sets a 'code' binding as predifined value | 
					
						
							|  |  |  |  * @param id | 
					
						
							|  |  |  |  * @param code | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2023-02-01 02:03:25 +01:00
										 |  |  | async function fetchSpecial(id: number, code: string): Promise<LanguageSpecResult[]> { | 
					
						
							| 
									
										
										
										
											2022-02-25 01:15:16 +01:00
										 |  |  |     ScriptUtils.fixUtils() | 
					
						
							|  |  |  |     console.log("Fetching languages") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-02-01 02:03:25 +01:00
										 |  |  |     const lang = "  wd:Q" + id | 
					
						
							| 
									
										
										
										
											2022-02-25 01:15:16 +01:00
										 |  |  |     const sparql = | 
					
						
							| 
									
										
										
										
											2022-11-14 00:46:04 +01:00
										 |  |  |         "SELECT ?label ?directionalityLabel \n" + | 
					
						
							| 
									
										
										
										
											2022-02-25 01:15:16 +01:00
										 |  |  |         "WHERE \n" + | 
					
						
							|  |  |  |         "{ \n" + | 
					
						
							| 
									
										
										
										
											2023-02-01 02:03:25 +01:00
										 |  |  |         lang + | 
					
						
							|  |  |  |         " rdfs:label ?label." + | 
					
						
							|  |  |  |         lang + | 
					
						
							|  |  |  |         " wdt:P282 ?writing_system. \n" + | 
					
						
							| 
									
										
										
										
											2022-11-14 00:46:04 +01:00
										 |  |  |         "  ?writing_system wdt:P1406 ?directionality. \n" + | 
					
						
							| 
									
										
										
										
											2023-02-01 02:03:25 +01:00
										 |  |  |         '  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } \n' + | 
					
						
							| 
									
										
										
										
											2022-02-25 01:15:16 +01:00
										 |  |  |         "} " | 
					
						
							| 
									
										
										
										
											2022-11-14 00:46:04 +01:00
										 |  |  |     console.log("Special sparql:", sparql) | 
					
						
							| 
									
										
										
										
											2022-02-25 01:15:16 +01:00
										 |  |  |     const url = wds.sparqlQuery(sparql) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     const result = await Utils.downloadJson(url, { "User-Agent": "MapComplete script" }) | 
					
						
							|  |  |  |     const bindings = result.results.bindings | 
					
						
							|  |  |  |     bindings.forEach((binding) => (binding["code"] = { value: code })) | 
					
						
							|  |  |  |     return bindings | 
					
						
							| 
									
										
										
										
											2022-02-24 02:33:20 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-10-29 03:01:24 +02:00
										 |  |  | function getNativeList(langs: Map<string, { translations: Map<string, string> }>) { | 
					
						
							| 
									
										
										
										
											2022-02-24 02:33:20 +01:00
										 |  |  |     const native = {} | 
					
						
							| 
									
										
										
										
											2022-04-19 01:55:14 +02:00
										 |  |  |     const keys: string[] = Array.from(langs.keys()) | 
					
						
							|  |  |  |     keys.sort() | 
					
						
							|  |  |  |     for (const key of keys) { | 
					
						
							| 
									
										
										
										
											2022-10-29 03:01:24 +02:00
										 |  |  |         const translations: Map<string, string> = langs.get(key).translations | 
					
						
							| 
									
										
										
										
											2022-07-11 09:14:26 +02:00
										 |  |  |         if (!LanguageUtils.usedLanguages.has(key)) { | 
					
						
							| 
									
										
										
										
											2022-04-19 01:55:14 +02:00
										 |  |  |             continue | 
					
						
							| 
									
										
										
										
											2022-02-25 01:50:15 +01:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2022-02-24 02:33:20 +01:00
										 |  |  |         native[key] = translations.get(key) | 
					
						
							| 
									
										
										
										
											2022-11-02 13:47:34 +01:00
										 |  |  |         if (native[key] === undefined) { | 
					
						
							|  |  |  |             console.log("No native translation found for " + key) | 
					
						
							| 
									
										
										
										
											2022-10-29 03:01:24 +02:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2022-04-19 01:55:14 +02:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2022-02-24 02:33:20 +01:00
										 |  |  |     return native | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-06-24 16:49:03 +02:00
										 |  |  | async function getOfficialLanguagesPerCountry(): Promise<Map<string, string[]>> { | 
					
						
							|  |  |  |     const lngs = new Map<string, string[]>() | 
					
						
							|  |  |  |     const sparql = `SELECT ?country ?countryLabel ?countryCode ?language ?languageCode ?languageLabel
 | 
					
						
							|  |  |  |     WHERE | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |             ?country wdt:P31/wdt:P279* wd:Q3624078; | 
					
						
							|  |  |  |         wdt:P297 ?countryCode; | 
					
						
							|  |  |  |         wdt:P37 ?language. | 
					
						
							|  |  |  |             ?language wdt:P218 ?languageCode. | 
					
						
							|  |  |  |             SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } | 
					
						
							|  |  |  |     }`
 | 
					
						
							|  |  |  |     const url = wds.sparqlQuery(sparql) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     const result = await Utils.downloadJson(url, { "User-Agent": "MapComplete script" }) | 
					
						
							|  |  |  |     const bindings: { countryCode: { value: string }; languageCode: { value: string } }[] = | 
					
						
							|  |  |  |         result.results.bindings | 
					
						
							|  |  |  |     for (const binding of bindings) { | 
					
						
							|  |  |  |         const countryCode = binding.countryCode.value | 
					
						
							|  |  |  |         const language = binding.languageCode.value | 
					
						
							|  |  |  |         if (lngs.get(countryCode) === undefined) { | 
					
						
							|  |  |  |             lngs.set(countryCode, []) | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         lngs.get(countryCode).push(language) | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return lngs | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-11-02 13:47:34 +01:00
										 |  |  | async function getOfficialLanguagesPerCountryCached( | 
					
						
							|  |  |  |     wipeCache: boolean | 
					
						
							|  |  |  | ): Promise<Record<string /*Country code*/, string[] /*Language codes*/>> { | 
					
						
							| 
									
										
										
										
											2022-10-29 03:01:24 +02:00
										 |  |  |     let officialLanguages: Record<string, string[]> | 
					
						
							|  |  |  |     const officialLanguagesPath = "./assets/language_in_country.json" | 
					
						
							|  |  |  |     if (existsSync("./assets/languages_in_country.json") && !wipeCache) { | 
					
						
							|  |  |  |         officialLanguages = JSON.parse(readFileSync(officialLanguagesPath, "utf8")) | 
					
						
							|  |  |  |     } else { | 
					
						
							|  |  |  |         officialLanguages = Utils.MapToObj(await getOfficialLanguagesPerCountry(), (t) => t) | 
					
						
							|  |  |  |         writeFileSync(officialLanguagesPath, JSON.stringify(officialLanguages, null, "  ")) | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return officialLanguages | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-25 01:15:16 +01:00
										 |  |  | async function main(wipeCache = false) { | 
					
						
							| 
									
										
										
										
											2022-02-24 02:33:20 +01:00
										 |  |  |     const cacheFile = "./assets/generated/languages-wd.json" | 
					
						
							|  |  |  |     if (wipeCache || !existsSync(cacheFile)) { | 
					
						
							| 
									
										
										
										
											2022-02-25 01:15:16 +01:00
										 |  |  |         console.log("Refreshing cache") | 
					
						
							|  |  |  |         await fetch(cacheFile) | 
					
						
							| 
									
										
										
										
											2022-02-24 02:33:20 +01:00
										 |  |  |     } else { | 
					
						
							|  |  |  |         console.log("Reusing the cached file") | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2022-10-29 03:01:24 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-15 23:28:02 +01:00
										 |  |  |     const data = JSON.parse(readFileSync(cacheFile, { encoding: "utf8" })) | 
					
						
							| 
									
										
										
										
											2022-07-11 09:14:26 +02:00
										 |  |  |     const perId = WikidataUtils.extractLanguageData(data, WikidataUtils.languageRemapping) | 
					
						
							| 
									
										
										
										
											2022-02-24 02:33:20 +01:00
										 |  |  |     const nativeList = getNativeList(perId) | 
					
						
							| 
									
										
										
										
											2022-02-25 01:15:16 +01:00
										 |  |  |     writeFileSync("./assets/language_native.json", JSON.stringify(nativeList, null, "  ")) | 
					
						
							| 
									
										
										
										
											2022-11-02 13:47:34 +01:00
										 |  |  |     const languagesPerCountry = Utils.TransposeMap( | 
					
						
							|  |  |  |         await getOfficialLanguagesPerCountryCached(wipeCache) | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2022-06-24 16:49:03 +02:00
										 |  |  |     const translations = Utils.MapToObj(perId, (value, key) => { | 
					
						
							| 
									
										
										
										
											2022-10-29 03:01:24 +02:00
										 |  |  |         // We keep all language codes in the list...
 | 
					
						
							| 
									
										
										
										
											2022-11-02 13:47:34 +01:00
										 |  |  |         const translatedForId: Record<string, string | { countries?: string[]; dir: string[] }> = | 
					
						
							|  |  |  |             Utils.MapToObj(value.translations, (v, k) => { | 
					
						
							|  |  |  |                 if (!LanguageUtils.usedLanguages.has(k)) { | 
					
						
							|  |  |  |                     // ... but don't keep translations if we don't have a displayed language for them
 | 
					
						
							|  |  |  |                     return undefined | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 return v | 
					
						
							|  |  |  |             }) | 
					
						
							| 
									
										
										
										
											2022-10-29 03:01:24 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         translatedForId["_meta"] = { | 
					
						
							| 
									
										
										
										
											2022-11-02 13:47:34 +01:00
										 |  |  |             countries: Utils.Dedup(languagesPerCountry[key]), | 
					
						
							|  |  |  |             dir: value.directionality, | 
					
						
							| 
									
										
										
										
											2022-10-29 03:01:24 +02:00
										 |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return translatedForId | 
					
						
							| 
									
										
										
										
											2022-02-25 01:50:15 +01:00
										 |  |  |     }) | 
					
						
							| 
									
										
										
										
											2022-09-08 21:40:48 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-24 02:33:20 +01:00
										 |  |  |     writeFileSync("./assets/language_translations.json", JSON.stringify(translations, null, "  ")) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-02-01 02:03:25 +01:00
										 |  |  | const forceRefresh = process.argv[2] === "--force-refresh" | 
					
						
							| 
									
										
										
										
											2022-06-24 16:49:03 +02:00
										 |  |  | ScriptUtils.fixUtils() | 
					
						
							|  |  |  | main(forceRefresh).then(() => console.log("Done!")) |