forked from MapComplete/MapComplete
Support for lexemes, decent etymology layer and theme with rudimentary icon
parent 9726d85ad7
commit 9faac532b5
18 changed files with 611 additions and 270 deletions
@@ -2,27 +2,33 @@ import {Utils} from "../../Utils";
 import {UIEventSource} from "../UIEventSource";
 
-export interface WikidataResponse {
-    id: string,
-    labels: Map<string, string>,
-    descriptions: Map<string, string>,
-    claims: Map<string, Set<string>>,
-    wikisites: Map<string, string>
-    commons: string
-}
-
-export interface WikidataSearchoptions {
-    lang?: "en" | string,
-    maxCount?: 20 | number
-}
-
-/**
- * Utility functions around wikidata
- */
-export default class Wikidata {
-
-    private static ParseResponse(entity: any): WikidataResponse {
+export class WikidataResponse {
+    public readonly id: string
+    public readonly labels: Map<string, string>
+    public readonly descriptions: Map<string, string>
+    public readonly claims: Map<string, Set<string>>
+    public readonly wikisites: Map<string, string>
+    public readonly commons: string
+
+    constructor(
+        id: string,
+        labels: Map<string, string>,
+        descriptions: Map<string, string>,
+        claims: Map<string, Set<string>>,
+        wikisites: Map<string, string>,
+        commons: string
+    ) {
+        this.id = id
+        this.labels = labels
+        this.descriptions = descriptions
+        this.claims = claims
+        this.wikisites = wikisites
+        this.commons = commons
+    }
+
+    public static fromJson(entity: any): WikidataResponse {
         const labels = new Map<string, string>()
         for (const labelName in entity.labels) {
             // The labelname is the language code
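
Worth spelling out why the interface became a class: both plain entities and lexemes can now be materialized into one type (see asWikidataResponse further down in this diff). A minimal sketch, not part of the commit, assuming the classes from this file are in scope:

    const wd = new WikidataResponse(
        "Q42",                               // Douglas Adams, as a stand-in id
        new Map([["en", "Douglas Adams"]]),  // labels
        new Map(),                           // descriptions
        new Map(),                           // claims
        new Map(),                           // wikisites
        undefined                            // no commons sitelink
    )
    wd.labels.get("en")  // "Douglas Adams"
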
@@ -42,163 +48,252 @@
             const title = entity.sitelinks[labelName].title
             sitelinks.set(language, title)
         }
 
         const commons = sitelinks.get("commons")
         sitelinks.delete("commons")
-        const claims = new Map<string, Set<string>>();
-        for (const claimId in entity.claims) {
-            const claimsList: any[] = entity.claims[claimId]
-            const values = new Set<string>()
-            for (const claim of claimsList) {
-                let value = claim.mainsnak?.datavalue?.value;
-                if (value === undefined) {
-                    continue;
-                }
-                if(value.id !== undefined){
-                    value = value.id
-                }
-                values.add(value)
-            }
-            claims.set(claimId, values);
-        }
-        return {
-            claims: claims,
-            descriptions: descr,
-            id: entity.id,
-            labels: labels,
-            wikisites: sitelinks,
-            commons: commons
-        }
-    }
+        const claims = WikidataResponse.extractClaims(entity.claims);
+        return new WikidataResponse(
+            entity.id,
+            labels,
+            descr,
+            claims,
+            sitelinks,
+            commons
+        )
+    }
+
+    static extractClaims(claimsJson: any): Map<string, Set<string>> {
+        const claims = new Map<string, Set<string>>();
+        for (const claimId in claimsJson) {
+            const claimsList: any[] = claimsJson[claimId]
+            const values = new Set<string>()
+            for (const claim of claimsList) {
+                let value = claim.mainsnak?.datavalue?.value;
+                if (value === undefined) {
+                    continue;
+                }
+                if (value.id !== undefined) {
+                    value = value.id
+                }
+                values.add(value)
+            }
+            claims.set(claimId, values);
+        }
+        return claims
+    }
+}
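
To make the claim format concrete: a minimal sketch, not part of the commit, of what extractClaims consumes and returns. The shape follows the mainsnak/datavalue path the loop reads; the property and item ids below are placeholders:

    const claimsJson = {
        P138: [  // placeholder property id
            {mainsnak: {datavalue: {value: {id: "Q42"}}}},  // item value: only the id is kept
            {mainsnak: {datavalue: {value: "1234-5"}}},     // plain value: kept as-is
            {mainsnak: {}}                                  // no datavalue: skipped
        ]
    }
    const claims = WikidataResponse.extractClaims(claimsJson)
    claims.get("P138")  // Set { "Q42", "1234-5" }
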
+
+export class WikidataLexeme {
+    id: string
+    lemma: Map<string, string>
+    senses: Map<string, string>
+    claims: Map<string, Set<string>>
+
+    constructor(json) {
+        this.id = json.id
+        this.claims = WikidataResponse.extractClaims(json.claims)
+        this.lemma = new Map<string, string>()
+        for (const language in json.lemmas) {
+            this.lemma.set(language, json.lemmas[language].value)
+        }
+
+        this.senses = new Map<string, string>()
+
+        for (const sense of json.senses) {
+            const glosses = sense.glosses
+            for (const language in glosses) {
+                let previousSenses = this.senses.get(language)
+                if(previousSenses === undefined){
+                    previousSenses = ""
+                }else{
+                    previousSenses = previousSenses+"; "
+                }
+                this.senses.set(language, previousSenses + glosses[language].value ?? "")
+            }
+        }
+    }
+
+    asWikidataResponse() {
+        return new WikidataResponse(
+            this.id,
+            this.lemma,
+            this.senses,
+            this.claims,
+            new Map(),
+            undefined
+        );
+    }
+}
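
The lexeme constructor above walks a different JSON shape: lemmas plus senses with per-language glosses. A hedged sketch, with placeholder ids, of how one lexeme is folded into the two maps:

    const lexeme = new WikidataLexeme({
        id: "L1234",  // placeholder lexeme id
        claims: {},
        lemmas: {en: {value: "example"}, nl: {value: "voorbeeld"}},
        senses: [
            {glosses: {en: {value: "a representative case"}}},
            {glosses: {en: {value: "a model to imitate"}}}
        ]
    })
    // lexeme.lemma.get("en")  -> "example"
    // lexeme.senses.get("en") -> "a representative case; a model to imitate"
    lexeme.asWikidataResponse()  // lemmas become labels, senses become descriptions
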
+
+export interface WikidataSearchoptions {
+    lang?: "en" | string,
+    maxCount?: 20 | number
+}
+
+/**
+ * Utility functions around wikidata
+ */
+export default class Wikidata {
+
+    private static readonly _identifierPrefixes = ["Q", "L"].map(str => str.toLowerCase())
+    private static readonly _prefixesToRemove = ["https://www.wikidata.org/wiki/Lexeme:", "https://www.wikidata.org/wiki/", "Lexeme:"].map(str => str.toLowerCase())
+
-    private static readonly _cache = new Map<number, UIEventSource<{success: WikidataResponse} | {error: any}>>()
-    public static LoadWikidataEntry(value: string | number): UIEventSource<{success: WikidataResponse} | {error: any}> {
+    private static readonly _cache = new Map<string, UIEventSource<{ success: WikidataResponse } | { error: any }>>()
+
+    public static LoadWikidataEntry(value: string | number): UIEventSource<{ success: WikidataResponse } | { error: any }> {
         const key = this.ExtractKey(value)
         const cached = Wikidata._cache.get(key)
-        if(cached !== undefined){
+        if (cached !== undefined) {
             return cached
         }
         const src = UIEventSource.FromPromiseWithErr(Wikidata.LoadWikidataEntryAsync(key))
         Wikidata._cache.set(key, src)
         return src;
     }
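
A usage sketch for the cache, assuming an addCallbackAndRun-style subscription on UIEventSource (the subscription API itself is not shown in this diff): equivalent inputs normalize to the same key and therefore share one in-flight request.

    const src = Wikidata.LoadWikidataEntry("https://www.wikidata.org/wiki/Q42")
    src.addCallbackAndRun(result => {  // assumed subscription method
        if (result === undefined) {
            return  // still loading
        }
        if (result["success"] !== undefined) {
            console.log("Loaded", result["success"].id)
        } else {
            console.warn("Failed to load entity", result["error"])
        }
    })
    // Equivalent keys reuse the cached source: ExtractKey maps both
    // "https://www.wikidata.org/wiki/Q42" and "Q42" to the key "Q42".
    Wikidata.LoadWikidataEntry("Q42") === src  // true
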
 
     public static async search(
         search: string,
-        options?:WikidataSearchoptions,
+        options?: WikidataSearchoptions,
         page = 1
     ): Promise<{
         id: string,
         label: string,
         description: string
     }[]> {
         const maxCount = options?.maxCount ?? 20
-        let pageCount = Math.min(maxCount,50)
+        let pageCount = Math.min(maxCount, 50)
         const start = page * pageCount - pageCount;
         const lang = (options?.lang ?? "en")
         const url =
             "https://www.wikidata.org/w/api.php?action=wbsearchentities&search=" +
             search +
             "&language=" +
             lang +
-            "&limit="+pageCount+"&continue=" +
+            "&limit=" + pageCount + "&continue=" +
             start +
             "&format=json&uselang=" +
             lang +
-            "&type=item&origin=*"+
-            "&props=" ;// props= removes some unused values in the result
+            "&type=item&origin=*" +
+            "&props=";// props= removes some unused values in the result
         const response = await Utils.downloadJson(url)
 
         const result: any[] = response.search
 
-        if(result.length < pageCount){
+        if (result.length < pageCount) {
             // No next page
             return result;
         }
-        if(result.length < maxCount){
+        if (result.length < maxCount) {
             const newOptions = {...options}
             newOptions.maxCount = maxCount - result.length
             result.push(...await Wikidata.search(search,
                 newOptions,
                 page + 1
             ))
         }
 
         return result;
     }
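
To make the paging arithmetic concrete, a sketch of the request built for the first page; note the search term is concatenated without URL-encoding, so terms containing '&' or spaces are passed through as-is.

    // For Wikidata.search("Douglas Adams", {lang: "en", maxCount: 20}) with page = 1:
    //   pageCount = min(20, 50) = 20
    //   start     = 1 * 20 - 20 = 0
    // so the concatenation above yields (line breaks added for readability):
    //   https://www.wikidata.org/w/api.php?action=wbsearchentities
    //     &search=Douglas Adams&language=en&limit=20&continue=0
    //     &format=json&uselang=en&type=item&origin=*&props=
    const hits = await Wikidata.search("Douglas Adams", {lang: "en", maxCount: 20})  // inside an async function
    // hits resolves to [{id, label, description}, ...]; only a maxCount above 50
    // triggers the recursive page + 1 call, until a short page signals the end.
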
 
     public static async searchAndFetch(
         search: string,
-        options?:WikidataSearchoptions
-    ) : Promise<WikidataResponse[]>
-    {
+        options?: WikidataSearchoptions
+    ): Promise<WikidataResponse[]> {
         const maxCount = options.maxCount
         // We provide some padding to filter away invalid values
         options.maxCount = Math.ceil((options.maxCount ?? 20) * 1.5)
         const searchResults = await Wikidata.search(search, options)
         const maybeResponses = await Promise.all(searchResults.map(async r => {
-            try{
+            try {
                 return await Wikidata.LoadWikidataEntry(r.id).AsPromise()
-            }catch(e){
+            } catch (e) {
                 console.error(e)
                 return undefined;
             }
         }))
         const responses = maybeResponses
-            .map(r => <WikidataResponse> r["success"])
+            .map(r => <WikidataResponse>r["success"])
             .filter(wd => {
-                if(wd === undefined){
+                if (wd === undefined) {
                     return false;
                 }
-                if(wd.claims.get("P31" /*Instance of*/)?.has("Q4167410"/* Wikimedia Disambiguation page*/)){
+                if (wd.claims.get("P31" /*Instance of*/)?.has("Q4167410"/* Wikimedia Disambiguation page*/)) {
                     return false;
                 }
                 return true;
             })
         responses.splice(maxCount, responses.length - maxCount)
         return responses
     }
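
A usage sketch. One caveat visible above: the body reads options.maxCount and mutates options without a default, so callers should always pass an options object even though the parameter is marked optional.

    // Hypothetical call (inside an async function):
    const pubs = await Wikidata.searchAndFetch("pub", {lang: "en", maxCount: 5})
    // At most 5 WikidataResponse objects; disambiguation pages
    // (P31 "instance of" = Q4167410) were filtered out, and the 1.5x
    // over-fetch padding compensates for entries lost to that filter.
    for (const wd of pubs) {
        console.log(wd.id, wd.labels.get("en"))
    }
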
 
-    private static ExtractKey(value: string | number) : number{
+    public static ExtractKey(value: string | number): string {
         if (typeof value === "number") {
-            return value
+            return "Q" + value
         }
-        const wikidataUrl = "https://www.wikidata.org/wiki/"
-        if (value.startsWith(wikidataUrl)) {
-            value = value.substring(wikidataUrl.length)
+        if (value === undefined) {
+            console.error("ExtractKey: value is undefined")
+            return undefined;
         }
-        if (value.startsWith("http")) {
+        value = value.trim().toLowerCase()
+
+        for (const prefix of Wikidata._prefixesToRemove) {
+            if (value.startsWith(prefix)) {
+                value = value.substring(prefix.length)
+            }
+        }
+
+        if (value.startsWith("http") && value === "") {
             // Probably some random link in the image field - we skip it
             return undefined
         }
-        if (value.startsWith("Q")) {
-            value = value.substring(1)
+
+        for (const identifierPrefix of Wikidata._identifierPrefixes) {
+            if (value.startsWith(identifierPrefix)) {
+                const trimmed = value.substring(identifierPrefix.length);
+                if(trimmed === ""){
+                    return undefined
+                }
+                const n = Number(trimmed)
+                if (isNaN(n)) {
+                    return undefined
+                }
+                return value.toUpperCase();
+            }
         }
-        const n = Number(value)
-        if(isNaN(n)){
-            return undefined
-        }
-        return n;
+
+        if (value !== "" && !isNaN(Number(value))) {
+            return "Q" + value
+        }
+
+        return undefined;
     }
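
Tracing the branches above gives the mappings below (the lexeme id is a placeholder). Note that the new value.startsWith("http") && value === "" guard can never hold, since a string cannot both start with "http" and be empty; leftover http-links are instead rejected by the final return undefined.

    // Behaviour derived from the committed branches:
    Wikidata.ExtractKey(42)                                   // "Q42"
    Wikidata.ExtractKey("q42")                                // "Q42" (lower-cased, then upper-cased again)
    Wikidata.ExtractKey("https://www.wikidata.org/wiki/Q42")  // "Q42" (prefix stripped)
    Wikidata.ExtractKey("Lexeme:L1234")                       // "L1234" (placeholder lexeme id)
    Wikidata.ExtractKey("1234")                               // "Q1234" (bare numbers default to items)
    Wikidata.ExtractKey("https://example.com/img.jpg")        // undefined (not a wikidata id)
    Wikidata.ExtractKey("Qabc")                               // undefined (no numeric part)
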
 
     /**
      * Loads a wikidata page
      * @returns the entity of the given value
      */
     public static async LoadWikidataEntryAsync(value: string | number): Promise<WikidataResponse> {
         const id = Wikidata.ExtractKey(value)
-        if(id === undefined){
+        if (id === undefined) {
             console.warn("Could not extract a wikidata entry from", value)
-            return undefined;
+            throw "Could not extract a wikidata entry from " + value
         }
 
-        const url = "https://www.wikidata.org/wiki/Special:EntityData/Q" + id + ".json";
-        const response = await Utils.downloadJson(url)
-        return Wikidata.ParseResponse(response.entities["Q" + id])
+        const url = "https://www.wikidata.org/wiki/Special:EntityData/" + id + ".json";
+        const response = (await Utils.downloadJson(url)).entities[id]
+
+        if (id.startsWith("L")) {
+            // This is a lexeme:
+            return new WikidataLexeme(response).asWikidataResponse()
+        }
+
+        return WikidataResponse.fromJson(response)
     }
 }
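
Taken together, the lexeme support means one loader now serves both kinds of id. A hedged end-to-end sketch, with a placeholder lexeme id:

    async function showEntry(wikidataId: string) {
        // "L..." ids are routed through WikidataLexeme, so callers can
        // treat items and lexemes uniformly:
        const wd = await Wikidata.LoadWikidataEntryAsync(wikidataId)
        console.log(wd.labels.get("en"))        // lemma for lexemes, label for items
        console.log(wd.descriptions.get("en"))  // joined senses for lexemes
    }
    showEntry("L1234").catch(console.error)  // throws if the id cannot be parsed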