Refactoring: move all code files into a src directory

Pieter Vander Vennet 2023-07-09 13:09:05 +02:00
parent de99f56ca8
commit e75d2789d2
389 changed files with 0 additions and 12 deletions

52
src/Logic/Web/Hash.ts Normal file

@@ -0,0 +1,52 @@
import { UIEventSource } from "../UIEventSource"
import { Utils } from "../../Utils"
/**
* Wrapper around the hash to create a UIEventSource from it
*/
export default class Hash {
public static hash: UIEventSource<string> = Hash.Get()
/**
* Gets the current hash, including the leading pound sign if it is non-empty
*/
public static Current(): string {
if (Hash.hash.data === undefined || Hash.hash.data === "") {
return ""
} else {
return "#" + Hash.hash.data
}
}
private static Get(): UIEventSource<string> {
if (Utils.runningFromConsole) {
return new UIEventSource<string>(undefined)
}
const hash = new UIEventSource<string>(window.location.hash.substr(1))
hash.addCallback((h) => {
if (h === "undefined") {
console.warn("Got a literal 'undefined' as hash, ignoring")
h = undefined
}
if (h === undefined || h === "") {
window.location.hash = ""
return
}
history.pushState({}, "")
window.location.hash = "#" + h
})
window.onhashchange = () => {
let newValue = window.location.hash.substr(1)
if (newValue === "") {
newValue = undefined
}
hash.setData(newValue)
}
return hash
}
}
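A minimal usage sketch (the hash value is hypothetical), relying only on the UIEventSource-methods used elsewhere in this commit:

// React to hash changes and update the hash programmatically
Hash.hash.addCallbackAndRun((h) => console.log("Hash is now:", h))
Hash.hash.setData("welcome") // window.location.hash becomes "#welcome"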

49
src/Logic/Web/IdbLocalStorage.ts Normal file

@@ -0,0 +1,49 @@
import { UIEventSource } from "../UIEventSource"
import * as idb from "idb-keyval"
import { Utils } from "../../Utils"
/**
* UIEventSource-wrapper around the IndexedDB key-value store
*/
export class IdbLocalStorage {
private static readonly _sourceCache: Record<string, UIEventSource<any>> = {}
public static Get<T>(
key: string,
options?: { defaultValue?: T; whenLoaded?: (t: T | null) => void }
): UIEventSource<T> {
if (IdbLocalStorage._sourceCache[key] !== undefined) {
return IdbLocalStorage._sourceCache[key]
}
const src = new UIEventSource<T>(options?.defaultValue, "idb-local-storage:" + key)
if (Utils.runningFromConsole) {
return src
}
src.addCallback((v) => idb.set(key, v))
idb.get(key)
.then((v) => {
src.setData(v ?? options?.defaultValue)
if (options?.whenLoaded !== undefined) {
options?.whenLoaded(v)
}
})
.catch((err) => {
console.warn("Loading from local storage failed due to", err)
if (options?.whenLoaded !== undefined) {
options?.whenLoaded(null)
}
})
IdbLocalStorage._sourceCache[key] = src
return src
}
public static SetDirectly(key: string, value: any): Promise<void> {
const copy = Utils.Clone(value)
return idb.set(key, copy)
}
static GetDirectly(key: string): Promise<any> {
return idb.get(key)
}
}
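A usage sketch (the key name is hypothetical); note that Get caches per key, so a second Get with the same key returns the very same source:

const visited = IdbLocalStorage.Get<string[]>("visited-features", {
    defaultValue: [],
    whenLoaded: (stored) => console.log("Restored from IndexedDB:", stored),
})
visited.setData([...(visited.data ?? []), "node/42"]) // persisted by the addCallback above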

47
src/Logic/Web/LiveQueryHandler.ts Normal file

@@ -0,0 +1,47 @@
import { UIEventSource } from "../UIEventSource"
import { Utils } from "../../Utils"
/**
* Fetches data from arbitrary third-party data sources; used in the metatagging
*/
export default class LiveQueryHandler {
private static neededShorthands = {} // url -> (shorthand:paths)[]
public static FetchLiveData(
url: string,
shorthands: string[]
): UIEventSource<any /* string -> string */> {
const shorthandsSet: string[] = LiveQueryHandler.neededShorthands[url] ?? []
for (const shorthand of shorthands) {
if (shorthandsSet.indexOf(shorthand) < 0) {
shorthandsSet.push(shorthand)
}
}
LiveQueryHandler.neededShorthands[url] = shorthandsSet
if (LiveQueryHandler[url] === undefined) {
const source = new UIEventSource({})
LiveQueryHandler[url] = source
console.log("Fetching live data from a third-party (unknown) API:", url)
Utils.downloadJson(url).then((data) => {
for (const shorthandDescription of shorthandsSet) {
const descr = shorthandDescription.trim().split(":")
const shorthand = descr[0]
const path = descr[1]
const parts = path.split(".")
let trail = data
for (const part of parts) {
if (trail !== undefined) {
trail = trail[part]
}
}
source.data[shorthand] = trail
}
source.ping()
})
}
return LiveQueryHandler[url]
}
}
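A sketch with a hypothetical endpoint; every shorthand is a "name:path.to.value"-pair which is resolved against the downloaded JSON:

const live = LiveQueryHandler.FetchLiveData("https://example.org/status.json", [
    "occupancy:data.occupancy",
])
live.addCallbackAndRun((d) => console.log("Live occupancy:", d["occupancy"]))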

43
src/Logic/Web/LocalStorageSource.ts Normal file

@@ -0,0 +1,43 @@
import { UIEventSource } from "../UIEventSource"
/**
* UIEventSource-wrapper around localStorage
*/
export class LocalStorageSource {
static GetParsed<T>(key: string, defaultValue: T): UIEventSource<T> {
return LocalStorageSource.Get(key).sync(
(str) => {
if (str === undefined) {
return defaultValue
}
try {
return JSON.parse(str)
} catch {
return defaultValue
}
},
[],
(value) => JSON.stringify(value)
)
}
static Get(key: string, defaultValue: string = undefined): UIEventSource<string> {
try {
const saved = localStorage.getItem(key)
const source = new UIEventSource<string>(saved ?? defaultValue, "localstorage:" + key)
source.addCallback((data) => {
try {
localStorage.setItem(key, data)
} catch (e) {
// Probably exceeded the quota with this item!
// Let's nuke everything
localStorage.clear()
}
})
return source
} catch (e) {
return new UIEventSource<string>(defaultValue)
}
}
}
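A usage sketch (key names hypothetical), showing both the raw string variant and the JSON-parsed variant:

const language = LocalStorageSource.Get("language", "en")
const location = LocalStorageSource.GetParsed<{ lat: number; lon: number }>(
    "last-location",
    { lat: 51.05, lon: 3.72 }
)
location.setData({ lat: 50.85, lon: 4.35 }) // stored as JSON under "last-location"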

236
src/Logic/Web/MangroveReviews.ts Normal file

@@ -0,0 +1,236 @@
import { ImmutableStore, Store, UIEventSource } from "../UIEventSource"
import { MangroveReviews, Review } from "mangrove-reviews-typescript"
import { Utils } from "../../Utils"
import { Feature, Position } from "geojson"
import { GeoOperations } from "../GeoOperations"
export class MangroveIdentity {
public readonly keypair: Store<CryptoKeyPair>
public readonly key_id: Store<string>
constructor(mangroveIdentity: UIEventSource<string>) {
const key_id = new UIEventSource<string>(undefined)
this.key_id = key_id
const keypairEventSource = new UIEventSource<CryptoKeyPair>(undefined)
this.keypair = keypairEventSource
mangroveIdentity.addCallbackAndRunD(async (data) => {
if (data === "") {
return
}
const keypair = await MangroveReviews.jwkToKeypair(JSON.parse(data))
keypairEventSource.setData(keypair)
const pem = await MangroveReviews.publicToPem(keypair.publicKey)
key_id.setData(pem)
})
try {
if (!Utils.runningFromConsole && (mangroveIdentity.data ?? "") === "") {
MangroveIdentity.CreateIdentity(mangroveIdentity).then((_) => {})
}
} catch (e) {
console.error("Could not create identity: ", e)
}
}
/**
* Creates an identity if none exists already.
* The result is written into the UIEventSource which was passed into the constructor
*/
private static async CreateIdentity(identity: UIEventSource<string>): Promise<void> {
const keypair = await MangroveReviews.generateKeypair()
const jwk = await MangroveReviews.keypairToJwk(keypair)
if ((identity.data ?? "") !== "") {
// Identity has been loaded via osmPreferences by now - we don't overwrite
return
}
identity.setData(JSON.stringify(jwk))
}
}
/**
* Tracks all reviews of a given feature and allows creating a new review
*/
export default class FeatureReviews {
private static readonly _featureReviewsCache: Record<string, FeatureReviews> = {}
public readonly subjectUri: Store<string>
private readonly _reviews: UIEventSource<(Review & { madeByLoggedInUser: Store<boolean> })[]> =
new UIEventSource([])
public readonly reviews: Store<(Review & { madeByLoggedInUser: Store<boolean> })[]> =
this._reviews
private readonly _lat: number
private readonly _lon: number
private readonly _uncertainty: number
private readonly _name: Store<string>
private readonly _identity: MangroveIdentity
private constructor(
feature: Feature,
tagsSource: UIEventSource<Record<string, string>>,
mangroveIdentity?: MangroveIdentity,
options?: {
nameKey?: "name" | string
fallbackName?: string
uncertaintyRadius?: number
}
) {
const centerLonLat = GeoOperations.centerpointCoordinates(feature)
;[this._lon, this._lat] = centerLonLat
this._identity =
mangroveIdentity ?? new MangroveIdentity(new UIEventSource<string>(undefined))
const nameKey = options?.nameKey ?? "name"
if (feature.geometry.type === "Point") {
this._uncertainty = options?.uncertaintyRadius ?? 10
} else {
let coordss: Position[][]
if (feature.geometry.type === "LineString") {
coordss = [feature.geometry.coordinates]
} else if (
feature.geometry.type === "MultiLineString" ||
feature.geometry.type === "Polygon"
) {
coordss = feature.geometry.coordinates
}
let maxDistance = 0
for (const coords of coordss) {
for (const coord of coords) {
maxDistance = Math.max(
maxDistance,
GeoOperations.distanceBetween(centerLonLat, coord)
)
}
}
this._uncertainty = options?.uncertaintyRadius ?? maxDistance
}
this._name = tagsSource.map((tags) => tags[nameKey] ?? options?.fallbackName)
this.subjectUri = this.ConstructSubjectUri()
const self = this
this.subjectUri.addCallbackAndRunD(async (sub) => {
const reviews = await MangroveReviews.getReviews({ sub })
self.addReviews(reviews.reviews)
})
/* We also construct all subject queries _without_ encoding the name to work around a previous bug
* See https://github.com/giggls/opencampsitemap/issues/30
*/
this.ConstructSubjectUri(true).addCallbackAndRunD(async (sub) => {
try {
const reviews = await MangroveReviews.getReviews({ sub })
self.addReviews(reviews.reviews)
} catch (e) {
console.log("Could not fetch reviews for partially incorrect query ", sub)
}
})
}
/**
* Constructs a FeatureReviews-tracker for the given feature, or fetches it from the cache
*/
public static construct(
feature: Feature,
tagsSource: UIEventSource<Record<string, string>>,
mangroveIdentity?: MangroveIdentity,
options?: {
nameKey?: "name" | string
fallbackName?: string
uncertaintyRadius?: number
}
) {
const key = feature.properties.id
const cached = FeatureReviews._featureReviewsCache[key]
if (cached !== undefined) {
return cached
}
const featureReviews = new FeatureReviews(feature, tagsSource, mangroveIdentity, options)
FeatureReviews._featureReviewsCache[key] = featureReviews
return featureReviews
}
/**
* The given review is uploaded to mangrove.reviews and added to the list of known reviews
*/
public async createReview(review: Omit<Review, "sub">): Promise<void> {
const r: Review = {
sub: this.subjectUri.data,
...review,
}
const keypair: CryptoKeyPair = this._identity.keypair.data
console.log(r)
const jwt = await MangroveReviews.signReview(keypair, r)
console.log("Signed:", jwt)
await MangroveReviews.submitReview(jwt)
this._reviews.data.push({ ...r, madeByLoggedInUser: new ImmutableStore(true) })
this._reviews.ping()
}
/**
* Adds given reviews to the 'reviews'-UI-eventsource
* @param reviews
* @private
*/
private addReviews(reviews: { payload: Review; kid: string }[]) {
const self = this
const alreadyKnown = new Set(self._reviews.data.map((r) => r.rating + " " + r.opinion))
let hasNew = false
for (const reviewData of reviews) {
const review = reviewData.payload
try {
const url = new URL(review.sub)
console.log("URL is", url)
if (url.protocol === "geo:") {
const coordinate = <[number, number]>(
url.pathname.split(",").map((n) => Number(n))
)
const distance = GeoOperations.distanceBetween(
[this._lat, this._lon],
coordinate
)
if (distance > this._uncertainty) {
continue
}
}
} catch (e) {
console.warn(e)
}
const key = review.rating + " " + review.opinion
if (alreadyKnown.has(key)) {
continue
}
self._reviews.data.push({
...review,
madeByLoggedInUser: this._identity.key_id.map((user_key_id) => {
return reviewData.kid === user_key_id
}),
})
hasNew = true
}
if (hasNew) {
self._reviews.ping()
}
}
/**
* Gets a URI which represents the item in a mangrove-compatible way
*
* See https://mangrove.reviews/standard#mangrove-core-uri-schemes
*/
private ConstructSubjectUri(dontEncodeName: boolean = false): Store<string> {
// https://www.rfc-editor.org/rfc/rfc5870#section-3.4.2
// `u` stands for `uncertainty`, https://www.rfc-editor.org/rfc/rfc5870#section-3.4.3
const self = this
return this._name.map(function (name) {
let uri = `geo:${self._lat},${self._lon}?u=${self._uncertainty}`
if (name) {
uri += "&q=" + (dontEncodeName ? name : encodeURIComponent(name))
}
return uri
})
}
}
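A sketch of the intended flow, using a hypothetical point feature; the rating scale (0-100) follows the Mangrove standard:

const feature: Feature = {
    type: "Feature",
    properties: { id: "node/42", name: "Some café" },
    geometry: { type: "Point", coordinates: [3.72, 51.05] },
}
const tags = new UIEventSource<Record<string, string>>({ name: "Some café" })
const reviews = FeatureReviews.construct(feature, tags)
reviews.reviews.addCallbackAndRun((rs) => console.log("Got", rs.length, "reviews"))
// Creating a review requires a Mangrove identity whose keypair has been loaded:
// await reviews.createReview({ rating: 80, opinion: "Great coffee" })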

1020
src/Logic/Web/PlantNet.ts Normal file

File diff suppressed because it is too large

134
src/Logic/Web/QueryParameters.ts Normal file

@@ -0,0 +1,134 @@
/**
* Wraps the query parameters into UIEventSources
*/
import { UIEventSource } from "../UIEventSource"
import Hash from "./Hash"
import { Utils } from "../../Utils"
export class QueryParameters {
static defaults: Record<string, string> = {}
static documentation: Map<string, string> = new Map<string, string>()
private static order: string[] = ["layout", "test", "z", "lat", "lon"]
protected static readonly _wasInitialized: Set<string> = new Set()
protected static readonly knownSources: Record<string, UIEventSource<string>> = {}
private static initialized = false
public static GetQueryParameter(
key: string,
deflt: string,
documentation?: string
): UIEventSource<string> {
if (!this.initialized) {
this.init()
}
QueryParameters.documentation.set(key, documentation)
if (deflt !== undefined) {
QueryParameters.defaults[key] = deflt
}
if (QueryParameters.knownSources[key] !== undefined) {
return QueryParameters.knownSources[key]
}
QueryParameters.addOrder(key)
const source = new UIEventSource<string>(deflt, "&" + key)
QueryParameters.knownSources[key] = source
source.addCallback(() => QueryParameters.Serialize())
return source
}
public static SetDefaultFor(key: string, value: string) {
if (QueryParameters.defaults[key] === value) {
return
}
QueryParameters.defaults[key] = value
QueryParameters.Serialize()
}
public static GetBooleanQueryParameter(
key: string,
deflt: boolean,
documentation?: string
): UIEventSource<boolean> {
return UIEventSource.asBoolean(
QueryParameters.GetQueryParameter(key, "" + deflt, documentation)
)
}
public static wasInitialized(key: string): boolean {
return QueryParameters._wasInitialized.has(key)
}
private static addOrder(key) {
if (this.order.indexOf(key) < 0) {
this.order.push(key)
}
}
private static init() {
if (this.initialized) {
return
}
this.initialized = true
if (Utils.runningFromConsole) {
return
}
if (window?.location?.search) {
const params = window.location.search.substr(1).split("&")
for (const param of params) {
const kv = param.split("=")
const key = decodeURIComponent(kv[0])
QueryParameters.addOrder(key)
QueryParameters._wasInitialized.add(key)
const v = decodeURIComponent(kv[1])
const source = new UIEventSource<string>(v)
source.addCallback(() => QueryParameters.Serialize())
QueryParameters.knownSources[key] = source
}
}
}
/**
* Sets the query parameters of the page location
* @private
*/
private static Serialize() {
const parts = []
for (const key of QueryParameters.order) {
if (QueryParameters.knownSources[key]?.data === undefined) {
continue
}
if (QueryParameters.knownSources[key].data === "undefined") {
continue
}
if (QueryParameters.knownSources[key].data === QueryParameters.defaults[key]) {
continue
}
parts.push(
encodeURIComponent(key) +
"=" +
encodeURIComponent(QueryParameters.knownSources[key].data)
)
}
if (!Utils.runningFromConsole) {
// Don't pollute the history every time a parameter changes
try {
history.replaceState(null, "", "?" + parts.join("&") + Hash.Current())
} catch (e) {
console.error(e)
}
}
}
static ClearAll() {
for (const name in QueryParameters.knownSources) {
QueryParameters.knownSources[name].setData(undefined)
}
QueryParameters._wasInitialized.clear()
QueryParameters.order = []
}
}
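A usage sketch (parameter names hypothetical); for a page loaded as "?layer=bench", the first source would start out as "bench":

const layer = QueryParameters.GetQueryParameter("layer", "all", "Which layer to show")
layer.setData("bench") // rewrites the query string via history.replaceState

const debug = QueryParameters.GetBooleanQueryParameter("debug", false, "Enables debug mode")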

173
src/Logic/Web/ThemeViewStateHashActor.ts Normal file

@@ -0,0 +1,173 @@
import ThemeViewState from "../../Models/ThemeViewState"
import Hash from "./Hash"
export default class ThemeViewStateHashActor {
private readonly _state: ThemeViewState
/**
* Converts the hash to the appropriate themeview state and, vice versa, sets the hash.
*
* As the browser's back button changes the hash first, this class also handles the 'back'-button events.
*
* Note that there is no "real" way to intercept the back button, we can only detect the removal of the hash.
* As such, we use a change in the hash to close the appropriate windows
*
* @param state
*/
constructor(state: ThemeViewState) {
this._state = state
// First of all, try to recover the selected element
if (Hash.hash.data) {
const hash = Hash.hash.data
this.loadStateFromHash(hash)
Hash.hash.setData(hash) // reapply the previous hash
state.indexedFeatures.featuresById.addCallbackAndRunD((_) => {
// Once we have found a matching element, we can be sure the indexedFeatureSource was populated and that the job is done
return this.loadSelectedElementFromHash(hash)
})
}
// Register a hash change listener to correctly handle the back button
Hash.hash.addCallback((hash) => {
if (!!hash) {
// There is still a hash
// We _only_ have to (at most) close the overlays in this case
const parts = hash.split(";")
if (parts.indexOf("background") < 0) {
state.guistate.backgroundLayerSelectionIsOpened.setData(false)
}
this.loadSelectedElementFromHash(hash)
} else {
this.back()
}
})
// At last, register callbacks on the state to update the hash when they change.
// Note: these should use 'addCallback', not 'addCallbackAndRun'
state.selectedElement.addCallback((_) => this.setHash())
state.guistate.allToggles.forEach(({ toggle, submenu }) => {
submenu?.addCallback((_) => this.setHash())
toggle.addCallback((_) => this.setHash())
})
// When all is done, set the hash. This must happen last to give the code above correct info
this.setHash()
}
/**
* Selects the appropriate element based on the hash.
* Returns true when the element has been resolved, signalling that the first-run callback can be unregistered
* @param hash
* @private
*/
private loadSelectedElementFromHash(hash: string): boolean {
const state = this._state
const selectedElement = state.selectedElement
// state.indexedFeatures.featuresById.stabilized(250)
hash = hash.split(";")[0] // The 'selectedElement' is always the _first_ item in the hash (if any)
// Set the hash based on the selected element...
// ... search and select an element based on the hash
if (selectedElement.data?.properties?.id === hash) {
// We already have the correct hash
return true
}
const found = state.indexedFeatures.featuresById.data?.get(hash)
if (!found) {
return false
}
if (found.properties.id === "last_click") {
return true
}
const layer = this._state.layout.getMatchingLayer(found.properties)
console.log(
"Setting selected element based on hash",
hash,
"; found",
found,
"got matching layer",
layer.id,
""
)
selectedElement.setData(found)
state.selectedLayer.setData(layer)
return true
}
private loadStateFromHash(hash: string) {
const state = this._state
const parts = hash.split(";")
outer: for (const { toggle, name, showOverOthers, submenu } of state.guistate.allToggles) {
for (const part of parts) {
if (part === name) {
toggle.setData(true)
continue outer
}
if (part.indexOf(":") < 0) {
continue
}
const [main, submenuValue] = part.split(":")
if (main !== name) {
continue
}
toggle.setData(true)
submenu?.setData(submenuValue)
continue outer
}
// If we arrive here, the loop above has not found any match
toggle.setData(false)
}
}
private setHash() {
const s = this._state
let h = ""
for (const { toggle, showOverOthers, name, submenu } of s.guistate.allToggles) {
if (showOverOthers || !toggle.data) {
continue
}
h = name
if (submenu?.data) {
h += ":" + submenu.data
}
}
if (s.selectedElement.data !== undefined) {
h = s.selectedElement.data.properties.id
}
for (const { toggle, showOverOthers, name, submenu } of s.guistate.allToggles) {
if (!showOverOthers || !toggle.data) {
continue
}
if (h) {
h += ";" + name
} else {
h = name
}
if (submenu?.data) {
h += ":" + submenu.data
}
}
Hash.hash.setData(h)
}
private back() {
console.log("Got a back event")
const state = this._state
// history.pushState(null, null, window.location.pathname);
if (state.selectedElement.data) {
state.selectedElement.setData(undefined)
return
}
if (state.guistate.closeAll()) {
return
}
}
}
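A sketch of how this actor is meant to be installed at startup (the ThemeViewState construction is elided):

// const state: ThemeViewState = ...
new ThemeViewStateHashActor(state)
// From now on, selecting a feature writes e.g. "#node/123" into the URL,
// and the back button closes popups instead of leaving the page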

426
src/Logic/Web/Wikidata.ts Normal file

@@ -0,0 +1,426 @@
import { Utils } from "../../Utils"
import { Store, UIEventSource } from "../UIEventSource"
import * as wds from "wikidata-sdk"
export class WikidataResponse {
public readonly id: string
public readonly labels: Map<string, string>
public readonly descriptions: Map<string, string>
public readonly claims: Map<string, Set<string>>
public readonly wikisites: Map<string, string>
public readonly commons: string
constructor(
id: string,
labels: Map<string, string>,
descriptions: Map<string, string>,
claims: Map<string, Set<string>>,
wikisites: Map<string, string>,
commons: string
) {
this.id = id
this.labels = labels
this.descriptions = descriptions
this.claims = claims
this.wikisites = wikisites
this.commons = commons
}
public static fromJson(entity: any): WikidataResponse {
const labels = new Map<string, string>()
for (const labelName in entity.labels) {
// The labelname is the language code
labels.set(labelName, entity.labels[labelName].value)
}
const descr = new Map<string, string>()
for (const labelName in entity.descriptions) {
// The labelname is the language code
descr.set(labelName, entity.descriptions[labelName].value)
}
const sitelinks = new Map<string, string>()
for (const labelName in entity.sitelinks) {
// labelName is `${language}wiki`
const language = labelName.substring(0, labelName.length - 4)
const title = entity.sitelinks[labelName].title
sitelinks.set(language, title)
}
const commons = sitelinks.get("commons")
sitelinks.delete("commons")
const claims = WikidataResponse.extractClaims(entity.claims)
return new WikidataResponse(entity.id, labels, descr, claims, sitelinks, commons)
}
static extractClaims(claimsJson: any): Map<string, Set<string>> {
const simplified = wds.simplify.claims(claimsJson, {
timeConverter: "simple-day",
})
const claims = new Map<string, Set<string>>()
for (const claimId in simplified) {
const claimsList: any[] = simplified[claimId]
claims.set(claimId, new Set(claimsList))
}
return claims
}
}
export class WikidataLexeme {
id: string
lemma: Map<string, string>
senses: Map<string, string>
claims: Map<string, Set<string>>
constructor(json) {
this.id = json.id
this.claims = WikidataResponse.extractClaims(json.claims)
this.lemma = new Map<string, string>()
for (const language in json.lemmas) {
this.lemma.set(language, json.lemmas[language].value)
}
this.senses = new Map<string, string>()
for (const sense of json.senses) {
const glosses = sense.glosses
for (const language in glosses) {
let previousSenses = this.senses.get(language)
if (previousSenses === undefined) {
previousSenses = ""
} else {
previousSenses = previousSenses + "; "
}
this.senses.set(language, previousSenses + (glosses[language].value ?? ""))
}
}
}
asWikidataResponse() {
return new WikidataResponse(
this.id,
this.lemma,
this.senses,
this.claims,
new Map(),
undefined
)
}
}
export interface WikidataSearchoptions {
lang?: "en" | string
maxCount?: 20 | number
}
export interface WikidataAdvancedSearchoptions extends WikidataSearchoptions {
instanceOf?: number[]
notInstanceOf?: number[]
}
/**
* Utility functions around wikidata
*/
export default class Wikidata {
private static readonly _identifierPrefixes = ["Q", "L"].map((str) => str.toLowerCase())
private static readonly _prefixesToRemove = [
"https://www.wikidata.org/wiki/Lexeme:",
"https://www.wikidata.org/wiki/",
"http://www.wikidata.org/entity/",
"Lexeme:",
].map((str) => str.toLowerCase())
private static readonly _storeCache = new Map<
string,
Store<{ success: WikidataResponse } | { error: any }>
>()
/**
* Same as LoadWikidataEntryAsync, but wrapped into a Store and cached
* @param value
*/
public static LoadWikidataEntry(
value: string | number
): Store<{ success: WikidataResponse } | { error: any }> {
const key = this.ExtractKey(value)
const cached = Wikidata._storeCache.get(key)
if (cached) {
return cached
}
const src = UIEventSource.FromPromiseWithErr(Wikidata.LoadWikidataEntryAsync(key))
Wikidata._storeCache.set(key, src)
return src
}
/**
* Given a search text, searches for the relevant wikidata entries, excluding pages "outside of the main tree", e.g. disambiguation pages.
* Optionally, an 'instance of' can be given to limit the scope, e.g. instanceOf:5 (humans) will only search for humans
*/
public static async searchAdvanced(
text: string,
options: WikidataAdvancedSearchoptions
): Promise<
{
id: string
relevance?: number
label: string
description?: string
}[]
> {
let instanceOf = ""
if (options?.instanceOf !== undefined && options.instanceOf.length > 0) {
const phrases = options.instanceOf.map((q) => `{ ?item wdt:P31/wdt:P279* wd:Q${q}. }`)
instanceOf = "{" + phrases.join(" UNION ") + "}"
}
const forbidden = (options?.notInstanceOf ?? []).concat([17379835]) // blacklist 'wikimedia pages outside of the main knowledge tree', e.g. disambiguation pages
const minusPhrases = forbidden.map((q) => `MINUS {?item wdt:P31/wdt:P279* wd:Q${q} .}`)
const sparql = `SELECT * WHERE {
SERVICE wikibase:mwapi {
bd:serviceParam wikibase:api "EntitySearch" .
bd:serviceParam wikibase:endpoint "www.wikidata.org" .
bd:serviceParam mwapi:search "${text.replace(/\\/g, "\\\\").replace(/"/g, '\\"')}" .
bd:serviceParam mwapi:language "${options.lang}" .
?item wikibase:apiOutputItem mwapi:item .
?num wikibase:apiOrdinal true .
bd:serviceParam wikibase:limit ${
Math.round(
(options.maxCount ?? 20) * 1.5
) /*Some padding for disambiguation pages */
} .
?label wikibase:apiOutput mwapi:label .
?description wikibase:apiOutput "@description" .
}
${instanceOf}
${minusPhrases.join("\n ")}
} ORDER BY ASC(?num) LIMIT ${options.maxCount ?? 20}`
const url = wds.sparqlQuery(sparql)
const result = await Utils.downloadJson(url)
/*The full uri of the wikidata-item*/
return result.results.bindings.map(({ item, label, description, num }) => ({
relevance: num?.value,
id: item?.value,
label: label?.value,
description: description?.value,
}))
}
public static async search(
search: string,
options?: WikidataSearchoptions,
page = 1
): Promise<
{
id: string
label: string
description: string
}[]
> {
const maxCount = options?.maxCount ?? 20
let pageCount = Math.min(maxCount, 50)
const start = page * pageCount - pageCount
const lang = options?.lang ?? "en"
const url =
"https://www.wikidata.org/w/api.php?action=wbsearchentities&search=" +
search +
"&language=" +
lang +
"&limit=" +
pageCount +
"&continue=" +
start +
"&format=json&uselang=" +
lang +
"&type=item&origin=*" +
"&props=" // props= removes some unused values in the result
const response = await Utils.downloadJsonCached(url, 10000)
const result: any[] = response.search
if (result.length < pageCount) {
// No next page
return result
}
if (result.length < maxCount) {
const newOptions = { ...options }
newOptions.maxCount = maxCount - result.length
result.push(...(await Wikidata.search(search, newOptions, page + 1)))
}
return result
}
public static async searchAndFetch(
search: string,
options?: WikidataAdvancedSearchoptions
): Promise<WikidataResponse[]> {
// We provide some padding to filter away invalid values
const searchResults = await Wikidata.searchAdvanced(search, options)
const maybeResponses = await Promise.all(
searchResults.map(async (r) => {
try {
console.log("Loading ", r.id)
return await Wikidata.LoadWikidataEntry(r.id).AsPromise()
} catch (e) {
console.error(e)
return undefined
}
})
)
return Utils.NoNull(maybeResponses.map((r) => <WikidataResponse>r["success"]))
}
/**
* Gets the 'key' segment from a URL
*
* Wikidata.ExtractKey("https://www.wikidata.org/wiki/Lexeme:L614072") // => "L614072"
* Wikidata.ExtractKey("http://www.wikidata.org/entity/Q55008046") // => "Q55008046"
* Wikidata.ExtractKey("Q55008046") // => "Q55008046"
* Wikidata.ExtractKey("A55008046") // => undefined
* Wikidata.ExtractKey("Q55008046X") // => undefined
*/
public static ExtractKey(value: string | number): string {
if (typeof value === "number") {
return "Q" + value
}
if (value === undefined) {
console.error("ExtractKey: value is undefined")
return undefined
}
value = value.trim().toLowerCase()
for (const prefix of Wikidata._prefixesToRemove) {
if (value.startsWith(prefix)) {
value = value.substring(prefix.length)
}
}
if (value.startsWith("http") && value === "") {
// Probably some random link in the image field - we skip it
return undefined
}
for (const identifierPrefix of Wikidata._identifierPrefixes) {
if (value.startsWith(identifierPrefix)) {
const trimmed = value.substring(identifierPrefix.length)
if (trimmed === "") {
return undefined
}
const n = Number(trimmed)
if (isNaN(n)) {
return undefined
}
return value.toUpperCase()
}
}
if (value !== "" && !isNaN(Number(value))) {
return "Q" + value
}
return undefined
}
/**
* Converts 'Q123' into 123, returns undefined if invalid
*
* Wikidata.QIdToNumber("Q123") // => 123
* Wikidata.QIdToNumber(" Q123 ") // => 123
* Wikidata.QIdToNumber(" X123 ") // => undefined
* Wikidata.QIdToNumber(" Q123X ") // => undefined
* Wikidata.QIdToNumber(undefined) // => undefined
* Wikidata.QIdToNumber(123) // => 123
*/
public static QIdToNumber(q: string | number): number | undefined {
if (q === undefined || q === null) {
return
}
if (typeof q === "number") {
return q
}
q = q.trim()
if (!q.startsWith("Q")) {
return
}
q = q.substr(1)
const n = Number(q)
if (isNaN(n)) {
return
}
return n
}
public static IdToArticle(id: string) {
if (id.startsWith("Q")) {
return "https://wikidata.org/wiki/" + id
}
if (id.startsWith("L")) {
return "https://wikidata.org/wiki/Lexeme:" + id
}
throw "Unknown id type: " + id
}
/**
* Builds a SPARQL-query and returns the results
*
* @param keys: how the variables are named. Every key not ending with 'Label' should appear in at least one statement
* @param statements
*/
public static async Sparql<T>(
keys: string[],
statements: string[]
): Promise<(T & Record<string, { type: string; value: string }>)[]> {
const query =
"SELECT " +
keys.map((k) => (k.startsWith("?") ? k : "?" + k)).join(" ") +
"\n" +
"WHERE\n" +
"{\n" +
statements.map((stmt) => (stmt.endsWith(".") ? stmt : stmt + ".")).join("\n") +
' SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". }\n' +
"}"
const url = wds.sparqlQuery(query)
const result = await Utils.downloadJsonCached(url, 24 * 60 * 60 * 1000)
return result.results.bindings
}
private static _cache = new Map<string, Promise<WikidataResponse>>()
public static async LoadWikidataEntryAsync(value: string | number): Promise<WikidataResponse> {
const key = "" + value
const cached = Wikidata._cache.get(key)
if (cached) {
return cached
}
const uncached = Wikidata.LoadWikidataEntryUncachedAsync(value)
Wikidata._cache.set(key, uncached)
return uncached
}
/**
* Loads a wikidata page
* @returns the entity of the given value
*/
private static async LoadWikidataEntryUncachedAsync(
value: string | number
): Promise<WikidataResponse> {
const id = Wikidata.ExtractKey(value)
if (id === undefined) {
console.warn("Could not extract a wikidata entry from", value)
return undefined
}
const url = "https://www.wikidata.org/wiki/Special:EntityData/" + id + ".json"
const entities = (await Utils.downloadJsonCached(url, 10000)).entities
const firstKey = <string>Array.from(Object.keys(entities))[0] // Roundabout way to fetch the entity; it might have been a redirect
const response = entities[firstKey]
if (id.startsWith("L")) {
// This is a lexeme:
return new WikidataLexeme(response).asWikidataResponse()
}
return WikidataResponse.fromJson(response)
}
}
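A usage sketch (inside an async context); Q42 is Douglas Adams:

const entity = await Wikidata.LoadWikidataEntryAsync("Q42")
console.log(entity.labels.get("en")) // "Douglas Adams"

// Search restricted to humans (Q5); disambiguation pages are excluded by default
const hits = await Wikidata.searchAdvanced("Douglas Adams", {
    lang: "en",
    maxCount: 5,
    instanceOf: [5],
})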

55
src/Logic/Web/Wikimedia.ts Normal file

@@ -0,0 +1,55 @@
import { Utils } from "../../Utils"
export default class Wikimedia {
/**
* Recursively walks a Wikimedia Commons category to collect its entries, which can be File: or Category: entries
* Returns (a promise of) a list of entry titles
* @param categoryName The name of the Wikimedia category
* @param maxLoad: the maximum number of entries to return
* @param continueParameter: a continuation token provided by Wikimedia when more results should be loaded
*/
public static async GetCategoryContents(
categoryName: string,
maxLoad = 10,
continueParameter: string = undefined
): Promise<string[]> {
if (categoryName === undefined || categoryName === null || categoryName === "") {
return []
}
if (!categoryName.startsWith("Category:")) {
categoryName = "Category:" + categoryName
}
let url =
"https://commons.wikimedia.org/w/api.php?" +
"action=query&list=categorymembers&format=json&" +
"&origin=*" +
"&cmtitle=" +
encodeURIComponent(categoryName)
if (continueParameter !== undefined) {
url = `${url}&cmcontinue=${continueParameter}`
}
const response = await Utils.downloadJson(url)
const members = response.query?.categorymembers ?? []
const imageOverview: string[] = members.map((member) => member.title)
if (response.continue === undefined) {
// We are done crawling through the category - no continuation in sight
return imageOverview
}
if (maxLoad - imageOverview.length <= 0) {
console.debug(`Recursive wikimedia category load stopped for ${categoryName}`)
return imageOverview
}
// We do have a continue token - let's load the next page
const recursive = await Wikimedia.GetCategoryContents(
categoryName,
maxLoad - imageOverview.length,
response.continue.cmcontinue
)
imageOverview.push(...recursive)
return imageOverview
}
}
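A usage sketch (inside an async context; the category name is hypothetical):

const titles = await Wikimedia.GetCategoryContents("Fountains in Ghent", 20)
// titles holds up to roughly 20 "File:..." and "Category:..." entries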

298
src/Logic/Web/Wikipedia.ts Normal file

@@ -0,0 +1,298 @@
import { Utils } from "../../Utils"
import Wikidata, { WikidataResponse } from "./Wikidata"
import { Store, UIEventSource } from "../UIEventSource"
export interface FullWikipediaDetails {
articleUrl?: string
language?: string
pagename?: string
fullArticle?: string
firstParagraph?: string
restOfArticle?: string
wikidata?: WikidataResponse
title?: string
}
export default class Wikipedia {
/**
* When getting a wikipedia page, some elements (e.g. navigation, infoboxes, ...) should be removed.
* We do this based on the CSS classes; this list is a blacklist of the classes to remove
* @private
*/
private static readonly classesToRemove = [
"shortdescription",
"sidebar",
"infobox",
"infobox_v2",
"noprint",
"ambox",
"mw-editsection",
"mw-selflink",
"mw-empty-elt",
"hatnote", // Often redirects
]
private static readonly idsToRemove = ["sjabloon_zie"]
private static readonly _cache = new Map<string, Promise<string>>()
private static _fullDetailsCache = new Map<string, Store<FullWikipediaDetails>>()
public readonly backend: string
constructor(options?: { language?: "en" | string } | { backend?: string }) {
this.backend = Wikipedia.getBackendUrl(options ?? {})
}
/**
* Tries to extract the language and article name from the given string
*
* Wikipedia.extractLanguageAndName("qsdf") // => undefined
* Wikipedia.extractLanguageAndName("nl:Warandeputten") // => {language: "nl", pageName: "Warandeputten"}
*/
public static extractLanguageAndName(input: string): { language: string; pageName: string } {
const matched = input.match("([^:]+):(.*)")
if (matched === undefined || matched === null) {
return undefined
}
const [_, language, pageName] = matched
return {
language,
pageName,
}
}
/**
* Fetch all useful information for the given entity.
*
*/
public static fetchArticleAndWikidata(
wikidataOrPageId: string,
preferedLanguage: string
): Store<FullWikipediaDetails> {
const cachekey = preferedLanguage + wikidataOrPageId
const cached = Wikipedia._fullDetailsCache.get(cachekey)
if (cached) {
return cached
}
console.log("Constructing store for", cachekey)
const store = new UIEventSource<FullWikipediaDetails>({}, cachekey)
Wikipedia._fullDetailsCache.set(cachekey, store)
// Are we dealing with a wikidata item?
const wikidataId = Wikidata.ExtractKey(wikidataOrPageId)
if (!wikidataId) {
// We are dealing with a wikipedia identifier, e.g. 'NL:articlename', 'https://nl.wikipedia.org/wiki/article', ...
const { language, pageName } = Wikipedia.extractLanguageAndName(wikidataOrPageId)
store.data.articleUrl = new Wikipedia({ language }).getPageUrl(pageName)
store.data.language = language
store.data.pagename = pageName
store.data.title = pageName
} else {
// Yup, this is a wikidata item
// Let's fetch the wikidata
store.data.title = wikidataId
Wikidata.LoadWikidataEntryAsync(wikidataId).then((wikidata) => {
store.data.wikidata = wikidata
store.ping()
// With the wikidata, we can search for the appropriate wikipedia page
const preferredLanguage = [
preferedLanguage,
"en",
Array.from(wikidata.wikisites.keys())[0],
]
for (const language of preferredLanguage) {
const pagetitle = wikidata.wikisites.get(language)
if (pagetitle) {
store.data.articleUrl = new Wikipedia({ language }).getPageUrl(pagetitle)
store.data.pagename = pagetitle
store.data.language = language
store.data.title = pagetitle
store.ping()
break
}
}
})
}
// Now that the page-URL has been set up, we can focus on downloading the actual article.
// We set up a listener: as soon as the article-URL is known, we'll fetch the actual page.
// This URL can either be set by the Wikidata-response or directly, if we are dealing with a wikipedia-url
store.addCallbackAndRun((data) => {
if (data.language === undefined || data.pagename === undefined) {
return
}
const wikipedia = new Wikipedia({ language: data.language })
wikipedia.GetArticleHtml(data.pagename).then((article) => {
data.fullArticle = article
const content = document.createElement("div")
content.innerHTML = article
const firstParagraph = content.getElementsByTagName("p").item(0)
data.firstParagraph = firstParagraph.innerHTML
content.removeChild(firstParagraph)
data.restOfArticle = content.innerHTML
store.ping()
})
return true // unregister
})
return store
}
private static getBackendUrl(
options: { language?: "en" | string } | { backend?: "en.wikipedia.org" | string }
): string {
let backend = "en.wikipedia.org"
if (options["backend"]) {
backend = options["backend"]
} else if (options["language"]) {
backend = `${options["language"] ?? "en"}.wikipedia.org`
}
if (!backend.startsWith("http")) {
backend = "https://" + backend
}
return backend
}
/**
* Extracts the actual pagename; returns undefined if the URL belongs to a different wiki-instance
*
* new Wikipedia({backend: "https://wiki.openstreetmap.org"}).extractPageName("https://wiki.openstreetmap.org/wiki/NL:Speelbos") // => "NL:Speelbos"
* new Wikipedia().extractPageName("https://wiki.openstreetmap.org/wiki/NL:Speelbos") // => undefined
*/
public extractPageName(input: string): string | undefined {
if (!input.startsWith(this.backend)) {
return undefined
}
input = input.substring(this.backend.length)
const matched = input.match("/?wiki/(.+)")
if (matched === undefined || matched === null) {
return undefined
}
const [_, pageName] = matched
return pageName
}
public getDataUrl(pageName: string): string {
return (
`${this.backend}/w/api.php?action=parse&format=json&origin=*&prop=text&page=` + pageName
)
}
public getPageUrl(pageName: string): string {
return `${this.backend}/wiki/${pageName}`
}
/**
* Textual search of the specified wiki-instance. If searching Wikipedia, we recommend using wikidata.search instead
* @param searchTerm
*/
public async search(searchTerm: string): Promise<{ title: string; snippet: string }[]> {
const url =
this.backend +
"/w/api.php?action=query&format=json&list=search&srsearch=" +
encodeURIComponent(searchTerm)
return (await Utils.downloadJson(url))["query"]["search"]
}
/**
* Searches via 'index.php' and scrapes the result.
* This gives better results than via the API
* @param searchTerm
*/
public async searchViaIndex(
searchTerm: string
): Promise<{ title: string; snippet: string; url: string }[]> {
const url = `${this.backend}/w/index.php?search=${encodeURIComponent(searchTerm)}&ns0=1`
const result = await Utils.downloadAdvanced(url)
if (result["redirect"]) {
const targetUrl = result["redirect"]
// This is an exact match
return [
{
title: this.extractPageName(targetUrl)?.trim(),
url: targetUrl,
snippet: "",
},
]
}
if (result["error"]) {
throw "Could not download: " + JSON.stringify(result)
}
const el = document.createElement("html")
el.innerHTML = result["content"].replace(/href="\//g, 'href="' + this.backend + "/")
const searchResults = el.getElementsByClassName("mw-search-results")
const individualResults = Array.from(
searchResults[0]?.getElementsByClassName("mw-search-result") ?? []
)
return individualResults.map((result) => {
const toRemove = Array.from(result.getElementsByClassName("searchalttitle"))
for (const toRm of toRemove) {
toRm.parentElement.removeChild(toRm)
}
return {
title: result
.getElementsByClassName("mw-search-result-heading")[0]
.textContent.trim(),
url: result.getElementsByTagName("a")[0].href,
snippet: result.getElementsByClassName("searchresult")[0].textContent,
}
})
}
/**
* Returns the innerHTML for the given article as string.
* Some cleanup is applied to this.
*
* This method uses a static, local cache, so each article will be retrieved only once via the network
*/
public GetArticleHtml(pageName: string): Promise<string> {
const cacheKey = this.backend + "/" + pageName
if (Wikipedia._cache.has(cacheKey)) {
return Wikipedia._cache.get(cacheKey)
}
const promise = this.GetArticleUncachedAsync(pageName)
Wikipedia._cache.set(cacheKey, promise)
return promise
}
private async GetArticleUncachedAsync(pageName: string): Promise<string> {
const response = await Utils.downloadJson(this.getDataUrl(pageName))
if (response?.parse?.text === undefined) {
return undefined
}
const html = response["parse"]["text"]["*"]
if (html === undefined) {
return undefined
}
const div = document.createElement("div")
div.innerHTML = html
const content = Array.from(div.children)[0]
for (const forbiddenClass of Wikipedia.classesToRemove) {
const toRemove = content.getElementsByClassName(forbiddenClass)
for (const toRemoveElement of Array.from(toRemove)) {
toRemoveElement.parentElement?.removeChild(toRemoveElement)
}
}
for (const forbiddenId of Wikipedia.idsToRemove) {
const toRemove = content.querySelector("#" + forbiddenId)
toRemove?.parentElement?.removeChild(toRemove)
}
const links = Array.from(content.getElementsByTagName("a"))
// Rewrite relative links to absolute links + open them in a new tab
links
.filter((link) => link.getAttribute("href")?.startsWith("/") ?? false)
.forEach((link) => {
link.target = "_blank"
// note: link.getAttribute("href") gets the textual value, link.href is the rewritten version which'll contain the host for relative paths
link.href = `${this.backend}${link.getAttribute("href")}`
})
return content.innerHTML
}
}
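Two usage sketches (async context assumed; the page name is hypothetical): a combined Wikidata-and-article lookup, and a direct article fetch against a specific language instance:

const details = Wikipedia.fetchArticleAndWikidata("Q42", "en")
details.addCallbackAndRun((d) => {
    if (d.firstParagraph !== undefined) {
        console.log("Intro:", d.firstParagraph)
    }
})

const nl = new Wikipedia({ language: "nl" })
const articleHtml = await nl.GetArticleHtml("Gent")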