forked from MapComplete/MapComplete

Cleanup of wikipedia and download functions

parent 9bedf8e681
commit 48953cf266

4 changed files with 167 additions and 84 deletions
@@ -3,6 +3,7 @@
 */
 import {Utils} from "../../Utils";
 import {UIEventSource} from "../UIEventSource";
+import {WikipediaBoxOptions} from "../../UI/Wikipedia/WikipediaBox";

 export default class Wikipedia {

@@ -29,55 +30,133 @@ export default class Wikipedia {

     private static readonly _cache = new Map<string, UIEventSource<{ success: string } | { error: any }>>()

-    public static GetArticle(options: {
-        pageName: string,
-        language?: "en" | string,
-        firstParagraphOnly?: false | boolean
-    }): UIEventSource<{ success: string } | { error: any }> {
-        const key = (options.language ?? "en") + ":" + options.pageName + ":" + (options.firstParagraphOnly ?? false)
-        const cached = Wikipedia._cache.get(key)
-        if (cached !== undefined) {
-            return cached
-        }
-        const v = UIEventSource.FromPromiseWithErr(Wikipedia.GetArticleAsync(options))
-        Wikipedia._cache.set(key, v)
-        return v;
-    }
-
-    public static getDataUrl(options: {language?: "en" | string, pageName: string}): string {
-        return `https://${options.language ?? "en"}.wikipedia.org/w/api.php?action=parse&format=json&origin=*&prop=text&page=` + options.pageName
-    }
+    private readonly _backend: string;

-    public static getPageUrl(options: {language?: "en" | string, pageName: string}): string {
-        return `https://${options.language ?? "en"}.wikipedia.org/wiki/` + options.pageName
+    constructor(options?: ({ language?: "en" | string } | { backend?: string })) {
+        this._backend = Wikipedia.getBackendUrl(options ?? {});
     }

     /**
      * Tries to extract the language and article name from the given string
      *
      * Wikipedia.extractLanguageAndName("qsdf") // => undefined
      * Wikipedia.extractLanguageAndName("nl:Warandeputten") // => {language: "nl", pageName: "Warandeputten"}
      */
-    public static extractLanguageAndName(input: string):{language: string, pageName: string} {
+    public static extractLanguageAndName(input: string): { language: string, pageName: string } {
         const matched = input.match("([^:]+):(.*)")
-        if(matched === undefined || matched === null){
+        if (matched === undefined || matched === null) {
             return undefined
         }
-        const [_ , language, pageName] = matched
+        const [_, language, pageName] = matched
         return {
             language, pageName
         }
     }

-    public static async GetArticleAsync(options: {
-        pageName: string,
-        language?: "en" | string,
-        firstParagraphOnly?: false | boolean
-    }): Promise<string> {
-
-        const response = await Utils.downloadJson(Wikipedia.getDataUrl(options))
+    /**
+     * Extracts the actual pagename; returns undefined if this came from a different wikimedia entry
+     *
+     * new Wikipedia({backend: "https://wiki.openstreetmap.org"}).extractPageName("https://wiki.openstreetmap.org/wiki/NL:Speelbos") // => "NL:Speelbos"
+     * new Wikipedia().extractPageName("https://wiki.openstreetmap.org/wiki/NL:Speelbos") // => undefined
+     */
+    public extractPageName(input: string): string | undefined {
+        if (!input.startsWith(this._backend)) {
+            return undefined
+        }
+        input = input.substring(this._backend.length);
+
+        const matched = input.match("/?wiki/(.+)")
+        if (matched === undefined || matched === null) {
+            return undefined
+        }
+        const [_, pageName] = matched
+        return pageName
+    }
+
+    private static getBackendUrl(options: { language?: "en" | string } | { backend?: "en.wikipedia.org" | string }): string {
+        let backend = "en.wikipedia.org"
+        if (options["backend"]) {
+            backend = options["backend"]
+        } else if (options["language"]) {
+            backend = `${options["language"] ?? "en"}.wikipedia.org`
+        }
+        if (!backend.startsWith("http")) {
+            backend = "https://" + backend
+        }
+        return backend
+    }
+
+    public GetArticle(pageName: string, options: WikipediaBoxOptions): UIEventSource<{ success: string } | { error: any }> {
+        const key = this._backend + ":" + pageName + ":" + (options.firstParagraphOnly ?? false)
+        const cached = Wikipedia._cache.get(key)
+        if (cached !== undefined) {
+            return cached
+        }
+        const v = UIEventSource.FromPromiseWithErr(this.GetArticleAsync(pageName, options))
+        Wikipedia._cache.set(key, v)
+        return v;
+    }
+
+    public getDataUrl(pageName: string): string {
+        return `${this._backend}/w/api.php?action=parse&format=json&origin=*&prop=text&page=` + pageName
+    }
+
+    public getPageUrl(pageName: string): string {
+        return `${this._backend}/wiki/${pageName}`
+    }
+
+    /**
+     * Textual search of the specified wiki-instance. If searching Wikipedia, we recommend using wikidata.search instead
+     * @param searchTerm
+     */
+    public async search(searchTerm: string): Promise<{ title: string, snippet: string }[]> {
+        const url = this._backend + "/w/api.php?action=query&format=json&list=search&srsearch=" + encodeURIComponent(searchTerm);
+        return (await Utils.downloadJson(url))["query"]["search"];
+    }
+
+    /**
+     * Searches via 'index.php' and scrapes the result.
+     * This gives better results than via the API
+     * @param searchTerm
+     */
+    public async searchViaIndex(searchTerm: string): Promise<{ title: string, snippet: string, url: string }[]> {
+        const url = `${this._backend}/w/index.php?search=${encodeURIComponent(searchTerm)}`
+        const result = await Utils.downloadAdvanced(url);
+        if (result["redirect"]) {
+            // This is an exact match
+            return [{
+                title: this.extractPageName(result["redirect"]),
+                url: result["redirect"],
+                snippet: ""
+            }]
+        }
+        const el = document.createElement('html');
+        el.innerHTML = result["content"].replace(/href="\//g, "href=\"" + this._backend + "/");
+        const searchResults = el.getElementsByClassName("mw-search-results")
+        const individualResults = Array.from(searchResults[0]?.getElementsByClassName("mw-search-result") ?? [])
+        return individualResults.map(result => {
+            return {
+                title: result.getElementsByClassName("mw-search-result-heading")[0].textContent,
+                url: result.getElementsByTagName("a")[0].href,
+                snippet: result.getElementsByClassName("searchresult")[0].textContent
+            }
+        })
+    }
+
+    public async GetArticleAsync(pageName: string, options: {
+        firstParagraphOnly?: false | boolean
+    }): Promise<string | undefined> {
+
+        const response = await Utils.downloadJson(this.getDataUrl(pageName))
+        if (response?.parse?.text === undefined) {
+            return undefined
+        }
         const html = response["parse"]["text"]["*"];
+
         if (html === undefined) {
             return undefined
         }
         const div = document.createElement("div")
         div.innerHTML = html
         const content = Array.from(div.children)[0]
@@ -98,11 +177,10 @@ export default class Wikipedia {
         const links = Array.from(content.getElementsByTagName("a"))

         // Rewrite relative links to absolute links + open them in a new tab
-        const language = options.language ?? "en"
         links.filter(link => link.getAttribute("href")?.startsWith("/") ?? false).forEach(link => {
             link.target = '_blank'
             // note: link.getAttribute("href") gets the textual value, link.href is the rewritten version which'll contain the host for relative paths
-            link.href = `https://${language}.wikipedia.org${link.getAttribute("href")}`;
+            link.href = `${this._backend}${link.getAttribute("href")}`;
         })

         if (options?.firstParagraphOnly) {
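Note: with the hunks above, Wikipedia changes from a static utility into an instance tied to one backend, so the same class can query any MediaWiki installation. A minimal usage sketch under that assumption (the import path is not shown in this diff and is assumed here):

import Wikipedia from "./Logic/Web/Wikipedia"; // assumed path

// Default backend is https://en.wikipedia.org; a language option expands to `${language}.wikipedia.org`
const nlWiki = new Wikipedia({language: "nl"});
// Any MediaWiki instance works as a backend, e.g. the OSM wiki
const osmWiki = new Wikipedia({backend: "https://wiki.openstreetmap.org"});

async function demo() {
    // Downloads and caches the parsed article HTML; firstParagraphOnly trims the body
    const html = await nlWiki.GetArticleAsync("Warandeputten", {firstParagraphOnly: true});
    // index.php search; a 302 from the backend is reported as an exact match
    const hits = await osmWiki.searchViaIndex("Speelbos");
    console.log(html?.length, hits.map(h => h.title));
}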
@@ -16,18 +16,17 @@ import Link from "../Base/Link";
 import WikidataPreviewBox from "./WikidataPreviewBox";
 import {Paragraph} from "../Base/Paragraph";

+export interface WikipediaBoxOptions {
+    addHeader: boolean,
+    firstParagraphOnly: boolean
+}
+
 export default class WikipediaBox extends Combine {

-    public static configuration = {
-        onlyFirstParagaph: false,
-        addHeader: false
-    }
-
-    constructor(wikidataIds: string[]) {
+    constructor(wikidataIds: string[], options?: WikipediaBoxOptions) {
         const mainContents = []
-        const pages = wikidataIds.map(entry => WikipediaBox.createLinkedContent(entry.trim()))
+        options = options ?? {addHeader: false, firstParagraphOnly: true};
+        const pages = wikidataIds.map(entry => WikipediaBox.createLinkedContent(entry.trim(), options))
         if (wikidataIds.length == 1) {
             const page = pages[0]
             mainContents.push(
@@ -68,31 +67,29 @@ export default class WikipediaBox extends Combine {

         super(mainContents)

-
         this.SetClass("block rounded-xl subtle-background m-1 p-2 flex flex-col")
             .SetStyle("max-height: inherit")
     }

-    private static createLinkedContent(entry: string): {
+    private static createLinkedContent(entry: string, options: WikipediaBoxOptions): {
         titleElement: BaseUIElement,
         contents: BaseUIElement,
         linkElement: BaseUIElement
     } {
         if (entry.match("[qQ][0-9]+")) {
-            return WikipediaBox.createWikidatabox(entry)
+            return WikipediaBox.createWikidatabox(entry, options)
         } else {
-            console.log("Creating wikipedia box for ", entry)
-            return WikipediaBox.createWikipediabox(entry)
+            return WikipediaBox.createWikipediabox(entry, options)
         }
     }

     /**
      * Given a '<language>:<article-name>'-string, constructs the wikipedia article
      * @param wikipediaArticle
      * @private
      */
-    private static createWikipediabox(wikipediaArticle: string): {
+    private static createWikipediabox(wikipediaArticle: string, options: WikipediaBoxOptions): {
         titleElement: BaseUIElement,
         contents: BaseUIElement,
         linkElement: BaseUIElement
@@ -107,12 +104,13 @@ export default class WikipediaBox extends Combine {
                 linkElement: undefined
             }
         }
-        const url = Wikipedia.getPageUrl(article) // `https://${language}.wikipedia.org/wiki/${pagetitle}`
+        const wikipedia = new Wikipedia({language: article.language})
+        const url = wikipedia.getPageUrl(article.pageName)
         const linkElement = new Link(Svg.pop_out_svg().SetStyle("width: 1.2rem").SetClass("block "), url, true).SetClass("flex items-center enable-links")

         return {
             titleElement: new Title(article.pageName, 3),
-            contents: WikipediaBox.createContents(article.pageName, article.language),
+            contents: WikipediaBox.createContents(article.pageName, wikipedia, options),
             linkElement
         }
     }
@@ -120,7 +118,7 @@ export default class WikipediaBox extends Combine {
     /**
      * Given a `Q1234`, constructs a wikipedia box or wikidata box
      */
-    private static createWikidatabox(wikidataId: string): {
+    private static createWikidatabox(wikidataId: string, options: WikipediaBoxOptions): {
         titleElement: BaseUIElement,
         contents: BaseUIElement,
         linkElement: BaseUIElement
@@ -176,8 +174,9 @@ export default class WikipediaBox extends Combine {
                     }

                     const [pagetitle, language, wd] = <[string, string, WikidataResponse]>status
+                    const wikipedia = new Wikipedia({language})
                     const quickFacts = WikidataPreviewBox.QuickFacts(wd);
-                    return WikipediaBox.createContents(pagetitle, language, quickFacts)
+                    return WikipediaBox.createContents(pagetitle, wikipedia, {topBar: quickFacts, ...options})

                 })
             )
@@ -223,13 +222,9 @@ export default class WikipediaBox extends Combine {
     /**
      * Returns the actual content in a scrollable way
      */
-    private static createContents(pagename: string, language: string, topBar?: BaseUIElement): BaseUIElement {
-        const wpOptions = {
-            pageName: pagename,
-            language: language,
-            firstParagraphOnly: WikipediaBox.configuration.onlyFirstParagaph
-        }
-        const htmlContent = Wikipedia.GetArticle(wpOptions)
+    private static createContents(pagename: string, wikipedia: Wikipedia, options: {
+        topBar?: BaseUIElement} & WikipediaBoxOptions): BaseUIElement {
+        const htmlContent = wikipedia.GetArticle(pagename, options)
         const wp = Translations.t.general.wikipedia
         const contents: UIEventSource<string | BaseUIElement> = htmlContent.map(htmlContent => {
             if (htmlContent === undefined) {
@@ -238,11 +233,11 @@ export default class WikipediaBox extends Combine {
             }
             if (htmlContent["success"] !== undefined) {
                 let content: BaseUIElement = new FixedUiElement(htmlContent["success"]);
-                if (WikipediaBox.configuration.addHeader) {
+                if (options?.addHeader) {
                     content = new Combine(
                         [
                             new Paragraph(
-                                new Link(wp.fromWikipedia, Wikipedia.getPageUrl(wpOptions), true),
+                                new Link(wp.fromWikipedia, wikipedia.getPageUrl(pagename), true),
                             ),
                             new Paragraph(
                                 content
@@ -261,7 +256,7 @@ export default class WikipediaBox extends Combine {
         })

         return new Combine([
-            topBar?.SetClass("border-2 border-grey rounded-lg m-1 mb-0"),
+            options?.topBar?.SetClass("border-2 border-grey rounded-lg m-1 mb-0"),
             new VariableUiElement(contents)
                 .SetClass("block pl-6 pt-2")])
     }
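Note: callers now pass a WikipediaBoxOptions per box instead of mutating the removed static WikipediaBox.configuration. A sketch of the new call site (import path assumed):

import WikipediaBox, {WikipediaBoxOptions} from "./UI/Wikipedia/WikipediaBox"; // assumed path

const options: WikipediaBoxOptions = {
    addHeader: true,           // prefix the article with a 'from Wikipedia' link
    firstParagraphOnly: false  // render the whole article, not just the intro
};

// Entries are either Wikidata ids ("Q42") or "<language>:<article>" strings
const box = new WikipediaBox(["Q42", "nl:Warandeputten"], options);

// Omitting the options falls back to {addHeader: false, firstParagraphOnly: true}
const compact = new WikipediaBox(["Q42"]);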
34 Utils.ts
@@ -9,7 +9,7 @@ export class Utils {
      */
     public static runningFromConsole = typeof window === "undefined";
     public static readonly assets_path = "./assets/svg/";
-    public static externalDownloadFunction: (url: string, headers?: any) => Promise<any>;
+    public static externalDownloadFunction: (url: string, headers?: any) => Promise<{ content: string } | { redirect: string }>;
     public static Special_visualizations_tagsToApplyHelpText = `These can either be a tag to add, such as \`amenity=fast_food\` or can use a substitution, e.g. \`addr:housenumber=$number\`.
 This new point will then have the tags \`amenity=fast_food\` and \`addr:housenumber\` with the value that was saved in \`number\` in the original feature.

@@ -517,17 +517,17 @@ In the case that MapComplete is pointed to the testing grounds, the edit will be
     /**
      * Apply a function on every leaf of the JSON; used to rewrite parts of the JSON.
      * Returns a modified copy of the original object.
      *
      * 'null' and 'undefined' are _always_ considered a leaf, even if 'isLeaf' says it isn't
      *
      * Hangs if the object contains a loop
      *
      * // should walk a json
      * const walked = Utils.WalkJson({
      *     key: "value"
      * }, (x: string) => x + "!")
      * walked // => {key: "value!"}
      *
      * // should preserve undefined and null:
      * const walked = Utils.WalkJson({
      *     u: undefined,
@@ -535,7 +535,7 @@ In the case that MapComplete is pointed to the testing grounds, the edit will be
      *     v: "value"
      * }, (x) => {if(x !== undefined && x !== null){return x + "!"}; return x})
      * walked // => {v: "value!", u: undefined, n: null}
      *
      * // should preserve undefined and null, also with a negative isLeaf:
      * const walked = Utils.WalkJson({
      *     u: undefined,
@@ -561,8 +561,8 @@ In the case that MapComplete is pointed to the testing grounds, the edit will be
             return f(json, path)
         }
         if (Array.isArray(json)) {
-            return json.map((sub,i) => {
-                return Utils.WalkJson(sub, f, isLeaf, [...path,""+i]);
+            return json.map((sub, i) => {
+                return Utils.WalkJson(sub, f, isLeaf, [...path, "" + i]);
             })
         }

@@ -575,7 +575,7 @@ In the case that MapComplete is pointed to the testing grounds, the edit will be

     /**
      * Walks an object recursively, will execute the 'collect'-callback on every leaf.
      *
      * Will hang on objects with loops
      */
     static WalkObject(json: any, collect: (v: number | string | boolean | undefined, path: string[]) => any, isLeaf: (object) => boolean = undefined, path = []): void {
@@ -664,7 +664,16 @@ In the case that MapComplete is pointed to the testing grounds, the edit will be
         Utils.injectedDownloads[url] = data
     }

-    public static download(url: string, headers?: any): Promise<string> {
+    public static async download(url: string, headers?: any): Promise<string | undefined> {
+        return (await Utils.downloadAdvanced(url, headers))["content"]
+    }
+
+    /**
+     * Download function which also indicates advanced options, such as redirects
+     * @param url
+     * @param headers
+     */
+    public static downloadAdvanced(url: string, headers?: any): Promise<{ content: string } | { redirect: string }> {
         if (this.externalDownloadFunction !== undefined) {
             return this.externalDownloadFunction(url, headers)
         }
@@ -673,7 +682,9 @@ In the case that MapComplete is pointed to the testing grounds, the edit will be
             const xhr = new XMLHttpRequest();
             xhr.onload = () => {
                 if (xhr.status == 200) {
-                    resolve(xhr.response)
+                    resolve({content: xhr.response})
+                } else if (xhr.status === 302) {
+                    resolve({redirect: xhr.getResponseHeader("location")})
                 } else if (xhr.status === 509 || xhr.status === 429) {
                     reject("rate limited")
                 } else {
@@ -682,7 +693,6 @@ In the case that MapComplete is pointed to the testing grounds, the edit will be
             };
             xhr.open('GET', url);
             if (headers !== undefined) {
-
                 for (const key in headers) {
                     xhr.setRequestHeader(key, headers[key])
                 }
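Note: Utils.download is now a thin wrapper over Utils.downloadAdvanced, which resolves to a discriminated object instead of the raw response body, so callers handle the redirect case themselves. A sketch of the calling pattern (the helper name is hypothetical, the import path assumed):

import {Utils} from "./Utils"; // assumed path

async function downloadFollowingOneRedirect(url: string): Promise<string | undefined> {
    const result = await Utils.downloadAdvanced(url);
    if (result["redirect"] !== undefined) {
        // The server answered 302: fetch the target once and unwrap its content
        return (await Utils.downloadAdvanced(result["redirect"]))["content"];
    }
    return result["content"];
}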
@@ -5,10 +5,11 @@ import * as https from "https";
 import {LayoutConfigJson} from "../Models/ThemeConfig/Json/LayoutConfigJson";
 import {LayerConfigJson} from "../Models/ThemeConfig/Json/LayerConfigJson";
+import xml2js from 'xml2js';

 export default class ScriptUtils {

     public static fixUtils() {
-        Utils.externalDownloadFunction = ScriptUtils.DownloadJSON
+        Utils.externalDownloadFunction = ScriptUtils.Download
     }

@@ -44,8 +45,13 @@
         })
     }

+    private static async DownloadJSON(url: string, headers?: any): Promise<any> {
+        const data = await ScriptUtils.Download(url, headers);
+        return JSON.parse(data.content)
+    }
+
-    private static DownloadJSON(url, headers?: any): Promise<any> {
+    private static Download(url, headers?: any): Promise<{ content: string }> {
         return new Promise((resolve, reject) => {
             try {
                 headers = headers ?? {}
@@ -67,13 +73,7 @@
             });

             res.addListener('end', function () {
-                const result = parts.join("")
-                try {
-                    resolve(JSON.parse(result))
-                } catch (e) {
-                    console.error("Could not parse the following as JSON:", result)
-                    reject(e)
-                }
+                resolve({content: parts.join("")})
             });
         })
     } catch (e) {
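Note: in Node there is no XMLHttpRequest, so fixUtils swaps in the https-based ScriptUtils.Download, which now resolves {content} to match the new externalDownloadFunction signature; JSON parsing moved into the private DownloadJSON wrapper. A sketch of how a script would wire this up (import paths assumed):

import ScriptUtils from "./scripts/ScriptUtils"; // assumed path
import {Utils} from "./Utils";                   // assumed path

async function main() {
    // Route Utils' downloads through the https-based implementation
    ScriptUtils.fixUtils();
    // Utils.download unwraps the {content} object that ScriptUtils.Download resolves
    const body = await Utils.download("https://en.wikipedia.org/wiki/OpenStreetMap");
    console.log(body?.substring(0, 100));
}

main();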