From 7a76109d61f50c5e6e228626f91a189f27fe8ff9 Mon Sep 17 00:00:00 2001 From: pietervdvn Date: Sat, 30 Apr 2022 00:30:15 +0200 Subject: [PATCH] Improve wikipedia fetcher --- Logic/Web/Wikipedia.ts | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/Logic/Web/Wikipedia.ts b/Logic/Web/Wikipedia.ts index 3a548ee57f..66d848b2e6 100644 --- a/Logic/Web/Wikipedia.ts +++ b/Logic/Web/Wikipedia.ts @@ -31,9 +31,10 @@ export default class Wikipedia { public static GetArticle(options: { pageName: string, - language?: "en" | string + language?: "en" | string, + firstParagraphOnly?: false | boolean }): UIEventSource<{ success: string } | { error: any }> { - const key = (options.language ?? "en") + ":" + options.pageName + const key = (options.language ?? "en") + ":" + options.pageName + ":" + (options.firstParagraphOnly ?? false) const cached = Wikipedia._cache.get(key) if (cached !== undefined) { return cached @@ -43,14 +44,21 @@ export default class Wikipedia { return v; } + public static getDataUrl(options: {language?: "en" | string, pageName: string}): string{ + return `https://${options.language ?? "en"}.wikipedia.org/w/api.php?action=parse&format=json&origin=*&prop=text&page=` + options.pageName + } + + public static getPageUrl(options: {language?: "en" | string, pageName: string}): string{ + return `https://${options.language ?? "en"}.wikipedia.org/wiki/` + options.pageName + } + public static async GetArticleAsync(options: { pageName: string, - language?: "en" | string + language?: "en" | string, + firstParagraphOnly?: false | boolean }): Promise<string> { - const language = options.language ?? 
"en" - const url = `https://${language}.wikipedia.org/w/api.php?action=parse&format=json&origin=*&prop=text&page=` + options.pageName - const response = await Utils.downloadJson(url) + const response = await Utils.downloadJson(Wikipedia.getDataUrl(options)) const html = response["parse"]["text"]["*"]; const div = document.createElement("div") @@ -73,12 +81,17 @@ export default class Wikipedia { const links = Array.from(content.getElementsByTagName("a")) // Rewrite relative links to absolute links + open them in a new tab + const language = options.language ?? "en" links.filter(link => link.getAttribute("href")?.startsWith("/") ?? false).forEach(link => { link.target = '_blank' // note: link.getAttribute("href") gets the textual value, link.href is the rewritten version which'll contain the host for relative paths link.href = `https://${language}.wikipedia.org${link.getAttribute("href")}`; }) + if (options?.firstParagraphOnly) { + return content.getElementsByTagName("p").item(0).innerHTML + } + return content.innerHTML }