| 
									
										
										
										
											2021-10-02 17:57:54 +02:00
										 |  |  | /** | 
					
						
							|  |  |  |  * Some useful utility functions around the Wikipedia API | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | import {Utils} from "../../Utils"; | 
					
						
							| 
									
										
										
										
											2021-10-02 22:31:16 +02:00
										 |  |  | import {UIEventSource} from "../UIEventSource"; | 
					
						
							| 
									
										
										
										
											2021-10-02 17:57:54 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | export default class Wikipedia { | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * When getting a wikipedia page data result, some elements (e.g. navigation, infoboxes, ...) should be removed if 'removeInfoBoxes' is set. | 
					
						
							|  |  |  |      * We do this based on the classes. This set contains a blacklist of the classes to remove | 
					
						
							|  |  |  |      * @private | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     private static readonly classesToRemove = [ | 
					
						
							|  |  |  |         "shortdescription", | 
					
						
							|  |  |  |         "sidebar", | 
					
						
							| 
									
										
										
										
											2021-11-07 16:34:51 +01:00
										 |  |  |         "infobox", "infobox_v2", | 
					
						
							| 
									
										
										
										
											2021-10-02 22:31:16 +02:00
										 |  |  |         "noprint", | 
					
						
							|  |  |  |         "ambox", | 
					
						
							| 
									
										
										
										
											2021-10-02 17:57:54 +02:00
										 |  |  |         "mw-editsection", | 
					
						
							| 
									
										
										
										
											2021-10-02 22:31:16 +02:00
										 |  |  |         "mw-selflink", | 
					
						
							| 
									
										
										
										
											2021-10-18 20:40:24 +02:00
										 |  |  |         "mw-empty-elt", | 
					
						
							| 
									
										
										
										
											2021-10-02 17:57:54 +02:00
										 |  |  |         "hatnote" // Often redirects
 | 
					
						
							|  |  |  |     ] | 
					
						
							| 
									
										
										
										
											2021-11-07 16:34:51 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-07 22:06:47 +02:00
										 |  |  |     private static readonly idsToRemove = [ | 
					
						
							|  |  |  |         "sjabloon_zie" | 
					
						
							|  |  |  |     ] | 
					
						
							| 
									
										
										
										
											2021-10-02 17:57:54 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-02 22:31:16 +02:00
										 |  |  |     private static readonly _cache = new Map<string, UIEventSource<{ success: string } | { error: any }>>() | 
					
						
							| 
									
										
										
										
											2021-11-07 16:34:51 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-02 22:31:16 +02:00
										 |  |  |     public static GetArticle(options: { | 
					
						
							| 
									
										
										
										
											2021-10-02 17:57:54 +02:00
										 |  |  |         pageName: string, | 
					
						
							| 
									
										
										
										
											2022-04-30 00:30:15 +02:00
										 |  |  |         language?: "en" | string, | 
					
						
							|  |  |  |         firstParagraphOnly?: false | boolean | 
					
						
							| 
									
										
										
										
											2021-11-07 16:34:51 +01:00
										 |  |  |     }): UIEventSource<{ success: string } | { error: any }> { | 
					
						
							| 
									
										
										
										
											2022-04-30 00:30:15 +02:00
										 |  |  |         const key = (options.language ?? "en") + ":" + options.pageName + ":" + (options.firstParagraphOnly ?? false) | 
					
						
							| 
									
										
										
										
											2021-10-02 22:31:16 +02:00
										 |  |  |         const cached = Wikipedia._cache.get(key) | 
					
						
							| 
									
										
										
										
											2021-11-07 16:34:51 +01:00
										 |  |  |         if (cached !== undefined) { | 
					
						
							| 
									
										
										
										
											2021-10-02 22:31:16 +02:00
										 |  |  |             return cached | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         const v = UIEventSource.FromPromiseWithErr(Wikipedia.GetArticleAsync(options)) | 
					
						
							|  |  |  |         Wikipedia._cache.set(key, v) | 
					
						
							|  |  |  |         return v; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-11-07 16:34:51 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-04-30 00:30:15 +02:00
										 |  |  |     public static getDataUrl(options: {language?: "en" | string, pageName: string}): string{ | 
					
						
							|  |  |  |         return `https://${options.language ?? "en"}.wikipedia.org/w/api.php?action=parse&format=json&origin=*&prop=text&page=` + options.pageName | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     public static getPageUrl(options: {language?: "en" | string, pageName: string}): string{ | 
					
						
							|  |  |  |         return `https://${options.language ?? "en"}.wikipedia.org/wiki/` + options.pageName | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2022-05-01 20:56:16 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Tries to extract the language and article name from the given string | 
					
						
							|  |  |  |      *  | 
					
						
							|  |  |  |      * Wikipedia.extractLanguageAndName("qsdf") // => undefined
 | 
					
						
							| 
									
										
										
										
											2022-05-01 21:05:58 +02:00
										 |  |  |      * Wikipedia.extractLanguageAndName("nl:Warandeputten") // => {language: "nl", pageName: "Warandeputten"}
 | 
					
						
							| 
									
										
										
										
											2022-05-01 20:56:16 +02:00
										 |  |  |      */ | 
					
						
							|  |  |  |     public static extractLanguageAndName(input: string):{language: string, pageName: string} { | 
					
						
							|  |  |  |         const matched = input.match("([^:]+):(.*)") | 
					
						
							|  |  |  |         if(matched === undefined || matched === null){ | 
					
						
							|  |  |  |             return undefined | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         const [_ , language, pageName] = matched | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             language, pageName | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2022-04-30 00:30:15 +02:00
										 |  |  |      | 
					
						
							| 
									
										
										
										
											2021-10-02 22:31:16 +02:00
										 |  |  |     public static async GetArticleAsync(options: { | 
					
						
							|  |  |  |         pageName: string, | 
					
						
							| 
									
										
										
										
											2022-04-30 00:30:15 +02:00
										 |  |  |         language?: "en" | string, | 
					
						
							|  |  |  |         firstParagraphOnly?: false | boolean | 
					
						
							| 
									
										
										
										
											2021-10-02 17:57:54 +02:00
										 |  |  |     }): Promise<string> { | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-04-30 00:30:15 +02:00
										 |  |  |         const response = await Utils.downloadJson(Wikipedia.getDataUrl(options)) | 
					
						
							| 
									
										
										
										
											2021-10-02 17:57:54 +02:00
										 |  |  |         const html = response["parse"]["text"]["*"]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         const div = document.createElement("div") | 
					
						
							|  |  |  |         div.innerHTML = html | 
					
						
							|  |  |  |         const content = Array.from(div.children)[0] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for (const forbiddenClass of Wikipedia.classesToRemove) { | 
					
						
							| 
									
										
										
										
											2021-11-07 16:34:51 +01:00
										 |  |  |             const toRemove = content.getElementsByClassName(forbiddenClass) | 
					
						
							| 
									
										
										
										
											2021-10-02 17:57:54 +02:00
										 |  |  |             for (const toRemoveElement of Array.from(toRemove)) { | 
					
						
							|  |  |  |                 toRemoveElement.parentElement?.removeChild(toRemoveElement) | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2021-10-02 22:31:16 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-07 22:06:47 +02:00
										 |  |  |         for (const forbiddenId of Wikipedia.idsToRemove) { | 
					
						
							| 
									
										
										
										
											2021-11-07 16:34:51 +01:00
										 |  |  |             const toRemove = content.querySelector("#" + forbiddenId) | 
					
						
							| 
									
										
										
										
											2021-10-07 22:06:47 +02:00
										 |  |  |             toRemove?.parentElement?.removeChild(toRemove) | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2021-11-07 16:34:51 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-07 22:06:47 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-02 22:31:16 +02:00
										 |  |  |         const links = Array.from(content.getElementsByTagName("a")) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // Rewrite relative links to absolute links + open them in a new tab
 | 
					
						
							| 
									
										
										
										
											2022-04-30 00:30:15 +02:00
										 |  |  |         const language = options.language ?? "en" | 
					
						
							| 
									
										
										
										
											2021-11-07 16:34:51 +01:00
										 |  |  |         links.filter(link => link.getAttribute("href")?.startsWith("/") ?? false).forEach(link => { | 
					
						
							| 
									
										
										
										
											2021-10-02 22:31:16 +02:00
										 |  |  |             link.target = '_blank' | 
					
						
							|  |  |  |             // note: link.getAttribute("href") gets the textual value, link.href is the rewritten version which'll contain the host for relative paths
 | 
					
						
							|  |  |  |             link.href = `https://${language}.wikipedia.org${link.getAttribute("href")}`; | 
					
						
							|  |  |  |         }) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-04-30 00:30:15 +02:00
										 |  |  |         if (options?.firstParagraphOnly) { | 
					
						
							|  |  |  |             return content.getElementsByTagName("p").item(0).innerHTML | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-02 22:31:16 +02:00
										 |  |  |         return content.innerHTML | 
					
						
							| 
									
										
										
										
											2021-10-02 17:57:54 +02:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } |