From 2af6af7630556c29704c8103af579b69f3b930e9 Mon Sep 17 00:00:00 2001
From: Pieter Vander Vennet
Date: Thu, 22 Feb 2024 14:59:05 +0100
Subject: [PATCH] Add JSON-LD proxy server

---
 scripts/server.ts         |  4 ++--
 scripts/serverLdScrape.ts | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 2 deletions(-)
 create mode 100644 scripts/serverLdScrape.ts

diff --git a/scripts/server.ts b/scripts/server.ts
index 8c036721c..8bed5bc18 100644
--- a/scripts/server.ts
+++ b/scripts/server.ts
@@ -9,7 +9,7 @@ export class Server {
         handle: {
             mustMatch: string | RegExp
             mimetype: string
-            handle: (path: string) => Promise<string>
+            handle: (path: string, queryParams: URLSearchParams) => Promise<string>
         }[]
     ) {
         handle.push({
@@ -89,7 +89,7 @@ export class Server {
                 }
 
                 try {
-                    const result = await handler.handle(path)
+                    const result = await handler.handle(path, url.searchParams)
                     res.writeHead(200, { "Content-Type": handler.mimetype })
                     res.write(result)
                     res.end()
diff --git a/scripts/serverLdScrape.ts b/scripts/serverLdScrape.ts
new file mode 100644
index 000000000..31d66b0e8
--- /dev/null
+++ b/scripts/serverLdScrape.ts
@@ -0,0 +1,70 @@
+import Script from "../scripts/Script"
+import { Server } from "../scripts/server"
+import { Utils } from "../src/Utils"
+import parse from "node-html-parser"
+
+/**
+ * Proxy server which downloads the webpage given in the `url` query parameter
+ * and answers with the JSON-LD snippets embedded in that page.
+ */
+class ServerLdScrape extends Script {
+    constructor() {
+        super("Starts a server which fetches a webpage and returns embedded LD+JSON")
+    }
+
+    /**
+     * Creates a `Server` on the requested port with a single handler, matching "extractgraph".
+     *
+     * @param args command line arguments; `args[0]` is the port to listen on (2346 if absent)
+     */
+    async main(args: string[]): Promise<void> {
+        const port = Number(args[0] ?? 2346)
+        new Server(port, {}, [
+            {
+                mustMatch: "extractgraph",
+                mimetype: "application/ld+json",
+                /**
+                 * Downloads the page named by the `url` query parameter and returns the parsed
+                 * contents of all its script elements of type application/ld+json as one
+                 * stringified JSON array. Snippets which are not valid JSON are logged and skipped.
+                 */
+                async handle(_path: string, searchParams: URLSearchParams): Promise<string> {
+                    const url = searchParams.get("url")
+                    if (!url) {
+                        // `get` yields null when the parameter is absent: fail with a clear message
+                        throw new Error("Missing required query parameter 'url'")
+                    }
+                    // The target is chosen by the client: only fetch web pages (`new URL` throws on malformed input)
+                    const protocol = new URL(url).protocol
+                    if (protocol !== "http:" && protocol !== "https:") {
+                        throw new Error("Only http(s) URLs can be scraped, got " + protocol)
+                    }
+
+                    const dloaded = await Utils.download(url, {
+                        "User-Agent":
+                            "MapComplete/openstreetmap scraper; pietervdvn@posteo.net; https://github.com/pietervdvn/MapComplete",
+                    })
+                    const parsed = parse(dloaded)
+                    const scripts = Array.from(parsed.getElementsByTagName("script"))
+                    const snippets: unknown[] = []
+                    for (const script of scripts) {
+                        const tp = script.attributes["type"]
+                        if (tp !== "application/ld+json") {
+                            continue
+                        }
+                        try {
+                            snippets.push(JSON.parse(script.textContent))
+                        } catch (e) {
+                            // A single malformed snippet must not hide the valid ones
+                            console.error("Could not parse an LD+JSON snippet of", url, e)
+                        }
+                    }
+
+                    return JSON.stringify(snippets)
+                },
+            },
+        ])
+    }
+}
+
+new ServerLdScrape().run()