More tweaks to the linked data loader

This commit is contained in:
Pieter Vander Vennet 2024-06-19 03:22:57 +02:00
parent 684932aebd
commit 734be4a702
5 changed files with 76 additions and 55 deletions

View file

@ -2,14 +2,14 @@ import Script from "./Script"
import LinkedDataLoader from "../src/Logic/Web/LinkedDataLoader" import LinkedDataLoader from "../src/Logic/Web/LinkedDataLoader"
import { writeFileSync } from "fs" import { writeFileSync } from "fs"
export default class DownloadLinkedDataList extends Script { class DownloadLinkedDataList extends Script {
constructor() { constructor() {
super("Downloads the localBusinesses from the given location. Usage: url [--no-proxy]") super("Downloads the localBusinesses from the given location. Usage: url [--no-proxy]")
} }
async main([url, noProxy]: string[]): Promise<void> { async main([url, noProxy]: string[]): Promise<void> {
const useProxy = noProxy !== "--no-proxy" const useProxy = noProxy !== "--no-proxy"
const data = await LinkedDataLoader.fetchJsonLd(url, {}, useProxy) const data = await LinkedDataLoader.fetchJsonLd(url, {}, useProxy ? "proxy" : "fetch-lod")
const path = "linked_data_" + url.replace(/[^a-zA-Z0-9_]/g, "_") + ".jsonld" const path = "linked_data_" + url.replace(/[^a-zA-Z0-9_]/g, "_") + ".jsonld"
writeFileSync(path, JSON.stringify(data), "utf8") writeFileSync(path, JSON.stringify(data), "utf8")
console.log("Written", path) console.log("Written", path)

View file

@ -17,7 +17,7 @@ class CompareWebsiteData extends Script {
if (fs.existsSync(filename)) { if (fs.existsSync(filename)) {
return JSON.parse(fs.readFileSync(filename, "utf-8")) return JSON.parse(fs.readFileSync(filename, "utf-8"))
} }
const jsonLd = await LinkedDataLoader.fetchJsonLd(url, undefined, true) const jsonLd = await LinkedDataLoader.fetchJsonLd(url, undefined, "proxy")
console.log("Got:", jsonLd) console.log("Got:", jsonLd)
fs.writeFileSync(filename, JSON.stringify(jsonLd)) fs.writeFileSync(filename, JSON.stringify(jsonLd))
return jsonLd return jsonLd

View file

@ -27,23 +27,23 @@ export default class LinkedDataLoader {
opening_hours: { "@id": "http://schema.org/openingHoursSpecification" }, opening_hours: { "@id": "http://schema.org/openingHoursSpecification" },
openingHours: { "@id": "http://schema.org/openingHours", "@container": "@set" }, openingHours: { "@id": "http://schema.org/openingHours", "@container": "@set" },
geo: { "@id": "http://schema.org/geo" }, geo: { "@id": "http://schema.org/geo" },
alt_name: { "@id": "http://schema.org/alternateName" }, alt_name: { "@id": "http://schema.org/alternateName" }
} }
private static COMPACTING_CONTEXT_OH = { private static COMPACTING_CONTEXT_OH = {
dayOfWeek: { "@id": "http://schema.org/dayOfWeek", "@container": "@set" }, dayOfWeek: { "@id": "http://schema.org/dayOfWeek", "@container": "@set" },
closes: { closes: {
"@id": "http://schema.org/closes", "@id": "http://schema.org/closes",
"@type": "http://www.w3.org/2001/XMLSchema#time", "@type": "http://www.w3.org/2001/XMLSchema#time"
}, },
opens: { opens: {
"@id": "http://schema.org/opens", "@id": "http://schema.org/opens",
"@type": "http://www.w3.org/2001/XMLSchema#time", "@type": "http://www.w3.org/2001/XMLSchema#time"
}, }
} }
private static formatters: Record<"phone" | "email" | "website", Validator> = { private static formatters: Record<"phone" | "email" | "website", Validator> = {
phone: new PhoneValidator(), phone: new PhoneValidator(),
email: new EmailValidator(), email: new EmailValidator(),
website: new UrlValidator(undefined, undefined, true), website: new UrlValidator(undefined, undefined, true)
} }
private static ignoreKeys = [ private static ignoreKeys = [
"http://schema.org/logo", "http://schema.org/logo",
@ -56,7 +56,7 @@ export default class LinkedDataLoader {
"http://schema.org/description", "http://schema.org/description",
"http://schema.org/hasMap", "http://schema.org/hasMap",
"http://schema.org/priceRange", "http://schema.org/priceRange",
"http://schema.org/contactPoint", "http://schema.org/contactPoint"
] ]
private static shapeToPolygon(str: string): Polygon { private static shapeToPolygon(str: string): Polygon {
@ -69,8 +69,8 @@ export default class LinkedDataLoader {
.trim() .trim()
.split(" ") .split(" ")
.map((n) => Number(n)) .map((n) => Number(n))
), )
], ]
} }
} }
@ -92,18 +92,18 @@ export default class LinkedDataLoader {
const context = { const context = {
lat: { lat: {
"@id": "http://schema.org/latitude", "@id": "http://schema.org/latitude",
"@type": "http://www.w3.org/2001/XMLSchema#double", "@type": "http://www.w3.org/2001/XMLSchema#double"
}, },
lon: { lon: {
"@id": "http://schema.org/longitude", "@id": "http://schema.org/longitude",
"@type": "http://www.w3.org/2001/XMLSchema#double", "@type": "http://www.w3.org/2001/XMLSchema#double"
}, }
} }
const flattened = await jsonld.compact(geo, context) const flattened = await jsonld.compact(geo, context)
return { return {
type: "Point", type: "Point",
coordinates: [Number(flattened.lon), Number(flattened.lat)], coordinates: [Number(flattened.lon), Number(flattened.lat)]
} }
} }
@ -236,15 +236,30 @@ export default class LinkedDataLoader {
static async fetchJsonLd( static async fetchJsonLd(
url: string, url: string,
options?: JsonLdLoaderOptions, options?: JsonLdLoaderOptions,
useProxy: boolean = false mode: "fetch-lod" | "fetch-raw" | "proxy"
): Promise<object> { ): Promise<object> {
if (useProxy) { if (mode === "proxy") {
url = Constants.linkedDataProxy.replace("{url}", encodeURIComponent(url)) url = Constants.linkedDataProxy.replace("{url}", encodeURIComponent(url))
} }
if (mode !== "fetch-raw") {
const data = await Utils.downloadJson(url) const data = await Utils.downloadJson(url)
return await LinkedDataLoader.compact(data, options) return await LinkedDataLoader.compact(data, options)
} }
let htmlContent = await Utils.download(url)
const div = document.createElement("div")
div.innerHTML = htmlContent
const script = Array.from(div.getElementsByTagName("script"))
.find(script => script.type === "application/ld+json")
const snippet = JSON.parse(script.textContent)
snippet["@base"] = url
return await LinkedDataLoader.compact(snippet, options)
}
/** /**
* Only returns different items * Only returns different items
* @param externalData * @param externalData
@ -293,7 +308,7 @@ export default class LinkedDataLoader {
if (properties["latitude"] && properties["longitude"]) { if (properties["latitude"] && properties["longitude"]) {
geometry = { geometry = {
type: "Point", type: "Point",
coordinates: [Number(properties["longitude"]), Number(properties["latitude"])], coordinates: [Number(properties["longitude"]), Number(properties["latitude"])]
} }
delete properties["latitude"] delete properties["latitude"]
delete properties["longitude"] delete properties["longitude"]
@ -305,7 +320,7 @@ export default class LinkedDataLoader {
const geo: GeoJSON = { const geo: GeoJSON = {
type: "Feature", type: "Feature",
properties, properties,
geometry, geometry
} }
delete linkedData.geo delete linkedData.geo
delete properties.shape delete properties.shape
@ -423,7 +438,7 @@ export default class LinkedDataLoader {
"brede publiek", "brede publiek",
"iedereen", "iedereen",
"bezoekers", "bezoekers",
"iedereen - vooral bezoekers gemeentehuis of bibliotheek.", "iedereen - vooral bezoekers gemeentehuis of bibliotheek."
].indexOf(audience.toLowerCase()) >= 0 ].indexOf(audience.toLowerCase()) >= 0
) { ) {
return "yes" return "yes"
@ -506,7 +521,7 @@ export default class LinkedDataLoader {
mv: "http://schema.mobivoc.org/", mv: "http://schema.mobivoc.org/",
gr: "http://purl.org/goodrelations/v1#", gr: "http://purl.org/goodrelations/v1#",
vp: "https://data.velopark.be/openvelopark/vocabulary#", vp: "https://data.velopark.be/openvelopark/vocabulary#",
vpt: "https://data.velopark.be/openvelopark/terms#", vpt: "https://data.velopark.be/openvelopark/terms#"
}, },
[url], [url],
undefined, undefined,
@ -527,7 +542,7 @@ export default class LinkedDataLoader {
mv: "http://schema.mobivoc.org/", mv: "http://schema.mobivoc.org/",
gr: "http://purl.org/goodrelations/v1#", gr: "http://purl.org/goodrelations/v1#",
vp: "https://data.velopark.be/openvelopark/vocabulary#", vp: "https://data.velopark.be/openvelopark/vocabulary#",
vpt: "https://data.velopark.be/openvelopark/terms#", vpt: "https://data.velopark.be/openvelopark/terms#"
}, },
[url], [url],
"g", "g",
@ -670,20 +685,20 @@ export default class LinkedDataLoader {
const withProxyUrl = Constants.linkedDataProxy.replace("{url}", encodeURIComponent(url)) const withProxyUrl = Constants.linkedDataProxy.replace("{url}", encodeURIComponent(url))
const optionalPaths: Record<string, string | Record<string, string>> = { const optionalPaths: Record<string, string | Record<string, string>> = {
"schema:interactionService": { "schema:interactionService": {
"schema:url": "website", "schema:url": "website"
}, },
"mv:operatedBy": { "mv:operatedBy": {
"gr:legalName": "operator", "gr:legalName": "operator"
}, },
"schema:contactPoint": { "schema:contactPoint": {
"schema:email": "email", "schema:email": "email",
"schema:telephone": "phone", "schema:telephone": "phone"
}, },
"schema:dateModified": "_last_edit_timestamp", "schema:dateModified": "_last_edit_timestamp"
} }
if (includeExtras) { if (includeExtras) {
optionalPaths["schema:address"] = { optionalPaths["schema:address"] = {
"schema:streetAddress": "addr", "schema:streetAddress": "addr"
} }
optionalPaths["schema:name"] = "name" optionalPaths["schema:name"] = "name"
optionalPaths["schema:description"] = "description" optionalPaths["schema:description"] = "description"
@ -701,19 +716,19 @@ export default class LinkedDataLoader {
"schema:geo": { "schema:geo": {
"schema:latitude": "latitude", "schema:latitude": "latitude",
"schema:longitude": "longitude", "schema:longitude": "longitude",
"schema:polygon": "shape", "schema:polygon": "shape"
}, },
"schema:priceSpecification": { "schema:priceSpecification": {
"mv:freeOfCharge": "fee", "mv:freeOfCharge": "fee",
"schema:price": "charge", "schema:price": "charge"
}, }
} }
const extra = [ const extra = [
"schema:priceSpecification [ mv:dueForTime [ mv:timeStartValue ?chargeStart; mv:timeEndValue ?chargeEnd; mv:timeUnit ?timeUnit ] ]", "schema:priceSpecification [ mv:dueForTime [ mv:timeStartValue ?chargeStart; mv:timeEndValue ?chargeEnd; mv:timeUnit ?timeUnit ] ]",
"vp:allows [vp:bicycleType <https://data.velopark.be/openvelopark/terms#CargoBicycle>; vp:bicyclesAmount ?capacityCargobike; vp:bicycleType ?cargoBikeType]", "vp:allows [vp:bicycleType <https://data.velopark.be/openvelopark/terms#CargoBicycle>; vp:bicyclesAmount ?capacityCargobike; vp:bicycleType ?cargoBikeType]",
"vp:allows [vp:bicycleType <https://data.velopark.be/openvelopark/terms#ElectricBicycle>; vp:bicyclesAmount ?capacityElectric; vp:bicycleType ?electricBikeType]", "vp:allows [vp:bicycleType <https://data.velopark.be/openvelopark/terms#ElectricBicycle>; vp:bicyclesAmount ?capacityElectric; vp:bicycleType ?electricBikeType]",
"vp:allows [vp:bicycleType <https://data.velopark.be/openvelopark/terms#TandemBicycle>; vp:bicyclesAmount ?capacityTandem; vp:bicycleType ?tandemBikeType]", "vp:allows [vp:bicycleType <https://data.velopark.be/openvelopark/terms#TandemBicycle>; vp:bicyclesAmount ?capacityTandem; vp:bicycleType ?tandemBikeType]"
] ]
const unpatched = await this.fetchEntry( const unpatched = await this.fetchEntry(

View file

@ -66,9 +66,8 @@
</script> </script>
<div> <div>
<div class:interactive={!readonly} class="flex w-full justify-between py-1 px-2"> <div class:interactive={!readonly} class="flex flex-col items-end py-1 px-2">
<div class="flex flex-col"> <div class="flex flex-col w-full">
<div>
{#if renderingExternal} {#if renderingExternal}
<TagRenderingAnswer <TagRenderingAnswer
tags={new UIEventSource(mockPropertiesExternal)} tags={new UIEventSource(mockPropertiesExternal)}
@ -83,7 +82,6 @@
{externalProperties[key]} {externalProperties[key]}
</div> </div>
{/if} {/if}
</div>
{#if !readonly && ($isTesting || $isDebug || $showTags === "yes" || $showTags === "always" || $showTags === "full")} {#if !readonly && ($isTesting || $isDebug || $showTags === "yes" || $showTags === "always" || $showTags === "full")}
<div class="subtle text-sm"> <div class="subtle text-sm">
@ -103,7 +101,7 @@
{#if !readonly} {#if !readonly}
{#if currentStep === "init"} {#if currentStep === "init"}
<button <button
class="small" class="w-fit"
on:click={() => apply(key)} on:click={() => apply(key)}
on:mouseover={() => (onOverwrite = true)} on:mouseover={() => (onOverwrite = true)}
on:focus={() => (onOverwrite = true)} on:focus={() => (onOverwrite = true)}

View file

@ -1833,8 +1833,15 @@ export default class SpecialVisualizations {
})() })()
) )
} }
return Stores.FromPromiseWithErr( return Stores.FromPromiseWithErr((async () => {
LinkedDataLoader.fetchJsonLd(url, { country }, useProxy) try {
return await LinkedDataLoader.fetchJsonLd(url, { country }, useProxy ? "proxy" : "fetch-lod")
} catch (e) {
console.log("Could not get with proxy/download LOD, attempting to download directly. Error for ",url,"is",e)
return await LinkedDataLoader.fetchJsonLd(url, { country }, "fetch-raw")
}
})()
) )
}) })
@ -1850,7 +1857,8 @@ export default class SpecialVisualizations {
layer, layer,
externalData, externalData,
sourceUrl, sourceUrl,
readonly readonly,
collapsed: isClosed
}), }),
undefined, undefined,
url.map((url) => !!url) url.map((url) => !!url)