Add linked data module which scrapes websites

This commit is contained in:
Pieter Vander Vennet 2024-02-22 18:58:34 +01:00
parent 2af6af7630
commit 35c31f9861
15 changed files with 870 additions and 130 deletions

View file

@ -14,7 +14,7 @@
export let selectedElement: Feature
export let highlightedRendering: UIEventSource<string> = undefined
export let tags: UIEventSource<Record<string, string>> = state.featureProperties.getStore(
export let tags: UIEventSource<Record<string, string>> = state?.featureProperties?.getStore(
selectedElement.properties.id
)
@ -22,11 +22,14 @@
let stillMatches = tags.map(tags => !layer?.source?.osmTags || layer.source.osmTags?.matchesProperties(tags))
let _metatags: Record<string, string>
// Mirror the user preferences into `_metatags`, but only when the state actually
// carries a `userRelatedState.preferencesAsTags` store (state may be undefined).
if (state?.userRelatedState?.preferencesAsTags) {
    // Whatever addCallbackAndRun returns is handed to onDestroy, so it is invoked
    // when this component is torn down.
    const cleanup = state.userRelatedState.preferencesAsTags.addCallbackAndRun((preferences) => {
        _metatags = preferences
    })
    onDestroy(cleanup)
}
let knownTagRenderings: Store<TagRenderingConfig[]> = tags.mapD((tgs) =>
layer.tagRenderings.filter(

View file

@ -30,7 +30,7 @@ export class ImageCarousel extends Toggle {
try {
let image: BaseUIElement = new SvelteUIElement(AttributedImage, {
image: url,
previewedImage: state.previewedImage,
previewedImage: state?.previewedImage,
})
if (url.key !== undefined) {

View file

@ -24,7 +24,7 @@ export default class PhoneValidator extends Validator {
return generic
}
public isValid(str, country: () => string): boolean {
public isValid(str: string, country?: () => string): boolean {
if (str === undefined) {
return false
}

View file

@ -1,13 +1,15 @@
import { Validator } from "../Validator"
export default class UrlValidator extends Validator {
constructor(name?: string, explanation?: string) {
private readonly _forceHttps: boolean
constructor(name?: string, explanation?: string, forceHttps?: boolean) {
super(
name ?? "url",
explanation ??
"The validatedTextField will format URLs to always be valid and have a https://-header (even though the 'https'-part will be hidden from the user. Furthermore, some tracking parameters will be removed",
"url"
)
this._forceHttps = forceHttps ?? false
}
reformat(str: string): string {
try {
@ -22,6 +24,9 @@ export default class UrlValidator extends Validator {
} else {
url = new URL(str)
}
if (this._forceHttps) {
url.protocol = "https:"
}
const blacklistedTrackingParams = [
"fbclid", // Oh god, how I hate the fbclid. Let it burn, burn in hell!
"gclid",

View file

@ -0,0 +1,90 @@
<script lang="ts">
/**
* Shows attributes that are loaded via linked data and which are suitable for import
*/
import type { SpecialVisualizationState } from "./SpecialVisualization"
import type { Store } from "../Logic/UIEventSource"
import { Stores, UIEventSource } from "../Logic/UIEventSource"
import type { Feature, Geometry } from "geojson"
import LayerConfig from "../Models/ThemeConfig/LayerConfig"
import LinkedDataLoader from "../Logic/Web/LinkedDataLoader"
import Loading from "./Base/Loading.svelte"
import { GeoOperations } from "../Logic/GeoOperations"
import { OH } from "./OpeningHours/OpeningHours"
/** Global visualization state; not read by the script or template of this component. */
export let state: SpecialVisualizationState
/** Live tags of the feature; the URL to scrape is read from `tagsSource[key]`, the country from `_country`. */
export let tagsSource: UIEventSource<Record<string, string>>
/** Raw arguments of the special visualisation; not read by the script or template of this component. */
export let argument: string[]
/** The feature being shown; used to compute the distance to the location found in the linked data. */
export let feature: Feature
/** Layer of the feature; not read by the script or template of this component. */
export let layer: LayerConfig
/** Name of the tag which holds the URL of the website to load linked data from. */
export let key: string
/**
 * The website URL together with the country of the feature.
 * Yields undefined as long as the country is unknown, the tag is missing/empty
 * or the tag holds the literal string "undefined".
 */
let url = tagsSource.mapD(properties => {
    const country = properties._country
    const website = properties[key]
    const usable = Boolean(country) && Boolean(website) && website !== "undefined"
    return usable ? { url: website, country } : undefined
})
// Fetch the linked data for the current URL; the result is read through its "error" / "success" keys
let dataWithErr = url.bindD(target =>
    Stores.FromPromiseWithErr(LinkedDataLoader.fetchJsonLd(target.url, target.country))
)
// The "error" entry of the fetch result, if any
let error = dataWithErr.mapD(result => result["error"])
// The "success" entry of the fetch result, i.e. the downloaded data
let data = dataWithErr.mapD(result => result["success"])
/**
 * Rounded distance between the `geo` entry of the downloaded data and the feature,
 * rendered as a string with an "m" suffix.
 */
let distanceToFeature: Store<string> = data
    .mapD(downloaded => downloaded.geo)
    .mapD(geo => {
        const remoteCenter = GeoOperations.centerpointCoordinates(<Geometry>geo)
        const featureCenter = GeoOperations.centerpointCoordinates(feature)
        const rounded = Math.round(GeoOperations.distanceBetween(remoteCenter, featureCenter))
        return rounded + "m"
    })
/**
 * The downloaded data with everything removed that should not be listed:
 * - the "@context" entry
 * - the "geo" entry (it is already presented via `distanceToFeature`)
 * - every entry whose value equals the current tag value of the feature
 *   (for "opening_hours", the tag value is parsed, merged and re-serialized before comparing)
 *
 * Recalculated when either the downloaded data or the tags change.
 */
let dataCleaned = data.mapD(d => {
    const featureTags = tagsSource.data
    console.log("Downloaded data is", d)
    // Work on a shallow copy so that the downloaded object itself is left untouched
    d = { ...d }
    delete d["@context"]
    // Must happen unconditionally: when this lived inside the loop below, 'geo' was only
    // removed if at least one key reached the end of an iteration and otherwise leaked into the list
    delete d.geo
    // Iterate over a snapshot of the keys, as entries are deleted while looping
    for (const k of Object.keys(d)) {
        const v = featureTags[k]
        if (!v) {
            // The feature has no value for this key yet: this is new information, keep it
            continue
        }
        if (k === "opening_hours") {
            // Normalize the opening hours of the feature before comparing them
            const oh = [].concat(...v.split(";").map(r => OH.ParseRule(r) ?? []))
            const merged = OH.ToString(OH.MergeTimes(oh))
            if (merged === d[k]) {
                delete d[k]
                continue
            }
        }
        if (v === d[k]) {
            delete d[k]
        }
    }
    return d
}, [tagsSource])
</script>
<!--
Rendering order: an error (if any) is shown in an alert box; otherwise, if a usable URL is known,
a dashed box is shown containing either a 'no new data' notice, a loading indicator,
or the distance to the feature followed by the list of remaining attributes.
-->
{#if $error}
<div class="alert">
{$error}
</div>
{:else if $url}
<div class="flex flex-col border border-dashed border-gray-500 p-1">
{#if $dataCleaned !== undefined && Object.keys($dataCleaned).length === 0}
No new data from website
{:else if !$data}
<Loading />
{:else}
{$distanceToFeature}
<ul>
{#each Object.keys($dataCleaned) as k}
<li>
<!-- NOTE(review): besides the downloaded value, this also prints the current tag value and a strict-equality boolean; looks like debugging output, confirm before release -->
<b>{k}</b>: {JSON.stringify($dataCleaned[k])} {$tagsSource[k]} {($dataCleaned[k]) === $tagsSource[k]}
</li>
{/each}
</ul>
{/if}
</div>
{/if}

View file

@ -93,6 +93,7 @@ import SpecialVisualisationUtils from "./SpecialVisualisationUtils"
import LoginButton from "./Base/LoginButton.svelte"
import Toggle from "./Input/Toggle"
import ImportReviewIdentity from "./Reviews/ImportReviewIdentity.svelte"
import LinkedDataDisplay from "./LinkedDataDisplay.svelte"
class NearbyImageVis implements SpecialVisualization {
// Class must be in SpecialVisualisations due to weird cyclical import that breaks the tests
@ -741,12 +742,20 @@ export default class SpecialVisualizations {
{
funcName: "import_mangrove_key",
docs: "Only makes sense in the usersettings. Allows to import a mangrove public key and to use this to make reviews",
args: [{
name: "text",
doc: "The text that is shown on the button",
}],
args: [
{
name: "text",
doc: "The text that is shown on the button",
},
],
needsUrls: [],
constr(state: SpecialVisualizationState, tagSource: UIEventSource<Record<string, string>>, argument: string[], feature: Feature, layer: LayerConfig): BaseUIElement {
constr(
state: SpecialVisualizationState,
tagSource: UIEventSource<Record<string, string>>,
argument: string[],
feature: Feature,
layer: LayerConfig
): BaseUIElement {
const [text] = argument
return new SvelteUIElement(ImportReviewIdentity, { state, text })
},
@ -1718,6 +1727,34 @@ export default class SpecialVisualizations {
)
},
},
{
    // Special visualisation which renders the LinkedDataDisplay component for the URL
    // found in the tag named by the first argument (falls back to "website")
    funcName: "linked_data_from_website",
    docs: "Attempts to load (via a proxy) the specified website and parsed ld+json from there. Suitable data will be offered to import into OSM",
    args: [
        {
            name: "key",
            defaultValue: "website",
            doc: "Attempt to load ld+json from the specified URL. This can be in an embedded <script type='ld+json'>",
        },
    ],
    needsUrls: [Constants.linkedDataProxy],
    constr(
        state: SpecialVisualizationState,
        tagsSource: UIEventSource<Record<string, string>>,
        argument: string[],
        feature: Feature,
        layer: LayerConfig
    ): BaseUIElement {
        const key = argument[0] ?? "website"
        return new SvelteUIElement(LinkedDataDisplay, {
            feature,
            state,
            tagsSource,
            // LinkedDataDisplay declares 'argument' as a prop without a default value,
            // so it has to be handed over explicitly
            argument,
            key,
            layer,
        })
    },
},
]
specialVisualizations.push(new AutoApplyButton(specialVisualizations))

View file

@ -1,19 +1,39 @@
<script lang="ts">
// Testing grounds
import LanguageElement from "./Popup/LanguageElement/LanguageElement.svelte"
import { UIEventSource } from "../Logic/UIEventSource"
let tags = new UIEventSource({ _country: "Be" })
import { Stores } from "../Logic/UIEventSource"
import { Utils } from "../Utils"
import jsonld from "jsonld"
import SelectedElementView from "./BigComponents/SelectedElementView.svelte"
import * as shop from "../assets/generated/layers/shops.json"
import LayerConfig from "../Models/ThemeConfig/LayerConfig"
import type { OpeningHour } from "./OpeningHours/OpeningHours"
import { OH } from "./OpeningHours/OpeningHours"
import type { Geometry } from "geojson"
const shopLayer = new LayerConfig(<any>shop, "shops")
const colruytUrl = "https://www.colruyt.be/nl/winkelzoeker/colruyt-gent"
const url = "https://stores.delhaize.be/nl/ad-delhaize-dok-noord"
let data = Stores.FromPromise(fetchJsonLd(url)).mapD(properties => ({
...properties,
id: properties["website"],
shop: "supermarket",
_country: "be",
}))
let feature = data.mapD(properties => {
return <any>{
type: "Feature",
properties,
geometry: {
type: "Point",
coordinates: properties["geo"],
},
}
})
</script>
<LanguageElement
feature={undefined}
item_render={"{language()} is spoken here"}
key="language"
layer={undefined}
question="What languages are spoken here?"
render_all={"Following languages are spoken here: {list()}"}
single_render={"Only {language()} is spoken here"}
state={undefined}
{tags}
/>
{#if $data}
<SelectedElementView layer={shopLayer} selectedElement={$feature} state={undefined} tags={data} />
{/if}