forked from MapComplete/MapComplete
Add linked data module which scrapes websites
This commit is contained in:
parent
2af6af7630
commit
35c31f9861
15 changed files with 870 additions and 130 deletions
142
src/Logic/Web/LinkedDataLoader.ts
Normal file
142
src/Logic/Web/LinkedDataLoader.ts
Normal file
|
@ -0,0 +1,142 @@
|
|||
import type { Geometry } from "geojson"
|
||||
import jsonld from "jsonld"
|
||||
import { OH, OpeningHour } from "../../UI/OpeningHours/OpeningHours"
|
||||
import { Utils } from "../../Utils"
|
||||
import PhoneValidator from "../../UI/InputElement/Validators/PhoneValidator"
|
||||
import EmailValidator from "../../UI/InputElement/Validators/EmailValidator"
|
||||
import { Validator } from "../../UI/InputElement/Validator"
|
||||
import UrlValidator from "../../UI/InputElement/Validators/UrlValidator"
|
||||
|
||||
/**
 * Downloads the linked data (JSON-LD, schema.org vocabulary) which a website exposes
 * and converts it into a flat, OSM-like key/value record.
 *
 * The website is not fetched directly: the request goes through a proxy which
 * extracts the JSON-LD graph from the page.
 */
export default class LinkedDataLoader {
    /**
     * JSON-LD context used to compact the downloaded graph:
     * maps the schema.org IRIs onto the short keys used in the result.
     */
    private static readonly COMPACTING_CONTEXT = {
        name: "http://schema.org/name",
        website: { "@id": "http://schema.org/url", "@type": "@id" },
        phone: { "@id": "http://schema.org/telephone" },
        email: { "@id": "http://schema.org/email" },
        image: { "@id": "http://schema.org/image", "@type": "@id" },
        opening_hours: { "@id": "http://schema.org/openingHoursSpecification" },
        openingHours: { "@id": "http://schema.org/openingHours", "@container": "@set" },

        geo: { "@id": "http://schema.org/geo" },
    }

    /**
     * JSON-LD context used when flattening an `openingHoursSpecification`.
     * `dayOfWeek` is a `@set`, so it is always an array when present.
     */
    private static readonly COMPACTING_CONTEXT_OH = {
        dayOfWeek: { "@id": "http://schema.org/dayOfWeek", "@container": "@set" },
        closes: { "@id": "http://schema.org/closes" },
        opens: { "@id": "http://schema.org/opens" },
    }

    /**
     * Validators whose `reformat` is applied to the value of the matching compacted key.
     */
    private static readonly formatters: Record<string, Validator> = {
        phone: new PhoneValidator(),
        email: new EmailValidator(),
        website: new UrlValidator(undefined, undefined, true),
    }

    /**
     * Keys which are removed from the compacted result.
     */
    private static readonly ignoreKeys: string[] = [
        "http://schema.org/logo",
        "http://schema.org/address",
        "@type",
        "@id",
        "@base",
        "http://schema.org/contentUrl",
        "http://schema.org/datePublished",
        "http://schema.org/description",
        "http://schema.org/hasMap",
        "http://schema.org/priceRange",
        "http://schema.org/contactPoint",
    ]

    /**
     * Converts a schema.org `geo` node into a GeoJSON point.
     *
     * @param geo the JSON-LD node carrying `http://schema.org/latitude` and `http://schema.org/longitude`
     * @returns a GeoJSON `Point` with coordinates in `[lon, lat]` order
     */
    static async geoToGeometry(geo): Promise<Geometry> {
        const context = {
            lat: {
                "@id": "http://schema.org/latitude",
            },
            lon: {
                "@id": "http://schema.org/longitude", // TODO formatting to decimal should be possible from this type?
            },
        }
        const flattened = await jsonld.compact(geo, context)

        return {
            type: "Point",
            coordinates: [Number(flattened.lon), Number(flattened.lat)],
        }
    }

    /**
     * Parses http://schema.org/openingHours
     *
     * // Weird data format from C&A
     * LinkedDataLoader.ohStringToOsmFormat("MO 09:30-18:00 TU 09:30-18:00 WE 09:30-18:00 TH 09:30-18:00 FR 09:30-18:00 SA 09:30-18:00") // => "Mo-Sa 09:30-18:00"
     */
    static ohStringToOsmFormat(oh: string) {
        oh = oh.toLowerCase()
        if (oh === "mo-su") {
            return "24/7"
        }
        // Peels "<days> <from>-<to>" rules off the front for as long as more text follows
        const regex = /([a-z]+ [0-9:]+-[0-9:]+) (.*)/
        const parts: string[] = []
        let match = oh.match(regex)
        while (match) {
            parts.push(match[1])
            oh = match[2]
            match = oh.match(regex)
        }
        // Whatever remains is the last (or only) rule
        parts.push(oh)

        // actually the same as OSM-oh
        return OH.simplify(parts.join(";"))
    }

    /**
     * Converts a schema.org `openingHoursSpecification` into an OSM `opening_hours` string.
     *
     * Nodes of the flattened graph which lack `dayOfWeek`, `opens` or `closes` are skipped,
     * as are rules which `OH.ParseRule` does not understand.
     *
     * @param openingHoursSpecification the JSON-LD node(s) to convert
     * @returns the merged opening hours as formatted by `OH.ToString`
     */
    static async ohToOsmFormat(openingHoursSpecification): Promise<string> {
        const flattened = await jsonld.flatten(
            openingHoursSpecification,
            <any>LinkedDataLoader.COMPACTING_CONTEXT_OH
        )
        const spec: any[] = flattened["@graph"] ?? []
        const allRules: OpeningHour[] = []
        for (const rule of spec) {
            if (!Array.isArray(rule.dayOfWeek) || !rule.opens || !rule.closes) {
                continue
            }
            const dow: string[] = rule.dayOfWeek
                .map((day) => LinkedDataLoader.dayOfWeekToOsm(day))
                .filter((day) => day !== "")
            if (dow.length === 0) {
                continue
            }
            const opens: string = rule.opens
            // "23:59" is how websites express "until midnight"
            const closes: string = rule.closes === "23:59" ? "24:00" : rule.closes
            allRules.push(...(OH.ParseRule(dow.join(",") + " " + opens + "-" + closes) ?? []))
        }

        return OH.ToString(OH.MergeTimes(allRules))
    }

    /**
     * Downloads the linked data of a website (via the proxy) and normalises it:
     * opening hours are converted to the OSM format, `geo` becomes a GeoJSON point,
     * empty and ignored keys are dropped, and phone/email/website are reformatted.
     *
     * @param url the website to extract the linked data from
     * @param country optional country code, handed to the formatters
     * @returns the compacted and cleaned properties
     */
    static async fetchJsonLd(url: string, country?: string): Promise<Record<string, any>> {
        // NOTE(review): this points at a local development server; the hosted endpoint is in the
        // trailing comment and should presumably come from configuration instead.
        const proxy = "http://127.0.0.1:2346/extractgraph" // "https://cache.mapcomplete.org/extractgraph"
        // The target URL must be encoded, otherwise its own '&', '#' or '?' would be interpreted as part of the proxy request
        const data = await Utils.downloadJson(`${proxy}?url=${encodeURIComponent(url)}`)
        const compacted = await jsonld.compact(data, LinkedDataLoader.COMPACTING_CONTEXT)
        if (compacted["opening_hours"]) {
            compacted["opening_hours"] = await LinkedDataLoader.ohToOsmFormat(
                compacted["opening_hours"]
            )
        }
        if (compacted["openingHours"]) {
            // The textual 'openingHours' takes precedence over the specification
            const ohspec: string[] = compacted["openingHours"]
            compacted["opening_hours"] = OH.simplify(
                ohspec.map((r) => LinkedDataLoader.ohStringToOsmFormat(r)).join("; ")
            )
            delete compacted["openingHours"]
        }
        if (compacted["geo"]) {
            compacted["geo"] = <any>await LinkedDataLoader.geoToGeometry(compacted["geo"])
        }
        for (const k of Object.keys(compacted)) {
            if (compacted[k] === "" || LinkedDataLoader.ignoreKeys.includes(k)) {
                delete compacted[k]
                continue
            }
            const formatter = LinkedDataLoader.formatters[k]
            if (!formatter) {
                continue
            }
            if (country) {
                compacted[k] = formatter.reformat(<string>compacted[k], () => country)
            } else {
                compacted[k] = formatter.reformat(<string>compacted[k])
            }
        }
        return <any>compacted
    }

    /**
     * Converts a single `dayOfWeek` value into a lowercase two-letter day code.
     * Accepts a plain day name ("Monday"), an IRI ("http://schema.org/Monday")
     * or a node reference (`{"@id": "http://schema.org/Monday"}`).
     * Returns the empty string if no name can be determined.
     */
    private static dayOfWeekToOsm(day: string | { "@id"?: string }): string {
        const raw: string = typeof day === "string" ? day : day?.["@id"] ?? ""
        // Only keep what follows the last '/' or '#', so that an IRI is reduced to the day name
        const nameStart = Math.max(raw.lastIndexOf("/"), raw.lastIndexOf("#")) + 1
        return raw.substring(nameStart).toLowerCase().substring(0, 2)
    }
}
|
|
@ -112,6 +112,7 @@ export default class Constants {
|
|||
public static countryCoderEndpoint: string = Constants.config.country_coder_host
|
||||
public static osmAuthConfig: AuthConfig = Constants.config.oauth_credentials
|
||||
public static nominatimEndpoint: string = Constants.config.nominatimEndpoint
|
||||
public static linkedDataProxy: string = Constants.config["jsonld-proxy"]
|
||||
/**
|
||||
* These are the values that are allowed to use as 'backdrop' icon for a map pin
|
||||
*/
|
||||
|
|
|
@ -610,6 +610,9 @@ export class AddEditingElements extends DesugaringStep<LayerConfigJson> {
|
|||
typeof sv === "string" ? undefined : sv.func.funcName
|
||||
)
|
||||
)
|
||||
if (!allIds.has("lod")) {
|
||||
json.tagRenderings.push(this._desugaring.tagRenderings.get("lod"))
|
||||
}
|
||||
if (!usedSpecialFunctions.has("minimap")) {
|
||||
json.tagRenderings.push(this._desugaring.tagRenderings.get("minimap"))
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
export let selectedElement: Feature
|
||||
export let highlightedRendering: UIEventSource<string> = undefined
|
||||
|
||||
export let tags: UIEventSource<Record<string, string>> = state.featureProperties.getStore(
|
||||
export let tags: UIEventSource<Record<string, string>> = state?.featureProperties?.getStore(
|
||||
selectedElement.properties.id
|
||||
)
|
||||
|
||||
|
@ -22,11 +22,14 @@
|
|||
let stillMatches = tags.map(tags => !layer?.source?.osmTags || layer.source.osmTags?.matchesProperties(tags))
|
||||
|
||||
let _metatags: Record<string, string>
|
||||
if(state?.userRelatedState?.preferencesAsTags){
|
||||
|
||||
onDestroy(
|
||||
state.userRelatedState.preferencesAsTags.addCallbackAndRun((tags) => {
|
||||
_metatags = tags
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
let knownTagRenderings: Store<TagRenderingConfig[]> = tags.mapD((tgs) =>
|
||||
layer.tagRenderings.filter(
|
||||
|
|
|
@ -30,7 +30,7 @@ export class ImageCarousel extends Toggle {
|
|||
try {
|
||||
let image: BaseUIElement = new SvelteUIElement(AttributedImage, {
|
||||
image: url,
|
||||
previewedImage: state.previewedImage,
|
||||
previewedImage: state?.previewedImage,
|
||||
})
|
||||
|
||||
if (url.key !== undefined) {
|
||||
|
|
|
@ -24,7 +24,7 @@ export default class PhoneValidator extends Validator {
|
|||
return generic
|
||||
}
|
||||
|
||||
public isValid(str, country: () => string): boolean {
|
||||
public isValid(str: string, country?: () => string): boolean {
|
||||
if (str === undefined) {
|
||||
return false
|
||||
}
|
||||
|
|
|
@ -1,13 +1,15 @@
|
|||
import { Validator } from "../Validator"
|
||||
|
||||
export default class UrlValidator extends Validator {
|
||||
constructor(name?: string, explanation?: string) {
|
||||
private readonly _forceHttps: boolean
|
||||
constructor(name?: string, explanation?: string, forceHttps?: boolean) {
|
||||
super(
|
||||
name ?? "url",
|
||||
explanation ??
|
||||
"The validatedTextField will format URLs to always be valid and have a https://-header (even though the 'https'-part will be hidden from the user. Furthermore, some tracking parameters will be removed",
|
||||
"url"
|
||||
)
|
||||
this._forceHttps = forceHttps ?? false
|
||||
}
|
||||
reformat(str: string): string {
|
||||
try {
|
||||
|
@ -22,6 +24,9 @@ export default class UrlValidator extends Validator {
|
|||
} else {
|
||||
url = new URL(str)
|
||||
}
|
||||
if (this._forceHttps) {
|
||||
url.protocol = "https:"
|
||||
}
|
||||
const blacklistedTrackingParams = [
|
||||
"fbclid", // Oh god, how I hate the fbclid. Let it burn, burn in hell!
|
||||
"gclid",
|
||||
|
|
90
src/UI/LinkedDataDisplay.svelte
Normal file
90
src/UI/LinkedDataDisplay.svelte
Normal file
|
@ -0,0 +1,90 @@
|
|||
<script lang="ts">
  /**
   * Shows attributes that are loaded via linked data and which are suitable for import
   */
  import type { SpecialVisualizationState } from "./SpecialVisualization"
  import type { Store } from "../Logic/UIEventSource"
  import { Stores, UIEventSource } from "../Logic/UIEventSource"

  import type { Feature, Geometry } from "geojson"
  import LayerConfig from "../Models/ThemeConfig/LayerConfig"
  import LinkedDataLoader from "../Logic/Web/LinkedDataLoader"
  import Loading from "./Base/Loading.svelte"
  import { GeoOperations } from "../Logic/GeoOperations"
  import { OH } from "./OpeningHours/OpeningHours"

  export let state: SpecialVisualizationState
  export let tagsSource: UIEventSource<Record<string, string>>
  export let argument: string[]
  export let feature: Feature
  export let layer: LayerConfig
  /** The tag of the feature which contains the URL of the website to load */
  export let key: string

  // Only defined when both the country and a usable URL are known
  let url = tagsSource.mapD(tags => {
    if (!tags._country || !tags[key] || tags[key] === "undefined") {
      return undefined
    }
    return ({ url: tags[key], country: tags._country })
  })
  // Holds either {success: ...} or {error: ...} once the download has finished
  let dataWithErr = url.bindD(({ url, country }) => {
    return Stores.FromPromiseWithErr(LinkedDataLoader.fetchJsonLd(url, country))
  })

  let error = dataWithErr.mapD(d => d["error"])
  let data = dataWithErr.mapD(d => d["success"])

  // Distance between the location given by the website and the feature
  let distanceToFeature: Store<string> = data.mapD(d => d.geo).mapD(geo => {
    const dist = Math.round(GeoOperations.distanceBetween(
      GeoOperations.centerpointCoordinates(<Geometry>geo), GeoOperations.centerpointCoordinates(feature)))
    return dist + "m"
  })
  // The downloaded data, minus everything that the feature already has with the same value
  let dataCleaned = data.mapD(d => {
    const featureTags = tagsSource.data
    // Work on a copy: 'data' is still needed in its original form (e.g. for the distance)
    d = { ...d }
    delete d["@context"]
    // The location is shown as a distance, never as an importable attribute
    delete d.geo
    for (const k in d) {
      const v = featureTags[k]
      if (!v) {
        continue
      }
      if (k === "opening_hours") {
        // Normalise the opening hours of the feature in the same way as the downloaded ones before comparing
        const oh = [].concat(...v.split(";").map(r => OH.ParseRule(r) ?? []))
        const merged = OH.ToString(OH.MergeTimes(oh))
        if (merged === d[k]) {
          delete d[k]
          continue
        }
      }
      if (featureTags[k] === d[k]) {
        delete d[k]
      }
    }
    return d
  }, [tagsSource])

</script>
{#if $error}
  <div class="alert">
    {$error}
  </div>
{:else if $url}
  <div class="flex flex-col border border-dashed border-gray-500 p-1">
    {#if $dataCleaned !== undefined && Object.keys($dataCleaned).length === 0}
      No new data from website
    {:else if !$data}
      <Loading />
    {:else}
      {$distanceToFeature}
      <ul>
        {#each Object.keys($dataCleaned) as k}
          <li>
            <b>{k}</b>: {JSON.stringify($dataCleaned[k])} {$tagsSource[k]} {($dataCleaned[k]) === $tagsSource[k]}
          </li>
        {/each}
      </ul>
    {/if}
  </div>
{/if}
|
|
@ -93,6 +93,7 @@ import SpecialVisualisationUtils from "./SpecialVisualisationUtils"
|
|||
import LoginButton from "./Base/LoginButton.svelte"
|
||||
import Toggle from "./Input/Toggle"
|
||||
import ImportReviewIdentity from "./Reviews/ImportReviewIdentity.svelte"
|
||||
import LinkedDataDisplay from "./LinkedDataDisplay.svelte"
|
||||
|
||||
class NearbyImageVis implements SpecialVisualization {
|
||||
// Class must be in SpecialVisualisations due to weird cyclical import that breaks the tests
|
||||
|
@ -741,12 +742,20 @@ export default class SpecialVisualizations {
|
|||
{
|
||||
funcName: "import_mangrove_key",
|
||||
docs: "Only makes sense in the usersettings. Allows to import a mangrove public key and to use this to make reviews",
|
||||
args: [{
|
||||
name: "text",
|
||||
doc: "The text that is shown on the button",
|
||||
}],
|
||||
args: [
|
||||
{
|
||||
name: "text",
|
||||
doc: "The text that is shown on the button",
|
||||
},
|
||||
],
|
||||
needsUrls: [],
|
||||
constr(state: SpecialVisualizationState, tagSource: UIEventSource<Record<string, string>>, argument: string[], feature: Feature, layer: LayerConfig): BaseUIElement {
|
||||
constr(
|
||||
state: SpecialVisualizationState,
|
||||
tagSource: UIEventSource<Record<string, string>>,
|
||||
argument: string[],
|
||||
feature: Feature,
|
||||
layer: LayerConfig
|
||||
): BaseUIElement {
|
||||
const [text] = argument
|
||||
return new SvelteUIElement(ImportReviewIdentity, { state, text })
|
||||
},
|
||||
|
@ -1718,6 +1727,34 @@ export default class SpecialVisualizations {
|
|||
)
|
||||
},
|
||||
},
|
||||
{
|
||||
funcName: "linked_data_from_website",
|
||||
docs: "Attempts to load (via a proxy) the specified website and parsed ld+json from there. Suitable data will be offered to import into OSM",
|
||||
args: [
|
||||
{
|
||||
name: "key",
|
||||
defaultValue: "website",
|
||||
doc: "Attempt to load ld+json from the specified URL. This can be in an embedded <script type='ld+json'>",
|
||||
},
|
||||
],
|
||||
needsUrls: [Constants.linkedDataProxy],
|
||||
constr(
|
||||
state: SpecialVisualizationState,
|
||||
tagsSource: UIEventSource<Record<string, string>>,
|
||||
argument: string[],
|
||||
feature: Feature,
|
||||
layer: LayerConfig
|
||||
): BaseUIElement {
|
||||
const key = argument[0] ?? "website"
|
||||
return new SvelteUIElement(LinkedDataDisplay, {
|
||||
feature,
|
||||
state,
|
||||
tagsSource,
|
||||
key,
|
||||
layer,
|
||||
})
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
specialVisualizations.push(new AutoApplyButton(specialVisualizations))
|
||||
|
|
|
@ -1,19 +1,39 @@
|
|||
<script lang="ts">
|
||||
// Testing grounds
|
||||
import LanguageElement from "./Popup/LanguageElement/LanguageElement.svelte"
|
||||
import { UIEventSource } from "../Logic/UIEventSource"
|
||||
|
||||
let tags = new UIEventSource({ _country: "Be" })
|
||||
import { Stores } from "../Logic/UIEventSource"
|
||||
import { Utils } from "../Utils"
|
||||
import jsonld from "jsonld"
|
||||
import SelectedElementView from "./BigComponents/SelectedElementView.svelte"
|
||||
import * as shop from "../assets/generated/layers/shops.json"
|
||||
import LayerConfig from "../Models/ThemeConfig/LayerConfig"
|
||||
import type { OpeningHour } from "./OpeningHours/OpeningHours"
|
||||
import { OH } from "./OpeningHours/OpeningHours"
|
||||
import type { Geometry } from "geojson"
|
||||
|
||||
const shopLayer = new LayerConfig(<any>shop, "shops")
|
||||
|
||||
|
||||
const colruytUrl = "https://www.colruyt.be/nl/winkelzoeker/colruyt-gent"
|
||||
const url = "https://stores.delhaize.be/nl/ad-delhaize-dok-noord"
|
||||
let data = Stores.FromPromise(fetchJsonLd(url)).mapD(properties => ({
|
||||
...properties,
|
||||
id: properties["website"],
|
||||
shop: "supermarket",
|
||||
_country: "be",
|
||||
}))
|
||||
|
||||
let feature = data.mapD(properties => {
|
||||
return <any>{
|
||||
type: "Feature",
|
||||
properties,
|
||||
geometry: {
|
||||
type: "Point",
|
||||
coordinates: properties["geo"],
|
||||
},
|
||||
}
|
||||
})
|
||||
</script>
|
||||
|
||||
<LanguageElement
|
||||
feature={undefined}
|
||||
item_render={"{language()} is spoken here"}
|
||||
key="language"
|
||||
layer={undefined}
|
||||
question="What languages are spoken here?"
|
||||
render_all={"Following languages are spoken here: {list()}"}
|
||||
single_render={"Only {language()} is spoken here"}
|
||||
state={undefined}
|
||||
{tags}
|
||||
/>
|
||||
{#if $data}
|
||||
<SelectedElementView layer={shopLayer} selectedElement={$feature} state={undefined} tags={data} />
|
||||
{/if}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue