Add module to fetch JSON-LD data from a website (via a proxy)
This commit is contained in:
parent
1b06eee15b
commit
352414b29d
17 changed files with 388 additions and 351 deletions
|
@ -148,7 +148,16 @@ export default class ScriptUtils {
|
|||
const data = await ScriptUtils.Download(url, headers)
|
||||
return JSON.parse(data["content"])
|
||||
}
|
||||
|
||||
/**
 * Downloads the given URL with the global `fetch` and returns the response body as text.
 *
 * @param url the address to download
 * @param headers optional request headers, handed to `fetch` unchanged
 * @returns `{ content }` holding the response body as a string. The `{ redirect }`
 * variant of the declared return type is never produced by this method.
 * @throws Error if the server answers with a non-2xx status, so that an error page
 * is not mistaken for real content by the caller
 */
public static async DownloadFetch(
    url: string,
    headers?: any
): Promise<{ content: string } | { redirect: string }> {
    console.log("Fetching", url)
    const response = await fetch(url, { headers })
    if (!response.ok) {
        throw new Error(
            "Could not fetch " + url + ": " + response.status + " " + response.statusText
        )
    }
    const data = await response.text()
    // Only report the size: dumping the complete body floods the console
    console.log("Fetched", url, "(" + data.length + " characters)")
    return { content: data }
}
|
||||
public static Download(
|
||||
url: string,
|
||||
headers?: any
|
||||
|
|
80
scripts/importscripts/compareWebsiteData.ts
Normal file
80
scripts/importscripts/compareWebsiteData.ts
Normal file
|
@ -0,0 +1,80 @@
|
|||
import fs from "fs"
|
||||
// import readline from "readline"
|
||||
import Script from "../Script"
|
||||
import LinkedDataLoader from "../../src/Logic/Web/LinkedDataLoader"
|
||||
import UrlValidator from "../../src/UI/InputElement/Validators/UrlValidator"
|
||||
// vite-node scripts/importscripts/compareWebsiteData.ts -- ~/Downloads/ShopsWithWebsiteNodes.csv ~/data/scraped_websites/
|
||||
/*
|
||||
class CompareWebsiteData extends Script {
|
||||
constructor() {
|
||||
super("Given a csv file with 'id', 'tags' and 'website', attempts to fetch jsonld and compares the attributes. Usage: csv-file datadir")
|
||||
}
|
||||
|
||||
private readonly urlFormatter = new UrlValidator()
|
||||
async getWithCache(cachedir : string, url: string): Promise<any>{
|
||||
const filename= cachedir+"/"+encodeURIComponent(url)
|
||||
if(fs.existsSync(filename)){
|
||||
return JSON.parse(fs.readFileSync(filename, "utf-8"))
|
||||
}
|
||||
const jsonLd = await LinkedDataLoader.fetchJsonLdWithProxy(url)
|
||||
console.log("Got:", jsonLd)
|
||||
fs.writeFileSync(filename, JSON.stringify(jsonLd))
|
||||
return jsonLd
|
||||
}
|
||||
async handleEntry(line: string, cachedir: string, targetfile: string) : Promise<boolean>{
|
||||
const id = JSON.parse(line.split(",")[0])
|
||||
let tags = line.substring(line.indexOf("{") - 1)
|
||||
tags = tags.substring(1, tags.length - 1)
|
||||
tags = tags.replace(/""/g, "\"")
|
||||
const data = JSON.parse(tags)
|
||||
|
||||
const website = data.website //this.urlFormatter.reformat(data.website)
|
||||
if(!website.startsWith("https://stores.delhaize.be")){
|
||||
return false
|
||||
}
|
||||
console.log(website)
|
||||
const jsonld = await this.getWithCache(cachedir, website)
|
||||
console.log(jsonld)
|
||||
if(Object.keys(jsonld).length === 0){
|
||||
return false
|
||||
}
|
||||
const diff = LinkedDataLoader.removeDuplicateData(jsonld, data)
|
||||
fs.appendFileSync(targetfile, id +", "+ JSON.stringify(diff)+"\n")
|
||||
return true
|
||||
}
|
||||
|
||||
async main(args: string[]): Promise<void> {
|
||||
if (args.length < 2) {
|
||||
throw "Not enough arguments"
|
||||
}
|
||||
|
||||
|
||||
const readInterface = readline.createInterface({
|
||||
input: fs.createReadStream(args[0]),
|
||||
})
|
||||
|
||||
let handled = 0
|
||||
let diffed = 0
|
||||
const targetfile = "diff.csv"
|
||||
fs.writeFileSync(targetfile, "id, diff-json\n")
|
||||
for await (const line of readInterface) {
|
||||
try {
|
||||
if(line.startsWith("\"id\"")){
|
||||
continue
|
||||
}
|
||||
const madeComparison = await this.handleEntry(line, args[1], targetfile)
|
||||
handled ++
|
||||
diffed = diffed + (madeComparison ? 1 : 0)
|
||||
if(handled % 1000 == 0){
|
||||
// console.log("Handled ",handled," got ",diffed,"diff results")
|
||||
}
|
||||
} catch (e) {
|
||||
// console.error(e)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
new CompareWebsiteData().run()
|
||||
*/
|
0
scripts/scrapeOsm.ts
Normal file
0
scripts/scrapeOsm.ts
Normal file
|
@ -15,6 +15,7 @@ class ServerLdScrape extends Script {
|
|||
mimetype: "application/ld+json",
|
||||
async handle(content, searchParams: URLSearchParams) {
|
||||
const url = searchParams.get("url")
|
||||
console.log("Fetching", url)
|
||||
if (cache[url]) {
|
||||
return JSON.stringify(cache[url])
|
||||
}
|
||||
|
|
|
@ -1,39 +1,42 @@
|
|||
import Script from "../Script"
|
||||
import { Utils } from "../../src/Utils"
|
||||
import VeloparkLoader, { VeloparkData } from "../../src/Logic/Web/VeloparkLoader"
|
||||
import fs from "fs"
|
||||
import { Overpass } from "../../src/Logic/Osm/Overpass"
|
||||
import { RegexTag } from "../../src/Logic/Tags/RegexTag"
|
||||
import Constants from "../../src/Models/Constants"
|
||||
import { ImmutableStore } from "../../src/Logic/UIEventSource"
|
||||
import { BBox } from "../../src/Logic/BBox"
|
||||
import LinkedDataLoader from "../../src/Logic/Web/LinkedDataLoader"
|
||||
|
||||
class VeloParkToGeojson extends Script {
|
||||
/**
 * Creates the script; the only thing it does is hand the text below to the
 * constructor of the `Script` base class.
 */
constructor() {
    super(
        "Downloads the latest Velopark data and converts it to a geojson, which will be saved at the current directory",
    )
}
|
||||
|
||||
/**
 * Serializes the given features as a GeoJSON `FeatureCollection` and writes it to
 * `<filename>_.geojson` (the timestamp part of the name is currently commented out).
 * Logs the name of the written file afterwards.
 *
 * NOTE(review): only the first 25 features are written and a "#" marker is added to
 * the collection. Both are flagged "TODO REMOVE" and look like temporary debugging
 * aids - confirm before relying on the output being complete.
 *
 * @param filename prefix of the file to write
 * @param features the features to export; the passed value is not modified
 */
exportTo(filename: string, features) {
    // Work on a truncated copy instead of reassigning the parameter
    const shownFeatures = features.slice(0, 25) // TODO REMOVE
    const file = filename + "_" + /*new Date().toISOString() + */ ".geojson"
    const collection = {
        type: "FeatureCollection",
        "#": "Only 25 features are shown!", // TODO REMOVE
        features: shownFeatures,
    }
    fs.writeFileSync(file, JSON.stringify(collection, null, " "))
    console.log("Written", file)
}
|
||||
|
||||
async main(args: string[]): Promise<void> {
|
||||
console.log("Downloading velopark data")
|
||||
// Download data for NIS-code 1000. 1000 means: all of belgium
|
||||
const url = "https://www.velopark.be/api/parkings/1000"
|
||||
const data = <VeloparkData[]>await Utils.downloadJson(url)
|
||||
const allVelopark = await LinkedDataLoader.fetchJsonLd(url, { country: "be" })
|
||||
this.exportTo("velopark_all", allVelopark)
|
||||
|
||||
const bboxBelgium = new BBox([
|
||||
[2.51357303225, 49.5294835476],
|
||||
|
@ -44,15 +47,13 @@ class VeloParkToGeojson extends Script {
|
|||
[],
|
||||
Constants.defaultOverpassUrls[0],
|
||||
new ImmutableStore(60 * 5),
|
||||
false
|
||||
false,
|
||||
)
|
||||
const alreadyLinkedFeatures = await alreadyLinkedQuery.queryGeoJson(bboxBelgium)
|
||||
const seenIds = new Set<string>(
|
||||
alreadyLinkedFeatures[0].features.map((f) => f.properties["ref:velopark"])
|
||||
alreadyLinkedFeatures[0].features.map((f) => f.properties["ref:velopark"]),
|
||||
)
|
||||
console.log("OpenStreetMap contains", seenIds.size, "bicycle parkings with a velopark ref")
|
||||
const allVelopark = data.map((f) => VeloparkLoader.convert(f))
|
||||
this.exportTo("velopark_all", allVelopark)
|
||||
|
||||
const features = allVelopark.filter((f) => !seenIds.has(f.properties["ref:velopark"]))
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue