forked from MapComplete/MapComplete
		
	Add a module to fetch data (via a proxy) from websites that expose JSON-LD
This commit is contained in:
		
							parent
							
								
									1b06eee15b
								
							
						
					
					
						commit
						352414b29d
					
				
					 17 changed files with 388 additions and 351 deletions
				
			
		|  | @ -148,7 +148,16 @@ export default class ScriptUtils { | |||
|         const data = await ScriptUtils.Download(url, headers) | ||||
|         return JSON.parse(data["content"]) | ||||
|     } | ||||
| 
 | ||||
|     public static async DownloadFetch( | ||||
|         url: string, | ||||
|         headers?: any | ||||
|     ): Promise<{ content: string } | { redirect: string }> { | ||||
|         console.log("Fetching", url) | ||||
|         const req = await fetch(url, {headers}) | ||||
|         const data= await req.text() | ||||
|         console.log("Fetched", url,data) | ||||
|         return {content: data} | ||||
|     } | ||||
|     public static Download( | ||||
|         url: string, | ||||
|         headers?: any | ||||
|  |  | |||
							
								
								
									
										80
									
								
								scripts/importscripts/compareWebsiteData.ts
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								scripts/importscripts/compareWebsiteData.ts
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,80 @@ | |||
| import fs from "fs" | ||||
| // import readline from "readline"
 | ||||
| import Script from "../Script" | ||||
| import LinkedDataLoader from "../../src/Logic/Web/LinkedDataLoader" | ||||
| import UrlValidator from "../../src/UI/InputElement/Validators/UrlValidator" | ||||
| // vite-node scripts/importscripts/compareWebsiteData.ts -- ~/Downloads/ShopsWithWebsiteNodes.csv ~/data/scraped_websites/
 | ||||
// Disabled one-off import script: reads a CSV of OSM objects ('id', 'tags',
// 'website'), fetches JSON-LD for each website (cached on disk), and writes
// the attribute diff against the OSM tags to diff.csv.
// NOTE(review): kept commented out. Before re-enabling, restore the
// `import readline from "readline"` line above — `main` calls
// readline.createInterface, but that import is commented out, so
// un-commenting this block as-is would not compile.
/*
class CompareWebsiteData extends Script {
    constructor() {
        super("Given a csv file with 'id', 'tags' and 'website', attempts to fetch jsonld and compares the attributes. Usage: csv-file datadir")
    }

    private readonly urlFormatter = new UrlValidator()

    // Returns the JSON-LD for `url`, reading from / writing to a per-URL
    // cache file under `cachedir` so reruns do not refetch.
    async getWithCache(cachedir : string, url: string): Promise<any>{
        const filename=  cachedir+"/"+encodeURIComponent(url)
        if(fs.existsSync(filename)){
            return JSON.parse(fs.readFileSync(filename, "utf-8"))
        }
        const jsonLd = await LinkedDataLoader.fetchJsonLdWithProxy(url)
        console.log("Got:", jsonLd)
        fs.writeFileSync(filename, JSON.stringify(jsonLd))
        return jsonLd
    }

    // Handles one CSV line; returns true when a diff was written to
    // `targetfile`, false when the line was skipped (non-Delhaize URL or
    // empty JSON-LD). Hand-rolled CSV parsing: assumes the tags column is a
    // JSON object with doubled quotes ("" -> ").
    async handleEntry(line: string, cachedir: string, targetfile: string) : Promise<boolean>{
        const id = JSON.parse(line.split(",")[0])
        let tags = line.substring(line.indexOf("{") - 1)
        tags = tags.substring(1, tags.length - 1)
        tags = tags.replace(/""/g, "\"")
        const data = JSON.parse(tags)

        const website = data.website //this.urlFormatter.reformat(data.website)
        // Hard-coded filter: only Delhaize store pages are compared here.
        if(!website.startsWith("https://stores.delhaize.be")){
            return false
        }
        console.log(website)
        const jsonld = await this.getWithCache(cachedir, website)
        console.log(jsonld)
        if(Object.keys(jsonld).length === 0){
            return false
        }
        const diff = LinkedDataLoader.removeDuplicateData(jsonld, data)
        fs.appendFileSync(targetfile, id +", "+ JSON.stringify(diff)+"\n")
        return true
    }

    // Entry point: args[0] = CSV file, args[1] = cache directory.
    // Streams the CSV line by line; per-line failures are swallowed so one
    // bad row does not abort the whole run.
    async main(args: string[]): Promise<void> {
        if (args.length < 2) {
            throw "Not enough arguments"
        }


        const readInterface = readline.createInterface({
            input: fs.createReadStream(args[0]),
        })

        let handled = 0
        let diffed = 0
        const targetfile = "diff.csv"
        fs.writeFileSync(targetfile, "id, diff-json\n")
        for await (const line of readInterface) {
            try {
                // Skip the CSV header row.
                if(line.startsWith("\"id\"")){
                    continue
                }
                const madeComparison = await this.handleEntry(line, args[1], targetfile)
                handled ++
                diffed = diffed + (madeComparison ? 1 : 0)
                if(handled % 1000 == 0){
                 //   console.log("Handled ",handled," got ",diffed,"diff results")
                }
            } catch (e) {
               // console.error(e)
            }
        }

    }
}

new CompareWebsiteData().run()
*/
							
								
								
									
										0
									
								
								scripts/scrapeOsm.ts
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								scripts/scrapeOsm.ts
									
										
									
									
									
										Normal file
									
								
							|  | @ -15,6 +15,7 @@ class ServerLdScrape extends Script { | |||
|                 mimetype: "application/ld+json", | ||||
|                 async handle(content, searchParams: URLSearchParams) { | ||||
|                     const url = searchParams.get("url") | ||||
|                     console.log("Fetching", url) | ||||
|                     if (cache[url]) { | ||||
|                         return JSON.stringify(cache[url]) | ||||
|                     } | ||||
|  |  | |||
|  | @ -1,39 +1,42 @@ | |||
| import Script from "../Script" | ||||
| import { Utils } from "../../src/Utils" | ||||
| import VeloparkLoader, { VeloparkData } from "../../src/Logic/Web/VeloparkLoader" | ||||
| import fs from "fs" | ||||
| import { Overpass } from "../../src/Logic/Osm/Overpass" | ||||
| import { RegexTag } from "../../src/Logic/Tags/RegexTag" | ||||
| import Constants from "../../src/Models/Constants" | ||||
| import { ImmutableStore } from "../../src/Logic/UIEventSource" | ||||
| import { BBox } from "../../src/Logic/BBox" | ||||
| import LinkedDataLoader from "../../src/Logic/Web/LinkedDataLoader" | ||||
| 
 | ||||
| class VeloParkToGeojson extends Script { | ||||
|     constructor() { | ||||
|         super( | ||||
|             "Downloads the latest Velopark data and converts it to a geojson, which will be saved at the current directory" | ||||
|             "Downloads the latest Velopark data and converts it to a geojson, which will be saved at the current directory", | ||||
|         ) | ||||
|     } | ||||
| 
 | ||||
|     exportTo(filename: string, features) { | ||||
|         fs.writeFileSync( | ||||
|             filename + "_" + new Date().toISOString() + ".geojson", | ||||
|         features = features.slice(0,25) // TODO REMOVE
 | ||||
|            const file = filename + "_" + /*new Date().toISOString() + */".geojson" | ||||
|         fs.writeFileSync(file, | ||||
|             JSON.stringify( | ||||
|                 { | ||||
|                     type: "FeatureCollection", | ||||
|                     "#":"Only 25 features are shown!", // TODO REMOVE
 | ||||
|                     features, | ||||
|                 }, | ||||
|                 null, | ||||
|                 "    " | ||||
|             ) | ||||
|                 "    ", | ||||
|             ), | ||||
|         ) | ||||
|         console.log("Written",file) | ||||
|     } | ||||
| 
 | ||||
|     async main(args: string[]): Promise<void> { | ||||
|         console.log("Downloading velopark data") | ||||
|         // Download data for NIS-code 1000. 1000 means: all of belgium
 | ||||
|         const url = "https://www.velopark.be/api/parkings/1000" | ||||
|         const data = <VeloparkData[]>await Utils.downloadJson(url) | ||||
|         const allVelopark = await LinkedDataLoader.fetchJsonLd(url, { country: "be" }) | ||||
|         this.exportTo("velopark_all", allVelopark) | ||||
| 
 | ||||
|         const bboxBelgium = new BBox([ | ||||
|             [2.51357303225, 49.5294835476], | ||||
|  | @ -44,15 +47,13 @@ class VeloParkToGeojson extends Script { | |||
|             [], | ||||
|             Constants.defaultOverpassUrls[0], | ||||
|             new ImmutableStore(60 * 5), | ||||
|             false | ||||
|             false, | ||||
|         ) | ||||
|         const alreadyLinkedFeatures = await alreadyLinkedQuery.queryGeoJson(bboxBelgium) | ||||
|         const seenIds = new Set<string>( | ||||
|             alreadyLinkedFeatures[0].features.map((f) => f.properties["ref:velopark"]) | ||||
|             alreadyLinkedFeatures[0].features.map((f) => f.properties["ref:velopark"]), | ||||
|         ) | ||||
|         console.log("OpenStreetMap contains", seenIds.size, "bicycle parkings with a velopark ref") | ||||
|         const allVelopark = data.map((f) => VeloparkLoader.convert(f)) | ||||
|         this.exportTo("velopark_all", allVelopark) | ||||
| 
 | ||||
|         const features = allVelopark.filter((f) => !seenIds.has(f.properties["ref:velopark"])) | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue