| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  | import Script from "./Script" | 
					
						
							| 
									
										
										
										
											2023-07-27 01:48:54 +02:00
										 |  |  | import { Overpass } from "../src/Logic/Osm/Overpass" | 
					
						
							|  |  |  | import { RegexTag } from "../src/Logic/Tags/RegexTag" | 
					
						
							|  |  |  | import { ImmutableStore } from "../src/Logic/UIEventSource" | 
					
						
							|  |  |  | import { BBox } from "../src/Logic/BBox" | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  | import * as fs from "fs" | 
					
						
							| 
									
										
										
										
											2023-12-06 17:27:30 +01:00
										 |  |  | import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs" | 
					
						
							| 
									
										
										
										
											2023-06-01 14:32:45 +02:00
										 |  |  | import { Feature } from "geojson" | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  | import ScriptUtils from "./ScriptUtils" | 
					
						
							| 
									
										
										
										
											2023-07-27 01:48:54 +02:00
										 |  |  | import { Imgur } from "../src/Logic/ImageProviders/Imgur" | 
					
						
							|  |  |  | import { LicenseInfo } from "../src/Logic/ImageProviders/LicenseInfo" | 
					
						
							|  |  |  | import { Utils } from "../src/Utils" | 
					
						
							|  |  |  | import Constants from "../src/Models/Constants" | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | export default class GenerateImageAnalysis extends Script { | 
					
						
							| 
									
										
										
										
											2024-01-15 15:22:57 +01:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Max N in `image:N`-keys and `imageN` keys | 
					
						
							|  |  |  |      * @private | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     private static readonly maxImageIndex = 31 | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |     constructor() { | 
					
						
							|  |  |  |         super( | 
					
						
							| 
									
										
										
										
											2023-11-02 17:55:01 +01:00
										 |  |  |             [ | 
					
						
							|  |  |  |                 "Downloads (from overpass) all tags which have an imgur-image; then analyses the licenses and downloads all the images", | 
					
						
							|  |  |  |                 "", | 
					
						
							|  |  |  |                 "Arguments:", | 
					
						
							|  |  |  |                 "Path to download the images to", | 
					
						
							|  |  |  |                 "Path to save the overview to", | 
					
						
							|  |  |  |             ].join("\n") | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |         ) | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |     async fetchImages(key: string, datapath: string, refresh: boolean): Promise<void> { | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |         const targetPath = `${datapath}/features_with_${key.replace(/[:\/]/, "_")}.geojson` | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |         if (fs.existsSync(targetPath) && !refresh) { | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |             console.log("Skipping", key) | 
					
						
							|  |  |  |             return | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2023-01-10 02:52:09 +01:00
										 |  |  |         const tag = new RegexTag(key, /^https:\/\/i.imgur.com\/.*$/i) | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |         const overpass = new Overpass( | 
					
						
							|  |  |  |             tag, | 
					
						
							|  |  |  |             [], | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |             Constants.defaultOverpassUrls[0], //"https://overpass.kumi.systems/api/interpreter",
 | 
					
						
							| 
									
										
										
										
											2023-04-20 23:47:51 +02:00
										 |  |  |             new ImmutableStore(500), | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |             false | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         console.log("Starting query...") | 
					
						
							|  |  |  |         const data = await overpass.queryGeoJson(BBox.global) | 
					
						
							| 
									
										
										
										
											2023-06-01 14:32:45 +02:00
										 |  |  |         console.log( | 
					
						
							|  |  |  |             "Got data:", | 
					
						
							|  |  |  |             data[0].features.length, | 
					
						
							|  |  |  |             "items; timestamp:", | 
					
						
							|  |  |  |             data[1].toISOString() | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |         fs.writeFileSync(targetPath, JSON.stringify(data[0]), "utf8") | 
					
						
							|  |  |  |         console.log("Written", targetPath) | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |     async downloadData(datapath: string, refresh: boolean): Promise<void> { | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |         if (!fs.existsSync(datapath)) { | 
					
						
							|  |  |  |             fs.mkdirSync(datapath) | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |         await this.fetchImages("image", datapath, refresh) | 
					
						
							|  |  |  |         await this.fetchImages("image:streetsign", datapath, refresh) | 
					
						
							| 
									
										
										
										
											2024-07-29 01:33:32 +02:00
										 |  |  |         await this.fetchImages("image:menu", datapath, refresh) | 
					
						
							| 
									
										
										
										
											2024-01-15 15:22:57 +01:00
										 |  |  |         for (let i = 0; i < GenerateImageAnalysis.maxImageIndex; i++) { | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |             await this.fetchImages("image:" + i, datapath, refresh) | 
					
						
							| 
									
										
										
										
											2024-01-15 15:22:57 +01:00
										 |  |  |             await this.fetchImages("image" + i, datapath, refresh) | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     loadData(datapath: string): Feature[] { | 
					
						
							|  |  |  |         const allFeatures: Feature[] = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         const files = ScriptUtils.readDirRecSync(datapath) | 
					
						
							|  |  |  |         for (const file of files) { | 
					
						
							|  |  |  |             if (!file.endsWith(".geojson")) { | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             const contents = JSON.parse(fs.readFileSync(file, "utf8")) | 
					
						
							|  |  |  |             allFeatures.push(...contents.features) | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return allFeatures | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     async fetchImageMetadata(datapath: string, image: string): Promise<boolean> { | 
					
						
							|  |  |  |         if (image === undefined) { | 
					
						
							|  |  |  |             return false | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2023-01-10 02:52:09 +01:00
										 |  |  |         if (!image.match(/https:\/\/i\.imgur\.com\/[a-zA-Z0-9]+\.jpg/)) { | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |             return false | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |         const filename = image.replace(/[\/:.\-%]/g, "_") + ".json" | 
					
						
							|  |  |  |         const targetPath = datapath + "/" + filename | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |         if (fs.existsSync(targetPath)) { | 
					
						
							|  |  |  |             return false | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2024-04-13 02:40:21 +02:00
										 |  |  |         const attribution = await Imgur.singleton.DownloadAttribution({ url: image }) | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if ((attribution.artist ?? "") === "") { | 
					
						
							|  |  |  |             // This is an invalid attribution. We save the raw response as well
 | 
					
						
							|  |  |  |             const hash = image.substr("https://i.imgur.com/".length).split(".jpg")[0] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             const apiUrl = "https://api.imgur.com/3/image/" + hash | 
					
						
							|  |  |  |             const response = await Utils.downloadJsonCached(apiUrl, 365 * 24 * 60 * 60, { | 
					
						
							|  |  |  |                 Authorization: "Client-ID " + Constants.ImgurApiKey, | 
					
						
							|  |  |  |             }) | 
					
						
							|  |  |  |             const rawTarget = datapath + "/raw/" + filename | 
					
						
							|  |  |  |             console.log("Also storing the raw response to", rawTarget) | 
					
						
							|  |  |  |             await fs.writeFileSync(rawTarget, JSON.stringify(response, null, "    ")) | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |         await fs.writeFileSync(targetPath, JSON.stringify(attribution, null, "    ")) | 
					
						
							|  |  |  |         return true | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-01 14:32:45 +02:00
										 |  |  |     loadImageUrls(datapath: string): { allImages: Set<string>; imageSource: Map<string, string> } { | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |         let allImages = new Set<string>() | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |         const features = this.loadData(datapath) | 
					
						
							|  |  |  |         let imageSource: Map<string, string> = new Map<string, string>() | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         for (const feature of features) { | 
					
						
							|  |  |  |             allImages.add(feature.properties["image"]) | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |             imageSource[feature.properties["image"]] = feature.properties.id | 
					
						
							|  |  |  |             allImages.add(feature.properties["image:streetsign"]) | 
					
						
							| 
									
										
										
										
											2023-06-01 14:32:45 +02:00
										 |  |  |             imageSource[feature.properties["image:streetsign"]] = | 
					
						
							|  |  |  |                 feature.properties.id + " (streetsign)" | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-15 15:22:57 +01:00
										 |  |  |             for (let i = 0; i < GenerateImageAnalysis.maxImageIndex; i++) { | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |                 allImages.add(feature.properties["image:" + i]) | 
					
						
							| 
									
										
										
										
											2023-06-01 14:32:45 +02:00
										 |  |  |                 imageSource[ | 
					
						
							|  |  |  |                     feature.properties["image:" + i] | 
					
						
							|  |  |  |                 ] = `${feature.properties.id} (image:${i})` | 
					
						
							| 
									
										
										
										
											2024-01-15 15:22:57 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |                 allImages.add(feature.properties["image" + i]) | 
					
						
							|  |  |  |                 imageSource[ | 
					
						
							|  |  |  |                     feature.properties["image" + i] | 
					
						
							|  |  |  |                 ] = `${feature.properties.id} (image${i})` | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |         allImages.delete(undefined) | 
					
						
							|  |  |  |         allImages.delete(null) | 
					
						
							|  |  |  |         imageSource.delete(undefined) | 
					
						
							|  |  |  |         imageSource.delete(null) | 
					
						
							| 
									
										
										
										
											2023-06-01 14:32:45 +02:00
										 |  |  |         return { allImages, imageSource } | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     async downloadMetadata(datapath: string): Promise<void> { | 
					
						
							| 
									
										
										
										
											2023-06-01 14:32:45 +02:00
										 |  |  |         const { allImages, imageSource } = this.loadImageUrls(datapath) | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |         console.log("Detected", allImages.size, "images") | 
					
						
							|  |  |  |         let i = 0 | 
					
						
							|  |  |  |         let d = 0 | 
					
						
							|  |  |  |         let s = 0 | 
					
						
							|  |  |  |         let f = 0 | 
					
						
							|  |  |  |         let start = Date.now() | 
					
						
							|  |  |  |         for (const image of Array.from(allImages)) { | 
					
						
							|  |  |  |             i++ | 
					
						
							|  |  |  |             try { | 
					
						
							|  |  |  |                 const downloaded = await this.fetchImageMetadata(datapath, image) | 
					
						
							|  |  |  |                 const runningSecs = (Date.now() - start) / 1000 | 
					
						
							|  |  |  |                 const left = allImages.size - i | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 const estimatedActualSeconds = Math.floor((left * runningSecs) / (f + d)) | 
					
						
							|  |  |  |                 const estimatedActualMinutes = Math.floor(estimatedActualSeconds / 60) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 const msg = `${i}/${ | 
					
						
							|  |  |  |                     allImages.size | 
					
						
							| 
									
										
										
										
											2023-05-07 23:50:39 +02:00
										 |  |  |                 } downloaded: ${d},skipped: ${s}, failed: ${f}, running: ${Math.floor( | 
					
						
							|  |  |  |                     runningSecs | 
					
						
							|  |  |  |                 )}sec, ETA: ${estimatedActualMinutes}:${estimatedActualSeconds % 60}`
 | 
					
						
							| 
									
										
										
										
											2023-06-01 14:32:45 +02:00
										 |  |  |                 if (d + (f % 1000) === 1 || downloaded) { | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |                     ScriptUtils.erasableLog(msg) | 
					
						
							|  |  |  |                 } | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |                 if (downloaded) { | 
					
						
							|  |  |  |                     d++ | 
					
						
							|  |  |  |                 } else { | 
					
						
							|  |  |  |                     s++ | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 if (d + f == 75000) { | 
					
						
							|  |  |  |                     console.log("Used 75000 API calls, leaving 5000 for the rest of the day...") | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |                     break | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |                 } | 
					
						
							|  |  |  |             } catch (e) { | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |                 // console.log(e)
 | 
					
						
							| 
									
										
										
										
											2023-06-01 14:32:45 +02:00
										 |  |  |                 console.log( | 
					
						
							|  |  |  |                     "Offending image hash is", | 
					
						
							|  |  |  |                     image, | 
					
						
							|  |  |  |                     "from https://openstreetmap.org/" + imageSource[image] | 
					
						
							|  |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 |  |  |                 f++ | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-06 17:27:30 +01:00
										 |  |  |     async downloadViews(datapath: string): Promise<void> { | 
					
						
							|  |  |  |         const { allImages, imageSource } = this.loadImageUrls(datapath) | 
					
						
							|  |  |  |         console.log("Detected", allImages.size, "images") | 
					
						
							|  |  |  |         const results: [string, number][] = [] | 
					
						
							|  |  |  |         const today = new Date().toISOString().substring(0, "YYYY-MM-DD".length) | 
					
						
							|  |  |  |         const viewDir = datapath + "/views_" + today | 
					
						
							|  |  |  |         if (!existsSync(viewDir)) { | 
					
						
							|  |  |  |             mkdirSync(viewDir) | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2023-12-07 21:57:20 +01:00
										 |  |  |         const targetpath = datapath + "/views.csv" | 
					
						
							| 
									
										
										
										
											2023-12-06 17:27:30 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-07 21:57:20 +01:00
										 |  |  |         const total = allImages.size | 
					
						
							|  |  |  |         let dloaded = 0 | 
					
						
							|  |  |  |         let skipped = 0 | 
					
						
							|  |  |  |         let err = 0 | 
					
						
							| 
									
										
										
										
											2023-12-06 17:27:30 +01:00
										 |  |  |         for (const image of Array.from(allImages)) { | 
					
						
							| 
									
										
										
										
											2023-12-07 21:57:20 +01:00
										 |  |  |             const cachedView = viewDir + "/" + image.replace(/\//g, "_") | 
					
						
							| 
									
										
										
										
											2023-12-06 17:27:30 +01:00
										 |  |  |             let attribution: LicenseInfo | 
					
						
							|  |  |  |             if (existsSync(cachedView)) { | 
					
						
							|  |  |  |                 attribution = JSON.parse(readFileSync(cachedView, "utf8")) | 
					
						
							| 
									
										
										
										
											2023-12-07 21:57:20 +01:00
										 |  |  |                 skipped++ | 
					
						
							| 
									
										
										
										
											2023-12-06 17:27:30 +01:00
										 |  |  |             } else { | 
					
						
							| 
									
										
										
										
											2023-12-07 21:57:20 +01:00
										 |  |  |                 try { | 
					
						
							| 
									
										
										
										
											2024-04-01 02:00:48 +02:00
										 |  |  |                     attribution = await Imgur.singleton.DownloadAttribution({ url: image }) | 
					
						
							| 
									
										
										
										
											2023-12-07 21:57:20 +01:00
										 |  |  |                     await ScriptUtils.sleep(500) | 
					
						
							|  |  |  |                     writeFileSync(cachedView, JSON.stringify(attribution)) | 
					
						
							|  |  |  |                     dloaded++ | 
					
						
							|  |  |  |                 } catch (e) { | 
					
						
							|  |  |  |                     err++ | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |                 } | 
					
						
							| 
									
										
										
										
											2023-12-06 17:27:30 +01:00
										 |  |  |             } | 
					
						
							|  |  |  |             results.push([image, attribution.views]) | 
					
						
							| 
									
										
										
										
											2023-12-07 21:57:20 +01:00
										 |  |  |             if (dloaded % 50 === 0) { | 
					
						
							|  |  |  |                 console.log({ | 
					
						
							|  |  |  |                     dloaded, | 
					
						
							|  |  |  |                     skipped, | 
					
						
							|  |  |  |                     total, | 
					
						
							|  |  |  |                     err, | 
					
						
							|  |  |  |                     progress: Math.round(dloaded + skipped + err), | 
					
						
							|  |  |  |                 }) | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             if ((dloaded + skipped + err) % 100 === 0) { | 
					
						
							|  |  |  |                 console.log("Writing views to", targetpath) | 
					
						
							|  |  |  |                 fs.writeFileSync(targetpath, results.map((r) => r.join(",")).join("\n")) | 
					
						
							|  |  |  |             } | 
					
						
							| 
									
										
										
										
											2023-12-06 17:27:30 +01:00
										 |  |  |         } | 
					
						
							|  |  |  |         console.log("Writing views to", targetpath) | 
					
						
							|  |  |  |         fs.writeFileSync(targetpath, results.map((r) => r.join(",")).join("\n")) | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |     async downloadImage(url: string, imagePath: string): Promise<boolean> { | 
					
						
							|  |  |  |         const filenameLong = url.replace(/[\/:.\-%]/g, "_") + ".jpg" | 
					
						
							|  |  |  |         const targetPathLong = imagePath + "/" + filenameLong | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         const filename = url.substring("https://i.imgur.com/".length) | 
					
						
							|  |  |  |         const targetPath = imagePath + "/" + filename | 
					
						
							|  |  |  |         if (fs.existsSync(targetPathLong)) { | 
					
						
							|  |  |  |             if (fs.existsSync(targetPath)) { | 
					
						
							|  |  |  |                 fs.unlinkSync(targetPathLong) | 
					
						
							|  |  |  |                 console.log("Unlinking duplicate") | 
					
						
							|  |  |  |                 return false | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             console.log("Renaming...") | 
					
						
							|  |  |  |             fs.renameSync(targetPathLong, targetPath) | 
					
						
							|  |  |  |             return false | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         if (fs.existsSync(targetPath)) { | 
					
						
							|  |  |  |             return false | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         await ScriptUtils.DownloadFileTo(url, targetPath) | 
					
						
							|  |  |  |         return true | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     async downloadAllImages(datapath: string, imagePath: string): Promise<void> { | 
					
						
							| 
									
										
										
										
											2023-06-01 14:32:45 +02:00
										 |  |  |         const { allImages } = this.loadImageUrls(datapath) | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |         let skipped = 0 | 
					
						
							|  |  |  |         let failed = 0 | 
					
						
							|  |  |  |         let downloaded = 0 | 
					
						
							|  |  |  |         let invalid = 0 | 
					
						
							|  |  |  |         const startTime = Date.now() | 
					
						
							| 
									
										
										
										
											2023-06-01 14:32:45 +02:00
										 |  |  |         const urls = Array.from(allImages).filter((url) => url.startsWith("https://i.imgur.com")) | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |         for (const url of urls) { | 
					
						
							| 
									
										
										
										
											2023-06-01 14:32:45 +02:00
										 |  |  |             const runningTime = (Date.now() - startTime) / 1000 | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |             const handled = skipped + downloaded + failed | 
					
						
							|  |  |  |             const itemsLeft = allImages.size - handled | 
					
						
							|  |  |  |             const speed = handled / runningTime | 
					
						
							|  |  |  |             const timeLeft = Math.round(itemsLeft * speed) | 
					
						
							|  |  |  |             try { | 
					
						
							| 
									
										
										
										
											2023-06-04 22:14:23 +02:00
										 |  |  |                 const urls = url.split(/[;,]/) | 
					
						
							| 
									
										
										
										
											2023-06-01 14:32:45 +02:00
										 |  |  |                 const downloadedStatus = await Promise.all( | 
					
						
							| 
									
										
										
										
											2023-06-04 22:14:23 +02:00
										 |  |  |                     urls.map((url) => this.downloadImage(url.trim(), imagePath)) | 
					
						
							| 
									
										
										
										
											2023-06-01 14:32:45 +02:00
										 |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |                 for (const b of downloadedStatus) { | 
					
						
							|  |  |  |                     if (b) { | 
					
						
							|  |  |  |                         downloaded += 1 | 
					
						
							|  |  |  |                     } else { | 
					
						
							|  |  |  |                         skipped += 1 | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-01 14:32:45 +02:00
										 |  |  |                 if (downloadedStatus.some((i) => i) || skipped % 10000 === 0) { | 
					
						
							|  |  |  |                     console.log( | 
					
						
							|  |  |  |                         "Handled", | 
					
						
							|  |  |  |                         url, | 
					
						
							|  |  |  |                         JSON.stringify({ | 
					
						
							|  |  |  |                             skipped, | 
					
						
							|  |  |  |                             failed, | 
					
						
							|  |  |  |                             downloaded, | 
					
						
							|  |  |  |                             invalid, | 
					
						
							|  |  |  |                             total: allImages.size, | 
					
						
							|  |  |  |                             eta: timeLeft + "s", | 
					
						
							|  |  |  |                         }) | 
					
						
							|  |  |  |                     ) | 
					
						
							| 
									
										
										
										
											2023-05-18 13:07:14 +02:00
										 |  |  |                 } | 
					
						
							|  |  |  |             } catch (e) { | 
					
						
							|  |  |  |                 console.log(e) | 
					
						
							|  |  |  |                 failed++ | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-09 20:30:13 +01:00
										 /**
										  * Aggregates the license-info JSON files found (recursively) under `datapath`
										  * and reports statistics about them.
										  *
										  * Every file ending in `.json` is parsed as a `LicenseInfo`; entries without a
										  * license short name or without an artist are ignored. The method then:
										  *  - prints a markdown leaderboard of (at most) the 100 authors with the most files,
										  *  - prints per-license totals and author counts,
										  *  - writes a summary (leaderboard, totals per license, median, timestamp) to
										  *    `../../git/MapComplete-data/picture-leaderboard.json`.
										  *
										  * @param datapath directory that is scanned recursively for `.json` files
										  */
										 analyze(datapath: string) {
										     const files = ScriptUtils.readDirRecSync(datapath)
										     // author -> files attributed to that author
										     const byAuthor = new Map<string, string[]>()
										     // license short name -> files carrying that license
										     const byLicense = new Map<string, string[]>()
										     // license short name -> authors who used that license at least once
										     const licenseByAuthor = new Map<string, Set<string>>()

										     for (const file of files) {
										         if (!file.endsWith(".json")) {
										             continue
										         }
										         const attr = <LicenseInfo>JSON.parse(fs.readFileSync(file, { encoding: "utf8" }))
										         const license = attr.licenseShortName
										         const artist = attr.artist
										         // JSON can yield `null` as well as a missing key; neither can be attributed
										         if (
										             license === undefined ||
										             license === null ||
										             artist === undefined ||
										             artist === null
										         ) {
										             continue
										         }

										         let filesOfAuthor = byAuthor.get(artist)
										         if (filesOfAuthor === undefined) {
										             filesOfAuthor = []
										             byAuthor.set(artist, filesOfAuthor)
										         }
										         filesOfAuthor.push(file)

										         let filesOfLicense = byLicense.get(license)
										         if (filesOfLicense === undefined) {
										             filesOfLicense = []
										             byLicense.set(license, filesOfLicense)
										         }
										         filesOfLicense.push(file)

										         let authorsOfLicense = licenseByAuthor.get(license)
										         if (authorsOfLicense === undefined) {
										             authorsOfLicense = new Set<string>()
										             licenseByAuthor.set(license, authorsOfLicense)
										         }
										         authorsOfLicense.add(artist)
										     }

										     const authorCounts = Array.from(byAuthor, ([author, images]): [string, number] => [
										         author,
										         images.length,
										     ])

										     const countsPerAuthor: number[] = authorCounts.map(([, count]) => count)
										     console.log(countsPerAuthor)
										     // A comparator is required: the default sort compares elements as strings,
										     // which would place 10 before 9 and yield a wrong median.
										     countsPerAuthor.sort((a, b) => a - b)
										     const median = countsPerAuthor[Math.floor(countsPerAuthor.length / 2)]

										     const json: {
										         leaderboard: { rank: number; account: string; name: string; nrOfImages: number }[]
										         totalAuthors: number
										         byLicense: Record<string, { license: string; total: number; authors: number }>
										         median: number | undefined
										         date: string
										     } = {
										         leaderboard: [],
										         totalAuthors: byAuthor.size,
										         byLicense: {},
										         median,
										         date: new Date().toISOString(),
										     }

										     // Highest count first; the sort is stable, so ties keep first-seen order.
										     // slice() caps the board at 100 without failing when there are fewer authors.
										     const topAuthors = [...authorCounts].sort((a, b) => b[1] - a[1]).slice(0, 100)
										     topAuthors.forEach(([author, count], index) => {
										         const rank = index + 1
										         // Escape the full user name, not only spaces, so names containing
										         // characters such as '#', '?' or '/' still give a working profile link.
										         const account = "https://openstreetmap.org/user/" + encodeURIComponent(author)
										         json.leaderboard.push({
										             rank,
										             name: author,
										             account,
										             nrOfImages: count,
										         })
										         console.log("|", rank, "|", `[${author}](${account})`, "|", count, "|")
										     })

										     const totalAuthors = byAuthor.size
										     let totalLicensedImages = 0
										     for (const images of byLicense.values()) {
										         totalLicensedImages += images.length
										     }

										     // Percentage with one decimal, rounded down
										     const percentage = (part: number, whole: number): number =>
										         Math.floor((1000 * part) / whole) / 10

										     for (const [license, images] of byLicense) {
										         const total = images.length
										         // Both maps are filled together, so every license has at least one author
										         const authors = licenseByAuthor.get(license)?.size ?? 0
										         console.log(
										             `License ${license}: ${total} total pictures (${percentage(
										                 total,
										                 totalLicensedImages
										             )}%), ${authors} authors (${percentage(
										                 authors,
										                 totalAuthors
										             )}%), ${Math.floor(total / authors)} images/author`
										         )
										         // Keyed by license so that every license ends up in the output
										         // (assigning to `byLicense` itself would keep only the last one).
										         json.byLicense[license] = {
										             license,
										             total,
										             authors,
										         }
										     }

										     // A Set, so an author who used both licenses is counted once; a license
										     // that does not occur in the data contributes nothing instead of throwing.
										     const nonDefaultAuthors = new Set<string>([
										         ...(licenseByAuthor.get("CC-BY 4.0") ?? []),
										         ...(licenseByAuthor.get("CC-BY-SA 4.0") ?? []),
										     ])

										     console.log(
										         "Total number of correctly licenses pictures: ",
										         totalLicensedImages,
										         "(out of ",
										         files.length,
										         " images)"
										     )
										     console.log("Total number of authors:", byAuthor.size)
										     console.log(
										         "Total number of authors which used a valid, non CC0 license at one point in time",
										         nonDefaultAuthors.size
										     )
										     console.log("Median contributions per author:", median)

										     writeFileSync(
										         "../../git/MapComplete-data/picture-leaderboard.json",
										         JSON.stringify(json),
										         "utf8"
										     )
										 }
					
						
							|  |  |  | 
 | 
					
						
							/**
							 * Entry point of the script: downloads the data and metadata, backs up
							 * all images and finally prints/writes the analysis.
							 *
							 * Positional arguments (after `--cached` has been removed):
							 *  - `args[0]`: directory passed to `downloadAllImages` as image backup path (required)
							 *  - `args[1]`: data directory, defaults to `../../git/MapComplete-data/ImageLicenseInfo`
							 *
							 * @param args command line arguments
							 * @throws Error if no image backup path is given
							 */
							async main(args: string[]): Promise<void> {
							    console.log("Usage: [--cached] to use the cached osm data")
							    console.log("Args are", args)
							    // True when `--cached` is ABSENT.
							    // NOTE(review): presumably the second parameter of `downloadData` means
							    // "force a fresh download"; confirm against its definition.
							    const refresh = !args.includes("--cached")
							    const positional = args.filter((a) => a !== "--cached")

							    const imageBackupPath = positional[0]
							    if (imageBackupPath === "" || imageBackupPath === undefined) {
							        // An Error (rather than a bare string) carries a stack trace
							        throw new Error("No imageBackup path specified")
							    }
							    const datapath = positional[1] ?? "../../git/MapComplete-data/ImageLicenseInfo"

							    await this.downloadData(datapath, refresh)

							    // await this.downloadViews(datapath)
							    await this.downloadMetadata(datapath)
							    await this.downloadAllImages(datapath, imageBackupPath)
							    this.analyze(datapath)
							}
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							// Script entry point: instantiate the script class and start it via `run()`
							// (`run` is not defined in the visible part of this file; presumably inherited from `Script`).
							|  |  |  | new GenerateImageAnalysis().run() |