forked from MapComplete/MapComplete
		
	Create image license analysis script
This commit is contained in:
		
							parent
							
								
									7aea97c68b
								
							
						
					
					
						commit
						13f8bea37a
					
				
					 2 changed files with 215 additions and 0 deletions
				
			
		
							
								
								
									
										18
									
								
								scripts/Script.ts
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								scripts/Script.ts
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,18 @@ | ||||||
|  | import ScriptUtils from "./ScriptUtils" | ||||||
|  | 
 | ||||||
/**
 * Base class for command line scripts.
 *
 * A subclass implements `main`; calling `run` invokes `ScriptUtils.fixUtils()`,
 * drops the first two entries of `process.argv` and passes the remaining
 * arguments to `main`.
 */
export default abstract class Script {
    /** Description of the script, as handed to the constructor. */
    private readonly _docs: string

    /**
     * @param docs description of what the script does
     */
    constructor(docs: string) {
        this._docs = docs
    }

    /**
     * The actual work of the script.
     *
     * @param args the command line arguments, without the first two entries of `process.argv`
     */
    abstract main(args: string[]): Promise<void>

    /**
     * Starts the script: calls `main` and logs "All done" once it resolves.
     * If `main` rejects, the error is logged and the exit code of the process is set to 1,
     * so that a failing script never looks like a successful run.
     */
    public run(): void {
        ScriptUtils.fixUtils()
        // process.argv starts with two entries which are not arguments of the script itself
        const args = process.argv.slice(2)
        this.main(args).then(
            () => console.log("All done"),
            (e) => {
                console.error("Script failed:", e)
                process.exitCode = 1
            }
        )
    }
}
							
								
								
									
										197
									
								
								scripts/generateImageAnalysis.ts
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										197
									
								
								scripts/generateImageAnalysis.ts
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,197 @@ | ||||||
|  | import Script from "./Script" | ||||||
|  | import { Overpass } from "../Logic/Osm/Overpass" | ||||||
|  | import { RegexTag } from "../Logic/Tags/RegexTag" | ||||||
|  | import { ImmutableStore } from "../Logic/UIEventSource" | ||||||
|  | import { BBox } from "../Logic/BBox" | ||||||
|  | import * as fs from "fs" | ||||||
|  | import { Feature } from "geojson" | ||||||
|  | import ScriptUtils from "./ScriptUtils" | ||||||
|  | import { Imgur } from "../Logic/ImageProviders/Imgur" | ||||||
|  | import { LicenseInfo } from "../Logic/ImageProviders/LicenseInfo" | ||||||
|  | import { Utils } from "../Utils" | ||||||
|  | 
 | ||||||
/**
 * Downloads (from overpass) all features which have an imgur image in one of the
 * `image`/`image:N` tags, can fetch the attribution of every such image and prints
 * statistics about the authors and licenses of those images.
 */
export default class GenerateImageAnalysis extends Script {
    constructor() {
        super(
            "Downloads (from overpass) all tags which have an imgur-image; then analyses the licenses"
        )
    }

    /**
     * Share of `part` in `total` as a percentage, rounded down to one decimal.
     * Yields 0 if `total` is 0 instead of NaN/Infinity.
     */
    private static percentage(part: number, total: number): number {
        if (total === 0) {
            return 0
        }
        return Math.floor((1000 * part) / total) / 10
    }

    /**
     * Appends `value` to the list stored under `key`, creating the list if needed.
     */
    private static appendTo(map: Map<string, string[]>, key: string, value: string): void {
        const list = map.get(key)
        if (list === undefined) {
            map.set(key, [value])
        } else {
            list.push(value)
        }
    }

    /**
     * Queries overpass for all features where the tag `key` points to an imgur image and
     * writes the resulting feature collection to `${datapath}/features_with_<key>.geojson`.
     * Nothing is downloaded if that file already exists.
     *
     * @param key the tag to search, e.g. `image` or `image:0`
     * @param datapath the directory in which the result is stored
     */
    async fetchImages(key: string, datapath: string): Promise<void> {
        // Every ':' and '/' is replaced, so that the key can be used within a file name
        const targetPath = `${datapath}/features_with_${key.replace(/[:\/]/g, "_")}.geojson`
        if (fs.existsSync(targetPath)) {
            console.log("Skipping", key)
            return
        }
        // The query must use the requested key; otherwise every file would contain the data of 'image'
        const tag = new RegexTag(key, /https:\/\/i.imgur.com\/.*/i)
        // NOTE(review): 180 is presumably the overpass timeout in seconds - confirm against Overpass
        const overpass = new Overpass(
            tag,
            [],
            "https://overpass.kumi.systems/api/interpreter",
            new ImmutableStore(180),
            undefined,
            false
        )
        console.log("Starting query...")
        const data = await overpass.queryGeoJson(BBox.global)
        console.log("Got data: ", data[0].features.length)
        fs.writeFileSync(targetPath, JSON.stringify(data[0]), "utf8")
        console.log("Written", targetPath)
    }

    /**
     * Makes sure that `datapath` exists and downloads the features for the tag `image`
     * and for `image:0` up to and including `image:4`.
     *
     * @param datapath the directory in which the results are stored
     */
    async downloadData(datapath: string): Promise<void> {
        // 'recursive' also creates missing parent directories and does not fail on an existing directory
        fs.mkdirSync(datapath, { recursive: true })

        await this.fetchImages("image", datapath)
        for (let i = 0; i < 5; i++) {
            await this.fetchImages("image:" + i, datapath)
        }
    }

    /**
     * Reads every `.geojson` file found (recursively) in `datapath` and returns all their features.
     *
     * @param datapath the directory to scan
     * @returns the features of all files, in the order in which the files are listed
     */
    loadData(datapath: string): Feature[] {
        const allFeatures: Feature[] = []

        const files = ScriptUtils.readDirRecSync(datapath)
        for (const file of files) {
            if (!file.endsWith(".geojson")) {
                continue
            }
            const contents = JSON.parse(fs.readFileSync(file, "utf8"))
            // Pushed one by one: spreading a very big array into 'push' can exceed the argument limit
            for (const feature of contents?.features ?? []) {
                allFeatures.push(feature)
            }
        }

        return allFeatures
    }

    /**
     * Downloads the attribution of a single image and stores it as JSON file in `datapath`.
     *
     * @param datapath the directory in which the attribution is stored
     * @param image the url of the image
     * @returns `true` if the attribution was downloaded and written; `false` if the image was
     * skipped (no image given, url ending on `.png` or `.jpeg`, or attribution already on disk)
     */
    async fetchImageMetadata(datapath: string, image: string): Promise<boolean> {
        if (image === undefined || image === null) {
            return false
        }
        // NOTE(review): these extensions are treated as invalid by the original author - reason unknown
        if (image.endsWith(".png") || image.endsWith(".jpeg")) {
            console.log("Skipped invalid image")
            return false
        }
        const targetPath = datapath + "/" + image.replace(/[\/:.\-%]/g, "_") + ".json"
        if (fs.existsSync(targetPath)) {
            return false
        }
        const attribution = await Imgur.singleton.DownloadAttribution(image)
        fs.writeFileSync(targetPath, JSON.stringify(attribution, null, "    "))
        return true
    }

    /**
     * Collects the values of `image` and `image:0` up to `image:9` of all downloaded features and
     * fetches the attribution of each of them, one after another, while logging the progress.
     * Stops once 75000 attributions have been requested (downloaded or failed).
     *
     * @param datapath the directory containing the geojson files; the attributions are stored there too
     */
    async downloadMetadata(datapath: string): Promise<void> {
        const maxApiCalls = 75000
        const imageKeys = ["image"]
        for (let n = 0; n < 10; n++) {
            imageKeys.push("image:" + n)
        }

        const features = this.loadData(datapath)
        const allImages = new Set<string>()
        for (const feature of features) {
            const properties = feature.properties ?? {}
            for (const key of imageKeys) {
                const image = properties[key]
                // Absent tags must not end up in the set, they would inflate the number of images
                if (typeof image === "string") {
                    allImages.add(image)
                }
            }
        }
        console.log("Detected", allImages.size, "images")

        let handled = 0
        let downloadedCount = 0
        let skipped = 0
        let failed = 0
        const start = Date.now()
        for (const image of Array.from(allImages)) {
            handled++
            try {
                const downloaded = await this.fetchImageMetadata(datapath, image)
                if (downloaded) {
                    downloadedCount++
                } else {
                    skipped++
                }
            } catch (e) {
                console.log(e)
                failed++
            }

            // Only downloads and failures cost time (and API calls), skipped images are nearly free
            const apiCalls = downloadedCount + failed
            const runningSecs = (Date.now() - start) / 1000
            const left = allImages.size - handled

            let eta = "unknown"
            if (apiCalls > 0) {
                const estimatedSeconds = Math.floor((left * runningSecs) / apiCalls)
                const seconds = estimatedSeconds % 60
                eta = `${Math.floor(estimatedSeconds / 60)}:${seconds < 10 ? "0" : ""}${seconds}`
            }
            console.log(
                `${handled}/${allImages.size} downloaded: ${downloadedCount}, skipped: ${skipped}, failed: ${failed}, running: ${runningSecs}sec, ETA: ${eta}`
            )

            if (apiCalls >= maxApiCalls) {
                console.log("Used 75000 API calls, leaving 5000 for the rest of the day...")
                return
            }
        }
    }

    /**
     * Reads every `.json` file found (recursively) in `datapath` as attribution and prints
     * the number of images per author, the number of images per license, the number of authors
     * per license and a summary line for every license.
     * Attributions without author or without license are not counted for the respective statistic.
     *
     * @param datapath the directory containing the attribution files
     */
    analyze(datapath: string): void {
        const files = ScriptUtils.readDirRecSync(datapath)
        const byAuthor = new Map<string, string[]>()
        const byLicense = new Map<string, string[]>()
        const authorsByLicense = new Map<string, Set<string>>()
        for (const file of files) {
            if (!file.endsWith(".json")) {
                continue
            }
            const attr = <LicenseInfo>JSON.parse(fs.readFileSync(file, "utf8"))
            const artist = attr?.artist
            const license = attr?.licenseShortName

            if (artist !== undefined && artist !== null) {
                GenerateImageAnalysis.appendTo(byAuthor, artist, file)
            }
            if (license === undefined || license === null) {
                continue
            }
            GenerateImageAnalysis.appendTo(byLicense, license, file)

            let authors = authorsByLicense.get(license)
            if (authors === undefined) {
                authors = new Set<string>()
                authorsByLicense.set(license, authors)
            }
            if (artist !== undefined && artist !== null) {
                authors.add(artist)
            }
        }

        const byLicenseCount = Utils.MapToObj(byLicense, (a) => a.length)
        const byAuthorCount = Utils.MapToObj(byAuthor, (a) => a.length)
        const authorCountByLicense = Utils.MapToObj(authorsByLicense, (a) => a.size)
        console.log(byAuthorCount)
        console.log(byLicenseCount)
        console.log(authorCountByLicense)

        const totalAuthors = byAuthor.size
        let totalLicensedImages = 0
        for (const license of Object.keys(byLicenseCount)) {
            totalLicensedImages += byLicenseCount[license]
        }
        for (const license of Object.keys(byLicenseCount)) {
            const total = byLicenseCount[license]
            const authors = authorCountByLicense[license]
            // A license of which no image has a known author has zero authors
            const imagesPerAuthor = authors > 0 ? Math.floor(total / authors) : 0
            console.log(
                `License ${license}: ${total} total pictures (${GenerateImageAnalysis.percentage(
                    total,
                    totalLicensedImages
                )}%), ${authors} authors (${GenerateImageAnalysis.percentage(
                    authors,
                    totalAuthors
                )}%), ${imagesPerAuthor} images/author`
            )
        }
    }

    /**
     * Downloads the features (if not yet on disk) and prints the license analysis.
     *
     * @param args `args[0]` is the data directory; defaults to `../MapComplete-data/ImageLicenseInfo`
     */
    async main(args: string[]): Promise<void> {
        const datapath = args[0] ?? "../MapComplete-data/ImageLicenseInfo"
        await this.downloadData(datapath)

        // Fetching the attributions is disabled; enable the next line to download them
        //   await this.downloadMetadata(datapath)

        this.analyze(datapath)
    }
}
|  | 
 | ||||||
// Entry point: create the script and start it
const imageAnalysisScript = new GenerateImageAnalysis()
imageAnalysisScript.run()
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue