forked from MapComplete/MapComplete
Create image license analysis script
This commit is contained in:
parent
7aea97c68b
commit
13f8bea37a
2 changed files with 215 additions and 0 deletions
18
scripts/Script.ts
Normal file
18
scripts/Script.ts
Normal file
|
@ -0,0 +1,18 @@
|
|||
import ScriptUtils from "./ScriptUtils"
|
||||
|
||||
/**
 * Base class for command-line scripts.
 *
 * A subclass implements `main`; calling `run` forwards the command-line
 * arguments to `main` and reports the outcome on the console.
 */
export default abstract class Script {
    /** Description handed in by the subclass; stored, but not read anywhere in this class. */
    private readonly _docs: string

    /**
     * @param docs Human-readable description of what the script does
     */
    constructor(docs: string) {
        this._docs = docs
    }

    /**
     * The actual work of the script.
     *
     * @param args The command-line arguments, without the first two entries of `process.argv`
     */
    abstract main(args: string[]): Promise<void>

    /**
     * Entry point: calls `ScriptUtils.fixUtils()`, then starts `main` with the command-line arguments.
     *
     * Prints "All done" once `main` resolves. If `main` rejects, the error is logged and the
     * process exit code is set to 1, so that a failed script is not reported as a success and
     * does not end in an unhandled promise rejection.
     */
    public run(): void {
        ScriptUtils.fixUtils()
        // Node puts the executable and the script path in the first two argv slots
        const args = process.argv.slice(2)
        this.main(args)
            .then(() => console.log("All done"))
            .catch((e) => {
                console.error("Script failed:", e)
                process.exitCode = 1
            })
    }
}
|
197
scripts/generateImageAnalysis.ts
Normal file
197
scripts/generateImageAnalysis.ts
Normal file
|
@ -0,0 +1,197 @@
|
|||
import Script from "./Script"
|
||||
import { Overpass } from "../Logic/Osm/Overpass"
|
||||
import { RegexTag } from "../Logic/Tags/RegexTag"
|
||||
import { ImmutableStore } from "../Logic/UIEventSource"
|
||||
import { BBox } from "../Logic/BBox"
|
||||
import * as fs from "fs"
|
||||
import { Feature } from "geojson"
|
||||
import ScriptUtils from "./ScriptUtils"
|
||||
import { Imgur } from "../Logic/ImageProviders/Imgur"
|
||||
import { LicenseInfo } from "../Logic/ImageProviders/LicenseInfo"
|
||||
import { Utils } from "../Utils"
|
||||
|
||||
/**
 * Script which downloads (via Overpass) the features that carry an imgur image,
 * fetches the attribution of those images and prints statistics about licenses and authors.
 */
export default class GenerateImageAnalysis extends Script {
    /** Number of attribution requests (successful or failed) after which `downloadMetadata` stops. */
    private static readonly maxApiCalls = 75000

    constructor() {
        super(
            "Downloads (from overpass) all tags which have an imgur-image; then analyses the licenses"
        )
    }

    /**
     * Formats the estimated remaining time as `minutes:seconds`.
     *
     * @param left Number of images which still have to be handled
     * @param runningSecs Seconds elapsed so far
     * @param apiCalls Number of attribution requests done so far
     * @returns The estimate, or "unknown" as long as no request has been done yet
     */
    private static formatEta(left: number, runningSecs: number, apiCalls: number): string {
        if (apiCalls <= 0) {
            // No request done yet: there is no time-per-request to extrapolate from
            return "unknown"
        }
        const totalSeconds = Math.floor((left * runningSecs) / apiCalls)
        const minutes = Math.floor(totalSeconds / 60)
        const seconds = totalSeconds % 60
        return `${minutes}:${seconds < 10 ? "0" : ""}${seconds}`
    }

    /**
     * Queries Overpass (over `BBox.global`) for the features whose `key` tag matches an
     * i.imgur.com URL and writes the first element of the result to
     * `<datapath>/features_with_<key>.geojson`.
     * If that file already exists, nothing is downloaded and the file is left untouched.
     *
     * @param key The tag key to search for, e.g. `image` or `image:0`
     * @param datapath Directory in which the geojson file is written
     */
    async fetchImages(key: string, datapath: string): Promise<void> {
        // 'g': replace every ':' and '/' of the key, not just the first one
        const targetPath = `${datapath}/features_with_${key.replace(/[:\/]/g, "_")}.geojson`
        if (fs.existsSync(targetPath)) {
            console.log("Skipping", key)
            return
        }
        // Search on the requested key; a fixed "image" key would give the same result for every key
        const tag = new RegexTag(key, /https:\/\/i.imgur.com\/.*/i)
        const overpass = new Overpass(
            tag,
            [],
            "https://overpass.kumi.systems/api/interpreter",
            new ImmutableStore(180), // NOTE(review): presumably the timeout in seconds - confirm against Overpass
            undefined,
            false
        )
        console.log("Starting query...")
        const data = await overpass.queryGeoJson(BBox.global)
        console.log("Got data: ", data[0].features.length)
        fs.writeFileSync(targetPath, JSON.stringify(data[0]), "utf8")
        console.log("Written", targetPath)
    }

    /**
     * Creates `datapath` if needed and downloads the features for the keys
     * `image` and `image:0` up to and including `image:4`.
     *
     * @param datapath Directory in which the geojson files are written
     */
    async downloadData(datapath: string): Promise<void> {
        if (!fs.existsSync(datapath)) {
            // recursive: the default data path is nested, its parent directory might not exist either
            fs.mkdirSync(datapath, { recursive: true })
        }

        await this.fetchImages("image", datapath)
        for (let i = 0; i < 5; i++) {
            await this.fetchImages("image:" + i, datapath)
        }
    }

    /**
     * Reads every `.geojson` file found by `ScriptUtils.readDirRecSync(datapath)`
     * and returns all their features in a single list.
     *
     * @param datapath Directory which is searched for geojson files
     */
    loadData(datapath: string): Feature[] {
        const allFeatures: Feature[] = []

        const files = ScriptUtils.readDirRecSync(datapath)
        for (const file of files) {
            if (!file.endsWith(".geojson")) {
                continue
            }
            const contents = JSON.parse(fs.readFileSync(file, "utf8"))
            // Pushed one by one: spreading a very large array into push() can exceed the engine's argument limit
            for (const feature of contents.features) {
                allFeatures.push(feature)
            }
        }

        return allFeatures
    }

    /**
     * Downloads the attribution of a single image via `Imgur.singleton.DownloadAttribution`
     * and writes it as JSON into `datapath`.
     *
     * @param datapath Directory in which the attribution file is written
     * @param image URL of the image
     * @returns `true` if the attribution was downloaded and written; `false` if the image was
     *          skipped (undefined, ending in `.png`/`.jpeg`, or already present on disk)
     */
    async fetchImageMetadata(datapath: string, image: string): Promise<boolean> {
        if (image === undefined) {
            return false
        }
        if (image.endsWith(".png") || image.endsWith(".jpeg")) {
            console.log("Skipped invalid image")
            return false
        }
        // The URL is flattened into a file name by replacing path and punctuation characters
        const targetPath = datapath + "/" + image.replace(/[\/:.\-%]/g, "_") + ".json"
        if (fs.existsSync(targetPath)) {
            return false
        }
        const attribution = await Imgur.singleton.DownloadAttribution(image)
        // writeFileSync is synchronous, there is nothing to await
        fs.writeFileSync(targetPath, JSON.stringify(attribution, null, " "))
        return true
    }

    /**
     * Collects the values of `image` and `image:0` up to and including `image:9` of all
     * features loaded from `datapath` and fetches the attribution of each of them, one by one.
     * A progress line is logged after every image. The loop stops once `maxApiCalls`
     * requests (downloads plus failures) have been done.
     *
     * @param datapath Directory containing the geojson files; the attributions are written there too
     */
    async downloadMetadata(datapath: string): Promise<void> {
        const features = this.loadData(datapath)
        const imageKeys = ["image"]
        for (let i = 0; i < 10; i++) {
            imageKeys.push("image:" + i)
        }

        const allImages = new Set<string>()
        for (const feature of features) {
            // GeoJSON allows 'properties' to be null
            const properties = feature.properties ?? {}
            for (const key of imageKeys) {
                const url = properties[key]
                if (url !== undefined) {
                    // Absent keys are left out, so that the count below only covers actual images
                    allImages.add(url)
                }
            }
        }
        console.log("Detected", allImages.size, "images")

        let handled = 0
        let downloaded = 0
        let skipped = 0
        let failed = 0
        const start = Date.now()
        for (const image of Array.from(allImages)) {
            handled++
            try {
                if (await this.fetchImageMetadata(datapath, image)) {
                    downloaded++
                } else {
                    skipped++
                }
            } catch (e) {
                console.log(e)
                failed++
            }

            // Reported after the counters are updated, so that the line includes the current image
            const runningSecs = (Date.now() - start) / 1000
            const apiCalls = downloaded + failed
            const eta = GenerateImageAnalysis.formatEta(
                allImages.size - handled,
                runningSecs,
                apiCalls
            )
            console.log(
                `${handled}/${allImages.size} downloaded: ${downloaded},skipped: ${skipped}, failed: ${failed}, running: ${runningSecs}sec, ETA: ${eta}`
            )

            if (apiCalls >= GenerateImageAnalysis.maxApiCalls) {
                console.log(
                    `Used ${GenerateImageAnalysis.maxApiCalls} API calls, leaving 5000 for the rest of the day...`
                )
                // Actually stop: going on would use up the calls which the message promises to leave
                break
            }
        }
    }

    /**
     * Reads every `.json` attribution file found by `ScriptUtils.readDirRecSync(datapath)`
     * and logs the number of images per author, the number of images per license,
     * the number of authors per license and a summary line per license.
     * Attributions without artist are not counted as an author; attributions without
     * license are not counted for any license.
     *
     * @param datapath Directory which is searched for attribution files
     */
    analyze(datapath: string): void {
        const files = ScriptUtils.readDirRecSync(datapath)
        const byAuthor = new Map<string, string[]>()
        const byLicense = new Map<string, string[]>()
        const authorsByLicense = new Map<string, Set<string>>()
        for (const file of files) {
            if (!file.endsWith(".json")) {
                continue
            }
            const attr = <LicenseInfo>JSON.parse(fs.readFileSync(file, "utf8"))
            const artist = attr.artist
            const license = attr.licenseShortName

            if (artist !== undefined) {
                const filesOfArtist = byAuthor.get(artist)
                if (filesOfArtist === undefined) {
                    byAuthor.set(artist, [file])
                } else {
                    filesOfArtist.push(file)
                }
            }

            if (license === undefined) {
                continue
            }

            const filesOfLicense = byLicense.get(license)
            if (filesOfLicense === undefined) {
                byLicense.set(license, [file])
            } else {
                filesOfLicense.push(file)
            }

            let authors = authorsByLicense.get(license)
            if (authors === undefined) {
                authors = new Set<string>()
                authorsByLicense.set(license, authors)
            }
            if (artist !== undefined) {
                // An unknown artist is no author: byAuthor (and thus totalAuthors) does not count it either
                authors.add(artist)
            }
        }

        const byLicenseCount = Utils.MapToObj(byLicense, (a) => a.length)
        const byAuthorCount = Utils.MapToObj(byAuthor, (a) => a.length)
        const licenseByAuthorCount = Utils.MapToObj(authorsByLicense, (a) => a.size)
        console.log(byAuthorCount)
        console.log(byLicenseCount)
        console.log(licenseByAuthorCount)

        const totalAuthors = byAuthor.size
        let totalLicensedImages = 0
        for (const license in byLicenseCount) {
            totalLicensedImages += byLicenseCount[license]
        }
        for (const license in byLicenseCount) {
            const total = byLicenseCount[license]
            const authors = licenseByAuthorCount[license]
            // Percentages are truncated to one decimal
            const pictureShare = Math.floor((1000 * total) / totalLicensedImages) / 10
            // Guards against a division by zero if no attribution names an artist
            const authorShare =
                totalAuthors > 0 ? Math.floor((1000 * authors) / totalAuthors) / 10 : 0
            const imagesPerAuthor = authors > 0 ? Math.floor(total / authors) : 0
            console.log(
                `License ${license}: ${total} total pictures (${pictureShare}%), ${authors} authors (${authorShare}%), ${imagesPerAuthor} images/author`
            )
        }
    }

    /**
     * Downloads the features into the data directory and prints the license analysis.
     * The download of the attributions (`downloadMetadata`) is currently commented out.
     *
     * @param args `args[0]` is the data directory; defaults to `../MapComplete-data/ImageLicenseInfo`
     */
    async main(args: string[]): Promise<void> {
        const datapath = args[0] ?? "../MapComplete-data/ImageLicenseInfo"
        await this.downloadData(datapath)

        // await this.downloadMetadata(datapath)
        this.analyze(datapath)
    }
}
|
||||
|
||||
// Script entry point: build the analysis script and start it right away
const imageAnalysisScript = new GenerateImageAnalysis()
imageAnalysisScript.run()
|
Loading…
Reference in a new issue