From d920afa09ea924487922eec27ded0fa5889db45c Mon Sep 17 00:00:00 2001 From: Pieter Vander Vennet Date: Sat, 8 Feb 2025 01:46:54 +0100 Subject: [PATCH] Refactoring: remove scripts and leaderboard which depend on imgur --- leaderboard.html | 75 ------ package.json | 1 - scripts/generateImageAnalysis.ts | 414 ------------------------------- src/UI/Leaderboard.svelte | 71 ------ src/leaderboard.ts | 5 - 5 files changed, 566 deletions(-) delete mode 100644 leaderboard.html delete mode 100644 scripts/generateImageAnalysis.ts delete mode 100644 src/UI/Leaderboard.svelte delete mode 100644 src/leaderboard.ts diff --git a/leaderboard.html b/leaderboard.html deleted file mode 100644 index af1c41e755..0000000000 --- a/leaderboard.html +++ /dev/null @@ -1,75 +0,0 @@ - - - - - - - - - - - - - - - - - MapComplete - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - diff --git a/package.json b/package.json index ca3951d482..6ef5bd42bf 100644 --- a/package.json +++ b/package.json @@ -121,7 +121,6 @@ "download:nsi": "npm i name-suggestion-index && vite-node scripts/generateStats.ts && npm run download:nsi-logos && vite-node scripts/nsiLogos.ts -- all", "download:editor-layer-index": "vite-node scripts/downloadEli.ts", "download:stats": "vite-node scripts/GenerateSeries.ts", - "download:images": "vite-node scripts/generateImageAnalysis.ts -- ~/data/imgur-image-backup/", "download:community-index": "vite-node scripts/downloadCommunityIndex.ts ", "weblate:add-upstream": "git remote add weblate https://translate.mapcomplete.org/git/mapcomplete/core/ ; git remote update weblate", "weblate:fix": "npm run weblate:add-upstream && git merge weblate/master && git rebase origin/master && git push origin master", diff --git a/scripts/generateImageAnalysis.ts b/scripts/generateImageAnalysis.ts deleted file mode 100644 index 9b9afe811c..0000000000 --- a/scripts/generateImageAnalysis.ts +++ /dev/null @@ -1,414 +0,0 @@ -import Script from "./Script" -import { Overpass } from "../src/Logic/Osm/Overpass" -import { RegexTag } from "../src/Logic/Tags/RegexTag" -import { ImmutableStore } from "../src/Logic/UIEventSource" -import { BBox } from "../src/Logic/BBox" -import * as fs from "fs" -import { writeFileSync } from "fs" -import { Feature } from "geojson" -import ScriptUtils from "./ScriptUtils" -import { Imgur } from "../src/Logic/ImageProviders/Imgur" -import { LicenseInfo } from "../src/Logic/ImageProviders/LicenseInfo" -import { Utils } from "../src/Utils" -import Constants from "../src/Models/Constants" - -export default class GenerateImageAnalysis extends Script { - /** - * Max N in `image:N`-keys and `imageN` keys - * @private - */ - private static readonly maxImageIndex = 31 - constructor() { - super( - [ - "Downloads (from overpass) all tags which have an imgur-image; then analyses the licenses and downloads all the images", - "", - "Arguments:", - "Path to download the images to", - "Path to save the overview to", - ].join("\n") - ) - } - - async fetchImages(key: string, datapath: string, refresh: boolean): Promise { - const targetPath = `${datapath}/features_with_${key.replace(/[:\/]/, "_")}.geojson` - if (fs.existsSync(targetPath) && !refresh) { - console.log("Skipping", key) - return - } - const tag = new RegexTag(key, /^https:\/\/i.imgur.com\/.*$/i) - const overpass = new Overpass( - tag, - [], - Constants.defaultOverpassUrls[0], //"https://overpass.kumi.systems/api/interpreter", - new ImmutableStore(500), - false - ) - console.log("Starting query...") - const data = await overpass.queryGeoJson(BBox.global) - console.log( - "Got data:", - data[0].features.length, - "items; timestamp:", - data[1].toISOString() - ) - fs.writeFileSync(targetPath, JSON.stringify(data[0]), "utf8") - console.log("Written", targetPath) - } - - async downloadData(datapath: string, refresh: boolean): Promise { - if (!fs.existsSync(datapath)) { - fs.mkdirSync(datapath) - } - await this.fetchImages("image", datapath, refresh) - await this.fetchImages("image:streetsign", datapath, refresh) - await this.fetchImages("image:menu", datapath, refresh) - for (let i = 0; i < GenerateImageAnalysis.maxImageIndex; i++) { - await this.fetchImages("image:" + i, datapath, refresh) - await this.fetchImages("image" + i, datapath, refresh) - } - } - - loadData(datapath: string): Feature[] { - const allFeatures: Feature[] = [] - - const files = ScriptUtils.readDirRecSync(datapath) - for (const file of files) { - if (!file.endsWith(".geojson")) { - continue - } - const contents = JSON.parse(fs.readFileSync(file, "utf8")) - allFeatures.push(...contents.features) - } - - return allFeatures - } - - async fetchImageMetadata(datapath: string, image: string): Promise { - if (image === undefined) { - return false - } - if (!image.match(/https:\/\/i\.imgur\.com\/[a-zA-Z0-9]+(\.jpe?g)|(\.png)/)) { - return false - } - const filename = image.replace(/[\/:.\-%]/g, "_") + ".json" - const targetPath = datapath + "/" + filename - if (fs.existsSync(targetPath)) { - return false - } - const attribution = await Imgur.singleton.DownloadAttribution({ url: image }) - - if ((attribution.artist ?? "") === "") { - // This is an invalid attribution. We save the raw response as well - const hash = image.substr("https://i.imgur.com/".length).split(".jpg")[0] - - const apiUrl = "https://api.imgur.com/3/image/" + hash - const response = await Utils.downloadJsonCached(apiUrl, 365 * 24 * 60 * 60, { - Authorization: "Client-ID " + Constants.ImgurApiKey, - }) - const rawTarget = datapath + "/raw/" + filename - console.log("Also storing the raw response to", rawTarget) - await fs.writeFileSync(rawTarget, JSON.stringify(response, null, " ")) - } - - await fs.writeFileSync(targetPath, JSON.stringify(attribution, null, " ")) - return true - } - - loadImageUrls(datapath: string): { allImages: Set; imageSource: Map } { - let allImages = new Set() - const features = this.loadData(datapath) - let imageSource: Map = new Map() - - for (const feature of features) { - allImages.add(feature.properties["image"]) - imageSource[feature.properties["image"]] = feature.properties.id - allImages.add(feature.properties["image:streetsign"]) - imageSource[feature.properties["image:streetsign"]] = - feature.properties.id + " (streetsign)" - - for (let i = 0; i < GenerateImageAnalysis.maxImageIndex; i++) { - allImages.add(feature.properties["image:" + i]) - imageSource[ - feature.properties["image:" + i] - ] = `${feature.properties.id} (image:${i})` - - allImages.add(feature.properties["image" + i]) - imageSource[ - feature.properties["image" + i] - ] = `${feature.properties.id} (image${i})` - } - } - allImages.delete(undefined) - allImages.delete(null) - imageSource.delete(undefined) - imageSource.delete(null) - return { allImages, imageSource } - } - - async downloadMetadata(datapath: string): Promise { - const { allImages, imageSource } = this.loadImageUrls(datapath) - console.log("Detected", allImages.size, "images") - let i = 0 - let d = 0 - let s = 0 - let f = 0 - let start = Date.now() - for (const image of Array.from(allImages)) { - i++ - try { - const downloaded = await this.fetchImageMetadata(datapath, image) - const runningSecs = (Date.now() - start) / 1000 - const left = allImages.size - i - - const estimatedActualSeconds = Math.floor((left * runningSecs) / (f + d)) - const estimatedActualMinutes = Math.floor(estimatedActualSeconds / 60) - - const msg = `${i}/${ - allImages.size - } downloaded: ${d},skipped: ${s}, failed: ${f}, running: ${Math.floor( - runningSecs - )}sec, ETA: ${estimatedActualMinutes}:${estimatedActualSeconds % 60}` - if (d + (f % 1000) === 1 || downloaded) { - ScriptUtils.erasableLog(msg) - } - if (downloaded) { - d++ - } else { - s++ - } - if (d + f == 75000) { - console.log("Used 75000 API calls, leaving 5000 for the rest of the day...") - break - } - } catch (e) { - // console.log(e) - console.log( - "Offending image hash is", - image, - "from https://openstreetmap.org/" + imageSource[image] - ) - f++ - } - } - } - async downloadImage(url: string, imagePath: string): Promise { - const filenameLong = url.replace(/[\/:.\-%]/g, "_") + ".jpg" - const targetPathLong = imagePath + "/" + filenameLong - - const filename = url.substring("https://i.imgur.com/".length) - const targetPath = imagePath + "/" + filename - if (fs.existsSync(targetPathLong)) { - if (fs.existsSync(targetPath)) { - fs.unlinkSync(targetPathLong) - console.log("Unlinking duplicate") - return false - } - console.log("Renaming...") - fs.renameSync(targetPathLong, targetPath) - return false - } - if (fs.existsSync(targetPath)) { - return false - } - await ScriptUtils.DownloadFileTo(url, targetPath) - return true - } - - async downloadAllImages(datapath: string, imagePath: string): Promise { - const { allImages } = this.loadImageUrls(datapath) - let skipped = 0 - let failed = 0 - let downloaded = 0 - let invalid = 0 - const startTime = Date.now() - const urls = Array.from(allImages).filter((url) => url.startsWith("https://i.imgur.com")) - for (const url of urls) { - const runningTime = (Date.now() - startTime) / 1000 - const handled = skipped + downloaded + failed - const itemsLeft = allImages.size - handled - const speed = handled / runningTime - const timeLeft = Math.round(itemsLeft * speed) - try { - const urls = url.split(/[;,]/) - const downloadedStatus = await Promise.all( - urls.map((url) => this.downloadImage(url.trim(), imagePath)) - ) - - for (const b of downloadedStatus) { - if (b) { - downloaded += 1 - } else { - skipped += 1 - } - } - - if (downloadedStatus.some((i) => i) || skipped % 10000 === 0) { - console.log( - "Handled", - url, - JSON.stringify({ - skipped, - failed, - downloaded, - invalid, - total: allImages.size, - eta: timeLeft + "s", - }) - ) - } - } catch (e) { - console.log(e) - failed++ - } - } - } - - analyze(datapath: string) { - const files = ScriptUtils.readDirRecSync(datapath) - const byAuthor = new Map() - const byLicense = new Map() - const licenseByAuthor = new Map>() - for (const file of files) { - if (!file.endsWith(".json")) { - continue - } - const attr = JSON.parse(fs.readFileSync(file, { encoding: "utf8" })) - const license = attr.licenseShortName - - if (license === undefined || attr.artist === undefined) { - continue - } - if (byAuthor.get(attr.artist) === undefined) { - byAuthor.set(attr.artist, []) - } - byAuthor.get(attr.artist).push(file) - - if (byLicense.get(license) === undefined) { - byLicense.set(license, []) - } - byLicense.get(license).push(file) - - if (licenseByAuthor.get(license) === undefined) { - licenseByAuthor.set(license, new Set()) - } - licenseByAuthor.get(license).add(attr.artist) - } - byAuthor.delete(undefined) - byLicense.delete(undefined) - licenseByAuthor.delete(undefined) - - const byLicenseCount = Utils.MapToObj(byLicense, (a) => a.length) - const byAuthorCount = Utils.MapToObj(byAuthor, (a) => a.length) - const licenseByAuthorCount = Utils.MapToObj(licenseByAuthor, (a) => a.size) - - const countsPerAuthor: number[] = Array.from(Object.keys(byAuthorCount)).map( - (k) => byAuthorCount[k] - ) - console.log(countsPerAuthor) - countsPerAuthor.sort() - const median = countsPerAuthor[Math.floor(countsPerAuthor.length / 2)] - const json: { - leaderboard: { rank: number; account: string; name: string; nrOfImages: number }[] - } = { - leaderboard: [], - } - for (let i = 0; i < 100; i++) { - let maxAuthor: string = undefined - let maxCount = 0 - for (const author in byAuthorCount) { - const count = byAuthorCount[author] - if (maxAuthor === undefined || count > maxCount) { - maxAuthor = author - maxCount = count - } - } - json.leaderboard.push({ - rank: i + 1, - name: maxAuthor, - account: "https://openstreetmap.org/user/" + maxAuthor.replace(/ /g, "%20"), - nrOfImages: maxCount, - }) - console.log( - "|", - i + 1, - "|", - `[${maxAuthor}](https://openstreetmap.org/user/${maxAuthor.replace(/ /g, "%20")})`, - "|", - maxCount, - "|" - ) - delete byAuthorCount[maxAuthor] - } - - const totalAuthors = byAuthor.size - let totalLicensedImages = 0 - json["totalAuthors"] = totalAuthors - for (const license in byLicenseCount) { - totalLicensedImages += byLicenseCount[license] - } - json["byLicense"] = {} - for (const license in byLicenseCount) { - const total = byLicenseCount[license] - const authors = licenseByAuthorCount[license] - console.log( - `License ${license}: ${total} total pictures (${ - Math.floor((1000 * total) / totalLicensedImages) / 10 - }%), ${authors} authors (${ - Math.floor((1000 * authors) / totalAuthors) / 10 - }%), ${Math.floor(total / authors)} images/author` - ) - json["byLicense"] = { - license, - total, - authors, - } - } - - const nonDefaultAuthors = [ - ...Array.from(licenseByAuthor.get("CC-BY 4.0").values()), - ...Array.from(licenseByAuthor.get("CC-BY-SA 4.0").values()), - ] - - console.log( - "Total number of correctly licenses pictures: ", - totalLicensedImages, - "(out of ", - files.length, - " images)" - ) - console.log("Total number of authors:", byAuthor.size) - console.log( - "Total number of authors which used a valid, non CC0 license at one point in time", - nonDefaultAuthors.length - ) - console.log("Median contributions per author:", median) - json["median"] = median - json["date"] = new Date().toISOString() - writeFileSync( - "../../git/MapComplete-data/picture-leaderboard.json", - JSON.stringify(json), - "utf8" - ) - } - - async main(args: string[]): Promise { - console.log("Usage: [--cached] to use the cached osm data") - console.log("Args are", args) - const cached = args.indexOf("--cached") < 0 - args = args.filter((a) => a !== "--cached") - const datapath = args[1] ?? "../../git/MapComplete-data/ImageLicenseInfo" - const imageBackupPath = args[0] - if (imageBackupPath === "" || imageBackupPath === undefined) { - throw "No imageBackup path specified" - } - await this.downloadData(datapath, cached) - - // await this.downloadViews(datapath) - await this.downloadMetadata(datapath) - await this.downloadAllImages(datapath, imageBackupPath) - this.analyze(datapath) - } -} - -new GenerateImageAnalysis().run() diff --git a/src/UI/Leaderboard.svelte b/src/UI/Leaderboard.svelte deleted file mode 100644 index f5b58d6a9b..0000000000 --- a/src/UI/Leaderboard.svelte +++ /dev/null @@ -1,71 +0,0 @@ - - -
-

Contributed images with MapComplete: leaderboard

- - {#if $data} - - - - - - - {#each $data.leaderboard as contributor} - - - - - - {/each} -
RankContributorNumber of images contributed
- {contributor.rank} - - {#if $loggedInContributor === contributor.name} - {contributor.name} - {:else} - {contributor.name} - {/if} - - {contributor.nrOfImages} - total images -
- Statistics generated on {$data.date} - {:else} - - {/if} - -
- Logged in as {$loggedInContributor} -
-
diff --git a/src/leaderboard.ts b/src/leaderboard.ts deleted file mode 100644 index 29500b494e..0000000000 --- a/src/leaderboard.ts +++ /dev/null @@ -1,5 +0,0 @@ -import Leaderboard from "./UI/Leaderboard.svelte" - -new Leaderboard({ - target: document.getElementById("main"), -})