diff --git a/package.json b/package.json index 10531c5cf..66e334664 100644 --- a/package.json +++ b/package.json @@ -138,7 +138,7 @@ "generate:summaryCache": "vite-node scripts/generateSummaryTileCache.ts", "create:database": "vite-node scripts/osm2pgsql/createNewDatabase.ts", "delete:database:old": "vite-node scripts/osm2pgsql/deleteOldDbs.ts", - "upload:panoramax": "vite-node scripts/ImgurToPanoramax.ts && josm imgur_to_panoramax.osc" + "upload:panoramax": "vite-node scripts/ImgurToPanoramax.ts # && josm imgur_to_panoramax.osc" }, "keywords": [ "OpenStreetMap", @@ -209,7 +209,7 @@ "opening_hours": "^3.6.0", "osm-auth": "^2.5.0", "osmtogeojson": "^3.0.0-beta.5", - "panoramax-js": "^0.4.8", + "panoramax-js": "^0.3.12", "panzoom": "^9.4.3", "papaparse": "^5.3.1", "pg": "^8.11.3", diff --git a/scripts/ImgurToPanoramax.ts b/scripts/ImgurToPanoramax.ts index 314c26835..67c558c22 100644 --- a/scripts/ImgurToPanoramax.ts +++ b/scripts/ImgurToPanoramax.ts @@ -4,7 +4,7 @@ import { RegexTag } from "../src/Logic/Tags/RegexTag" import Constants from "../src/Models/Constants" import { BBox } from "../src/Logic/BBox" import { existsSync, readFileSync, writeFileSync } from "fs" -import { PanoramaxUploader } from "../src/Logic/ImageProviders/Panoramax" +import PanoramaxImageProvider, { PanoramaxUploader } from "../src/Logic/ImageProviders/Panoramax" import { Feature } from "geojson" import { LicenseInfo } from "../src/Logic/ImageProviders/LicenseInfo" import { GeoOperations } from "../src/Logic/GeoOperations" @@ -16,27 +16,31 @@ import { Changes } from "../src/Logic/Osm/Changes" import { ChangeDescription } from "../src/Logic/Osm/Actions/ChangeDescription" import OsmObjectDownloader from "../src/Logic/Osm/OsmObjectDownloader" import { OsmObject } from "../src/Logic/Osm/OsmObject" -import { createReadStream } from "node:fs" import { File } from "buffer" import { open } from "node:fs/promises" import { UploadableTag } from "../src/Logic/Tags/TagTypes" +import { Imgur } from "../src/Logic/ImageProviders/Imgur" +import { Or } from "../src/Logic/Tags/Or" +import ScriptUtils from "./ScriptUtils" +import { ImmutableStore } from "../src/Logic/UIEventSource" export class ImgurToPanoramax extends Script { private readonly panoramax = new PanoramaxUploader( Constants.panoramax.url, Constants.panoramax.token ) + private licenseChecker = new PanoramaxImageProvider() - private readonly alreadyUploaded: Record = {} - + private readonly alreadyUploaded: Record = this.readAlreadyUploaded() + private readonly alreadyUploadedInv: Record = Utils.transposeMapSimple(this.alreadyUploaded) private _imageDirectory: string private _licenseDirectory: string private readonly sequenceIds = { test: "7f34cf53-27ff-46c9-ac22-78511fa8457a", - cc0: "f0d6f78a-ff95-4db1-8494-6eb44a17bb37", + cc0: "1de6f4a1-73ac-4c75-ab7f-2a2aabddf50a", // "f0d6f78a-ff95-4db1-8494-6eb44a17bb37", ccby: "288a8052-b475-422c-811a-4f6f1a00015e", - ccbysa: "f3d02893-b4c1-4cd6-8b27-e27ab57eb59a", + ccbysa: "f3d02893-b4c1-4cd6-8b27-e27ab57eb59a" } as const constructor() { @@ -45,23 +49,92 @@ export class ImgurToPanoramax extends Script { ) } + + private async getRawInfo(imgurUrl): Promise<{ description?: string, datetime: number }> { + const fallbackpath = this._licenseDirectory + "/raw/" + imgurUrl.replaceAll(/[^a-zA-Z0-9]/g, "_") + ".json" + if (existsSync(fallbackpath)) { + console.log("Loaded raw info from fallback path") + return JSON.parse(readFileSync(fallbackpath, "utf8"))["data"] + } + // No local data available; lets ask imgur themselves + return new Promise((resolve) => { + Imgur.singleton.DownloadAttribution({ url: imgurUrl }, + raw => { + console.log("Writing fallback to", fallbackpath, "(via raw)") + writeFileSync(fallbackpath, JSON.stringify(raw), "utf8") + resolve(raw["data"]) + }) + }) + } + + private async getLicenseFor(imgurUrl: string): Promise { + const imageName = imgurUrl.split("/").at(-1) + const licensePath: string = this._licenseDirectory + "/" + imageName + if (existsSync(licensePath)) { + const rawText = readFileSync(licensePath, "utf8") + if (rawText?.toLowerCase() === "cc0" || rawText?.toLowerCase().startsWith("cc0")) { + return { licenseShortName: "CC0", artist: "Unknown" } + } + try { + + const licenseText: LicenseInfo = JSON.parse(rawText) + if (licenseText.licenseShortName) { + return licenseText + } + console.log("<<< No valid license found in text", rawText) + return undefined + } catch (e) { + console.error("Could not read ", rawText.slice(0, 20), "as json for image", imgurUrl, "from", licensePath) + } + } + + + // We didn't find the expected license in the expected location; search for the fallback (raw) license + const fallbackpath = this._licenseDirectory + "/raw/" + imgurUrl.replaceAll(/[^a-zA-Z0-9]/g, "_") + ".json" + if (existsSync(fallbackpath)) { + const fallbackRaw: string = JSON.parse(readFileSync(fallbackpath, "utf8"))["data"]?.description + if (fallbackRaw?.toLowerCase()?.startsWith("cc0") || fallbackRaw?.toLowerCase()?.indexOf("#cc0") >= 0) { + return { licenseShortName: "CC0", artist: "Unknown" } + } + const license = Imgur.parseLicense(fallbackRaw) + if(license){ + return license + } + console.log("No (fallback) license found for (but file exists), not uploading", imgurUrl, fallbackRaw) + return undefined + } + + + // No local data available; lets ask imgur themselves + const attr = await Imgur.singleton.DownloadAttribution({ url: imgurUrl }, + raw => { + console.log("Writing fallback to", fallbackpath) + writeFileSync(fallbackpath, JSON.stringify(raw), "utf8") + }) + console.log("Got license via API:", attr.licenseShortName) + await ScriptUtils.sleep(500) + if (attr.licenseShortName) { + return attr + } + return undefined + } + async uploadImage( key: string, - feat: Feature, - sequences: { - id: string - "stats:items": { count: number } - }[] + feat: Feature ): Promise { const v = feat.properties[key] if (!v) { return undefined } - const imageHash = v.split("/").at(-1).split(".").at(0) - if (this.alreadyUploaded[imageHash]) { + const imageHash = v.split("/").at(-1).split(".").at(0) + { const panohash = this.alreadyUploaded[imageHash] - return new And([new Tag(key.replace("image", panohash), panohash), new Tag(key, "")]) + if (panohash) { + console.log("Already uploaded", panohash) + return new And([new Tag(key.replace("image", "panoramax"), panohash), new Tag(key, "")]) + } } let path: string = undefined @@ -73,22 +146,12 @@ export class ImgurToPanoramax extends Script { if (!path) { return undefined } - const licensePath = - this._licenseDirectory + "/" + v.replaceAll(/[^a-zA-Z0-9]/g, "_") + ".json" - if (!existsSync(licensePath)) { + const license: LicenseInfo = await this.getLicenseFor(v) + if (license === undefined) { return undefined } - const licenseText: LicenseInfo = JSON.parse(readFileSync(licensePath, "utf8")) - if (!licenseText.licenseShortName) { - console.log("No license found for", path, licenseText) - return undefined - } - const license = licenseText.licenseShortName.toLowerCase().split(" ")[0].replace(/-/g, "") - const sequence = this.sequenceIds[license] - const author = licenseText.artist - + const sequence = this.sequenceIds[license.licenseShortName?.toLowerCase()] const handle = await open(path) - const stat = await handle.stat() class MyFile extends File { @@ -100,75 +163,152 @@ export class ImgurToPanoramax extends Script { const file = new MyFile([], path) - file.stream = function () { + file.stream = function() { return handle.readableWebStream() } + const licenseRaw = await this.getRawInfo(v) + const date = new Date(licenseRaw.datetime * 1000) + console.log("Uploading", imageHash, sequence) const result = await this.panoramax.uploadImage( file, GeoOperations.centerpointCoordinates(feat), - author, + license.artist, true, - sequence + sequence, + date.toISOString() ) - this.alreadyUploaded[imageHash] = result.value await handle.close() + this.alreadyUploaded[imageHash] = result.value + this.writeAlreadyUploaded() return new And([new Tag(key.replace("image", result.key), result.value), new Tag(key, "")]) } + private writeAlreadyUploaded() { + writeFileSync("uploaded_images.json", JSON.stringify(this.alreadyUploaded)) + } + + private readAlreadyUploaded() { + const uploaded = JSON.parse(readFileSync("uploaded_images.json", "utf8")) + console.log("Detected ", Object.keys(uploaded).length, "previously uploaded images") + return uploaded + } + + private async patchDate(panokey: string) { + const imgurkey = this.alreadyUploadedInv[panokey] + const license = await this.getRawInfo("https://i.imgur.com/" + imgurkey + ".jpg") + const date = new Date(license.datetime * 1000) + const panolicense = await this.panoramax.panoramax.search({ + ids: [panokey] + }) + const panodata = panolicense[0] + const collection: string = panodata.collection + console.log({ imgurkey, date, panodata, datetime: license.datetime }) + const p = this.panoramax.panoramax + const url = p.host+"/collections/" + collection + "/items/" + panokey + const result = await p.fetch(url, { + method: "PATCH", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + ts: date.getTime(), + }) + }) + console.log("Patched date of ", p.createViewLink({ + imageId: panokey, + }), url, "result is", result.status, await result.text()) + } + + async main(args: string[]): Promise { this._imageDirectory = args[0] ?? "/home/pietervdvn/data/imgur-image-backup" this._licenseDirectory = args[1] ?? "/home/pietervdvn/git/MapComplete-data/ImageLicenseInfo" - const bounds = new BBox([ - [3.6984301050112833, 51.06715570450848], - [3.7434328399847914, 51.039379568816145], - ]) - const maxcount = 500 - const filter = new RegexTag("image", /^https:\/\/i.imgur.com\/.*/) - const overpass = new Overpass(filter, [], Constants.defaultOverpassUrls[0]) - const features = (await overpass.queryGeoJson(bounds))[0].features + // await this.panoramax.panoramax.createCollection("CC0 - part 2") + // return + /* for (const panohash in this.alreadyUploadedInv) { + await this.patchDate(panohash) + break + }*/ + + + const bounds = new BBox([ + [ + 1.7217767788980893, + 41.00219164121438 + ], + [ + 2.7238939148245436, + 41.9258679932085 + ] + ]) + const maxcount = 10000 + const overpassfilters: RegexTag[] = [] + const r = /^https:\/\/i.imgur.com\/.*/ + for (const k of ["image", "image:menu", "image:streetsign"]) { + overpassfilters.push(new RegexTag(k, r)) + for (let i = 0; i < 20; i++) { + overpassfilters.push(new RegexTag(k + ":" + i, r)) + } + } + const overpass = new Overpass(new Or(overpassfilters), [], Constants.defaultOverpassUrls[0], new ImmutableStore(500) ) + const features = (await overpass.queryGeoJson(bounds))[0].features + const featuresCopy = [...features] let converted = 0 - const pano = this.panoramax.panoramax - const sequences = await pano.mySequences() + const total = features.length const changes: ChangeDescription[] = [] + do { const f = features.shift() if (!f) { break } + if (converted % 100 === 0) { + console.log("Converted:", converted, "total:", total, "progress:", Math.round(converted * 100 / total) + "%") + } - const changedTags: (UploadableTag | undefined)[] = [] + let changedTags: (UploadableTag | undefined)[] = [] + console.log("Handling "+f.properties.id) for (const k of ["image", "image:menu", "image:streetsign"]) { - changedTags.push(await this.uploadImage(k, f, sequences)) + changedTags.push(await this.uploadImage(k, f)) for (let i = 0; i < 20; i++) { - changedTags.push(await this.uploadImage(k + ":" + i, f, sequences)) + changedTags.push(await this.uploadImage(k + ":" + i, f)) } } - const action = new ChangeTagAction( - f.properties.id, - new And(Utils.NoNull(changedTags)), - f.properties, - { - theme: "image-mover", - changeType: "link-image", - } - ) - changes.push(...(await action.CreateChangeDescriptions())) + changedTags = Utils.NoNull(changedTags) + if (changedTags.length > 0) { + const action = new ChangeTagAction( + f.properties.id, + new And(changedTags), + f.properties, + { + theme: "image-mover", + changeType: "link-image" + } + ) + changes.push(...(await action.CreateChangeDescriptions())) + } converted++ } while (converted < maxcount) + console.log("Uploaded images for", converted, "items; now creating the changeset") + const modif: string[] = Utils.Dedup(changes.map((ch) => ch.type + "/" + ch.id)) - const modifiedObjectsFresh = ( - ( - await Promise.all( - modif.map((id) => new OsmObjectDownloader().DownloadObjectAsync(id)) - ) - ).filter((m) => m !== "deleted") - ) + const modifiedObjectsFresh: OsmObject[] = [] + const dloader = new OsmObjectDownloader() + for (let i = 0; i < modif.length; i++) { + if (i % 100 === 0) { + console.log("Downloaded osm object", i, "/", modif.length, "(" + Math.round(i * 100 / modif.length) + "%)") + } + const id = modif[i] + const obj = await dloader.DownloadObjectAsync(id) + if (obj === "deleted") { + continue + } + modifiedObjectsFresh.push(obj) + } const modifiedObjects = Changes.createChangesetObjectsStatic( changes, modifiedObjectsFresh, @@ -177,6 +317,14 @@ export class ImgurToPanoramax extends Script { ) const cs = Changes.buildChangesetXML("0", modifiedObjects) writeFileSync("imgur_to_panoramax.osc", cs, "utf8") + + + const usernames = featuresCopy.map(f => f.properties.user) + const hist : Record = {} + for (const username of usernames) { + hist[username] = (hist[username] ?? 0)+ 1 + } + console.log(hist) } } diff --git a/src/Logic/ImageProviders/Imgur.ts b/src/Logic/ImageProviders/Imgur.ts index c72ea54fd..ccd57b292 100644 --- a/src/Logic/ImageProviders/Imgur.ts +++ b/src/Logic/ImageProviders/Imgur.ts @@ -11,6 +11,7 @@ export class Imgur extends ImageProvider { public readonly defaultKeyPrefixes: string[] = ["image"] public static readonly apiUrl = "https://api.imgur.com/3/image" public static readonly supportingUrls = ["https://i.imgur.com"] + private constructor() { super() } @@ -30,13 +31,44 @@ export class Imgur extends ImageProvider { url: value, key: key, provider: this, - id: value, - }, + id: value + } ] } return undefined } + public static parseLicense(descr: string) { + const data: Record = {} + + if (!descr) { + return undefined + } + if (descr.toLowerCase() === "cc0") { + data.author = "Unknown" + data.license = "CC0" + } else { + for (const tag of descr.split("\n")) { + const kv = tag.split(":") + if (kv.length < 2) { + continue + } + const k = kv[0] + data[k] = kv[1]?.replace(/\r/g, "") + } + } + if (Object.keys(data).length === 0) { + return undefined + } + + const licenseInfo = new LicenseInfo() + + licenseInfo.licenseShortName = data.license + licenseInfo.artist = data.author + + return licenseInfo + } + /** * Download the attribution and license info for the picture at the given URL * @@ -56,7 +88,9 @@ export class Imgur extends ImageProvider { * * */ - public async DownloadAttribution(providedImage: { url: string }): Promise { + public async DownloadAttribution(providedImage: { + url: string + }, withResponse?: (obj) => void): Promise { const url = providedImage.url const hash = url.substr("https://i.imgur.com/".length).split(/\.jpe?g/i)[0] @@ -64,26 +98,17 @@ export class Imgur extends ImageProvider { const response = await Utils.downloadJsonCached<{ data: { description: string; datetime: string; views: number } }>(apiUrl, 365 * 24 * 60 * 60, { - Authorization: "Client-ID " + Constants.ImgurApiKey, + Authorization: "Client-ID " + Constants.ImgurApiKey }) - - const descr = response.data.description ?? "" - const data: any = {} - const imgurData = response.data - - for (const tag of descr.split("\n")) { - const kv = tag.split(":") - const k = kv[0] - data[k] = kv[1]?.replace(/\r/g, "") + if (withResponse) { + withResponse(response) } - const licenseInfo = new LicenseInfo() + const imgurData = response.data + const license= Imgur.parseLicense(imgurData.description ?? "") + license.views = imgurData.views + license.date = new Date(Number(imgurData.datetime) * 1000) - licenseInfo.licenseShortName = data.license - licenseInfo.artist = data.author - licenseInfo.date = new Date(Number(imgurData.datetime) * 1000) - licenseInfo.views = imgurData.views - - return licenseInfo + return license } } diff --git a/src/Logic/ImageProviders/Panoramax.ts b/src/Logic/ImageProviders/Panoramax.ts index bd8970efe..7bb09fae1 100644 --- a/src/Logic/ImageProviders/Panoramax.ts +++ b/src/Logic/ImageProviders/Panoramax.ts @@ -145,12 +145,7 @@ export default class PanoramaxImageProvider extends ImageProvider { ) } - Stores.Chronic(1500, () => hasLoading(source.data)).addCallback((_) => { - console.log( - "Testing panoramax URLS again as some were loading", - source.data, - hasLoading(source.data) - ) + Stores.Chronic(1500, () => hasLoading(source.data)).addCallback(() => { super.getRelevantUrlsFor(tags, prefixes).then((data) => { source.set(data) return !hasLoading(data) @@ -192,9 +187,9 @@ export default class PanoramaxImageProvider extends ImageProvider { export class PanoramaxUploader implements ImageUploader { public readonly panoramax: AuthorizedPanoramax maxFileSizeInMegabytes = 100 * 1000 * 1000 // 100MB - private readonly _targetSequence: Store + private readonly _targetSequence?: Store - constructor(url: string, token: string, targetSequence: Store) { + constructor(url: string, token: string, targetSequence?: Store) { this._targetSequence = targetSequence this.panoramax = new AuthorizedPanoramax(url, token) } @@ -204,7 +199,8 @@ export class PanoramaxUploader implements ImageUploader { currentGps: [number, number], author: string, noblur: boolean = false, - sequenceId?: string + sequenceId?: string, + datetime?: string ): Promise<{ key: string value: string @@ -213,7 +209,7 @@ export class PanoramaxUploader implements ImageUploader { // https://panoramax.openstreetmap.fr/api/docs/swagger#/ let [lon, lat] = currentGps - let datetime = new Date().toISOString() + datetime ??= new Date().toISOString() try { const tags = await ExifReader.load(blob) const [[latD], [latM], [latS, latSDenom]] = < diff --git a/src/Utils.ts b/src/Utils.ts index cbe079bce..ff9296c3f 100644 --- a/src/Utils.ts +++ b/src/Utils.ts @@ -1291,6 +1291,19 @@ In the case that MapComplete is pointed to the testing grounds, the edit will be return newD } + /** + * + * {"a": "b", "c":"d"} // => {"b":"a", "d":"c"} + */ + public static transposeMapSimple(d: Record): Record{ + const inv = > {} + for (const k in d) { + const v = d[k] + inv[v] = k + } + return inv + } + /** * Utils.colorAsHex({r: 255, g: 128, b: 0}) // => "#ff8000" * Utils.colorAsHex(undefined) // => undefined