Scripts: more fixes to the uploadPanoramaxScript

This commit is contained in:
Pieter Vander Vennet 2024-11-10 23:51:41 +01:00
parent 664ddc686c
commit 6e335c91d4
5 changed files with 275 additions and 93 deletions

View file

@ -138,7 +138,7 @@
"generate:summaryCache": "vite-node scripts/generateSummaryTileCache.ts", "generate:summaryCache": "vite-node scripts/generateSummaryTileCache.ts",
"create:database": "vite-node scripts/osm2pgsql/createNewDatabase.ts", "create:database": "vite-node scripts/osm2pgsql/createNewDatabase.ts",
"delete:database:old": "vite-node scripts/osm2pgsql/deleteOldDbs.ts", "delete:database:old": "vite-node scripts/osm2pgsql/deleteOldDbs.ts",
"upload:panoramax": "vite-node scripts/ImgurToPanoramax.ts && josm imgur_to_panoramax.osc" "upload:panoramax": "vite-node scripts/ImgurToPanoramax.ts # && josm imgur_to_panoramax.osc"
}, },
"keywords": [ "keywords": [
"OpenStreetMap", "OpenStreetMap",
@ -209,7 +209,7 @@
"opening_hours": "^3.6.0", "opening_hours": "^3.6.0",
"osm-auth": "^2.5.0", "osm-auth": "^2.5.0",
"osmtogeojson": "^3.0.0-beta.5", "osmtogeojson": "^3.0.0-beta.5",
"panoramax-js": "^0.4.8", "panoramax-js": "^0.3.12",
"panzoom": "^9.4.3", "panzoom": "^9.4.3",
"papaparse": "^5.3.1", "papaparse": "^5.3.1",
"pg": "^8.11.3", "pg": "^8.11.3",

View file

@ -4,7 +4,7 @@ import { RegexTag } from "../src/Logic/Tags/RegexTag"
import Constants from "../src/Models/Constants" import Constants from "../src/Models/Constants"
import { BBox } from "../src/Logic/BBox" import { BBox } from "../src/Logic/BBox"
import { existsSync, readFileSync, writeFileSync } from "fs" import { existsSync, readFileSync, writeFileSync } from "fs"
import { PanoramaxUploader } from "../src/Logic/ImageProviders/Panoramax" import PanoramaxImageProvider, { PanoramaxUploader } from "../src/Logic/ImageProviders/Panoramax"
import { Feature } from "geojson" import { Feature } from "geojson"
import { LicenseInfo } from "../src/Logic/ImageProviders/LicenseInfo" import { LicenseInfo } from "../src/Logic/ImageProviders/LicenseInfo"
import { GeoOperations } from "../src/Logic/GeoOperations" import { GeoOperations } from "../src/Logic/GeoOperations"
@ -16,27 +16,31 @@ import { Changes } from "../src/Logic/Osm/Changes"
import { ChangeDescription } from "../src/Logic/Osm/Actions/ChangeDescription" import { ChangeDescription } from "../src/Logic/Osm/Actions/ChangeDescription"
import OsmObjectDownloader from "../src/Logic/Osm/OsmObjectDownloader" import OsmObjectDownloader from "../src/Logic/Osm/OsmObjectDownloader"
import { OsmObject } from "../src/Logic/Osm/OsmObject" import { OsmObject } from "../src/Logic/Osm/OsmObject"
import { createReadStream } from "node:fs"
import { File } from "buffer" import { File } from "buffer"
import { open } from "node:fs/promises" import { open } from "node:fs/promises"
import { UploadableTag } from "../src/Logic/Tags/TagTypes" import { UploadableTag } from "../src/Logic/Tags/TagTypes"
import { Imgur } from "../src/Logic/ImageProviders/Imgur"
import { Or } from "../src/Logic/Tags/Or"
import ScriptUtils from "./ScriptUtils"
import { ImmutableStore } from "../src/Logic/UIEventSource"
export class ImgurToPanoramax extends Script { export class ImgurToPanoramax extends Script {
private readonly panoramax = new PanoramaxUploader( private readonly panoramax = new PanoramaxUploader(
Constants.panoramax.url, Constants.panoramax.url,
Constants.panoramax.token Constants.panoramax.token
) )
private licenseChecker = new PanoramaxImageProvider()
private readonly alreadyUploaded: Record<string, string> = {} private readonly alreadyUploaded: Record<string, string> = this.readAlreadyUploaded()
private readonly alreadyUploadedInv: Record<string, string> = Utils.transposeMapSimple(this.alreadyUploaded)
private _imageDirectory: string private _imageDirectory: string
private _licenseDirectory: string private _licenseDirectory: string
private readonly sequenceIds = { private readonly sequenceIds = {
test: "7f34cf53-27ff-46c9-ac22-78511fa8457a", test: "7f34cf53-27ff-46c9-ac22-78511fa8457a",
cc0: "f0d6f78a-ff95-4db1-8494-6eb44a17bb37", cc0: "1de6f4a1-73ac-4c75-ab7f-2a2aabddf50a", // "f0d6f78a-ff95-4db1-8494-6eb44a17bb37",
ccby: "288a8052-b475-422c-811a-4f6f1a00015e", ccby: "288a8052-b475-422c-811a-4f6f1a00015e",
ccbysa: "f3d02893-b4c1-4cd6-8b27-e27ab57eb59a", ccbysa: "f3d02893-b4c1-4cd6-8b27-e27ab57eb59a"
} as const } as const
constructor() { constructor() {
@ -45,23 +49,92 @@ export class ImgurToPanoramax extends Script {
) )
} }
/**
 * Fetches the raw imgur metadata (the `data` object of the imgur API response) for an image.
 *
 * A cached copy in `<licenseDirectory>/raw/` is preferred. If there is none, imgur is queried
 * and the complete response is written to that cache file before its `data` part is returned.
 *
 * @param imgurUrl full URL of the imgur image
 * @returns the `data` part of the (cached or freshly downloaded) imgur response
 * @throws if the download (or writing the cache file) fails before the metadata was obtained
 */
private async getRawInfo(imgurUrl: string): Promise<{ description?: string, datetime: number }> {
    const fallbackpath = this._licenseDirectory + "/raw/" + imgurUrl.replaceAll(/[^a-zA-Z0-9]/g, "_") + ".json"
    if (existsSync(fallbackpath)) {
        console.log("Loaded raw info from fallback path")
        return JSON.parse(readFileSync(fallbackpath, "utf8"))["data"]
    }
    // No local data available; lets ask imgur themselves
    const response: { received: boolean, data?: { description?: string, datetime: number } } = { received: false }
    try {
        await Imgur.singleton.DownloadAttribution({ url: imgurUrl },
            raw => {
                console.log("Writing fallback to", fallbackpath, "(via raw)")
                writeFileSync(fallbackpath, JSON.stringify(raw), "utf8")
                response.data = raw["data"]
                response.received = true
            })
    } catch (e) {
        if (!response.received) {
            // The metadata was never obtained: propagate instead of waiting forever
            throw e
        }
        // The raw response was received and cached; only the license parsing afterwards failed,
        // which is irrelevant for the raw metadata
        console.warn("Could not parse a license for", imgurUrl, "- using the raw info only")
    }
    return response.data
}
/**
 * Determines the license of an imgur image. Three sources are tried in order:
 *
 * 1. the license file `<licenseDirectory>/<image name>` (either plain text starting with "cc0",
 *    or a JSON-serialized LicenseInfo)
 * 2. the cached raw imgur response in `<licenseDirectory>/raw/`
 * 3. the imgur API itself (the response is written to the raw cache)
 *
 * @param imgurUrl full URL of the imgur image
 * @returns the license, or `undefined` if no license could be determined (the image should then not be uploaded)
 * @throws if the imgur API had to be queried and the download itself failed
 */
private async getLicenseFor(imgurUrl: string): Promise<LicenseInfo> {
    const imageName = imgurUrl.split("/").at(-1)
    const licensePath: string = this._licenseDirectory + "/" + imageName
    if (existsSync(licensePath)) {
        const rawText = readFileSync(licensePath, "utf8")
        // A bare "cc0" is covered by the prefix check as well
        if (rawText.toLowerCase().startsWith("cc0")) {
            return { licenseShortName: "CC0", artist: "Unknown" }
        }
        try {
            const licenseText: LicenseInfo = JSON.parse(rawText)
            if (licenseText.licenseShortName) {
                return licenseText
            }
            console.log("<<< No valid license found in text", rawText)
            return undefined
        } catch (e) {
            // Not valid JSON: fall through to the raw fallback data below
            console.error("Could not read ", rawText.slice(0, 20), "as json for image", imgurUrl, "from", licensePath)
        }
    }

    // We didn't find the expected license in the expected location; search for the fallback (raw) license
    const fallbackpath = this._licenseDirectory + "/raw/" + imgurUrl.replaceAll(/[^a-zA-Z0-9]/g, "_") + ".json"
    if (existsSync(fallbackpath)) {
        const fallbackRaw: string = JSON.parse(readFileSync(fallbackpath, "utf8"))["data"]?.description
        const lowered = fallbackRaw?.toLowerCase() ?? ""
        if (lowered.startsWith("cc0") || lowered.includes("#cc0")) {
            return { licenseShortName: "CC0", artist: "Unknown" }
        }
        const license = Imgur.parseLicense(fallbackRaw)
        if (license) {
            return license
        }
        console.log("No (fallback) license found for (but file exists), not uploading", imgurUrl, fallbackRaw)
        return undefined
    }

    // No local data available; lets ask imgur themselves
    const state = { responseReceived: false }
    let attr: LicenseInfo = undefined
    try {
        attr = await Imgur.singleton.DownloadAttribution({ url: imgurUrl },
            raw => {
                console.log("Writing fallback to", fallbackpath)
                writeFileSync(fallbackpath, JSON.stringify(raw), "utf8")
                state.responseReceived = true
            })
    } catch (e) {
        if (!state.responseReceived) {
            // A genuine download (or cache write) failure: do not hide it
            throw e
        }
        // The response was received and cached, but no license could be derived from it.
        // Skip this image (a later run would skip it via the cached file too) instead of aborting the batch
        console.log("No license could be parsed from the imgur response, not uploading", imgurUrl)
    }
    console.log("Got license via API:", attr?.licenseShortName)
    // Wait a bit after every API call
    await ScriptUtils.sleep(500)
    if (attr?.licenseShortName) {
        return attr
    }
    return undefined
}
async uploadImage( async uploadImage(
key: string, key: string,
feat: Feature, feat: Feature
sequences: {
id: string
"stats:items": { count: number }
}[]
): Promise<UploadableTag | undefined> { ): Promise<UploadableTag | undefined> {
const v = feat.properties[key] const v = feat.properties[key]
if (!v) { if (!v) {
return undefined return undefined
} }
const imageHash = v.split("/").at(-1).split(".").at(0)
if (this.alreadyUploaded[imageHash]) { const imageHash = v.split("/").at(-1).split(".").at(0)
{
const panohash = this.alreadyUploaded[imageHash] const panohash = this.alreadyUploaded[imageHash]
return new And([new Tag(key.replace("image", panohash), panohash), new Tag(key, "")]) if (panohash) {
console.log("Already uploaded", panohash)
return new And([new Tag(key.replace("image", "panoramax"), panohash), new Tag(key, "")])
}
} }
let path: string = undefined let path: string = undefined
@ -73,22 +146,12 @@ export class ImgurToPanoramax extends Script {
if (!path) { if (!path) {
return undefined return undefined
} }
const licensePath = const license: LicenseInfo = await this.getLicenseFor(v)
this._licenseDirectory + "/" + v.replaceAll(/[^a-zA-Z0-9]/g, "_") + ".json" if (license === undefined) {
if (!existsSync(licensePath)) {
return undefined return undefined
} }
const licenseText: LicenseInfo = JSON.parse(readFileSync(licensePath, "utf8")) const sequence = this.sequenceIds[license.licenseShortName?.toLowerCase()]
if (!licenseText.licenseShortName) {
console.log("No license found for", path, licenseText)
return undefined
}
const license = licenseText.licenseShortName.toLowerCase().split(" ")[0].replace(/-/g, "")
const sequence = this.sequenceIds[license]
const author = licenseText.artist
const handle = await open(path) const handle = await open(path)
const stat = await handle.stat() const stat = await handle.stat()
class MyFile extends File { class MyFile extends File {
@ -100,75 +163,152 @@ export class ImgurToPanoramax extends Script {
const file = new MyFile([], path) const file = new MyFile([], path)
file.stream = function () { file.stream = function() {
return handle.readableWebStream() return handle.readableWebStream()
} }
const licenseRaw = await this.getRawInfo(v)
const date = new Date(licenseRaw.datetime * 1000)
console.log("Uploading", imageHash, sequence) console.log("Uploading", imageHash, sequence)
const result = await this.panoramax.uploadImage( const result = await this.panoramax.uploadImage(
<any>file, <any>file,
GeoOperations.centerpointCoordinates(feat), GeoOperations.centerpointCoordinates(feat),
author, license.artist,
true, true,
sequence sequence,
date.toISOString()
) )
this.alreadyUploaded[imageHash] = result.value
await handle.close() await handle.close()
this.alreadyUploaded[imageHash] = result.value
this.writeAlreadyUploaded()
return new And([new Tag(key.replace("image", result.key), result.value), new Tag(key, "")]) return new And([new Tag(key.replace("image", result.key), result.value), new Tag(key, "")])
} }
/**
 * Serializes the `alreadyUploaded` record as JSON into `uploaded_images.json`
 * (relative to the current working directory), overwriting the previous contents.
 */
private writeAlreadyUploaded() {
    const serialized = JSON.stringify(this.alreadyUploaded)
    writeFileSync("uploaded_images.json", serialized)
}
/**
 * Loads the record of previously uploaded images from `uploaded_images.json`
 * (relative to the current working directory).
 *
 * @returns the parsed record; an empty record if the file does not exist yet (e.g. on the very first run)
 * @throws if the file exists but does not contain valid JSON
 */
private readAlreadyUploaded(): Record<string, string> {
    const path = "uploaded_images.json"
    if (!existsSync(path)) {
        // This method is used as a field initializer: a missing file must not prevent the script from starting
        console.log("No", path, "found, assuming that no images were uploaded previously")
        return {}
    }
    const uploaded: Record<string, string> = JSON.parse(readFileSync(path, "utf8"))
    console.log("Detected ", Object.keys(uploaded).length, "previously uploaded images")
    return uploaded
}
/**
 * Sets the timestamp of an already uploaded panoramax image to the `datetime` that imgur
 * reports for the corresponding original image, by sending a PATCH request for the item.
 *
 * @param panokey id of the panoramax image; must be a value of the `alreadyUploaded` record
 * @throws if no imgur image is known for `panokey` or if panoramax does not return the image
 */
private async patchDate(panokey: string) {
    const imgurkey = this.alreadyUploadedInv[panokey]
    if (imgurkey === undefined) {
        // Without this guard, "https://i.imgur.com/undefined.jpg" would be requested (and cached)
        throw new Error("Cannot patch the date: no imgur image is known for panoramax image " + panokey)
    }
    const rawInfo = await this.getRawInfo("https://i.imgur.com/" + imgurkey + ".jpg")
    const date = new Date(rawInfo.datetime * 1000) // the factor 1000 treats imgur's datetime as seconds
    const p = this.panoramax.panoramax
    const searchResult = await p.search({
        ids: [panokey]
    })
    const panodata = searchResult[0]
    if (!panodata) {
        throw new Error("Cannot patch the date: panoramax did not return image " + panokey)
    }
    const collection: string = panodata.collection
    console.log({ imgurkey, date, panodata, datetime: rawInfo.datetime })
    const url = p.host + "/collections/" + collection + "/items/" + panokey
    const result = await p.fetch(url, {
        method: "PATCH",
        headers: { "content-type": "application/json" },
        body: JSON.stringify({
            ts: date.getTime(),
        })
    })
    console.log("Patched date of ", p.createViewLink({
        imageId: panokey,
    }), url, "result is", result.status, await result.text())
}
async main(args: string[]): Promise<void> { async main(args: string[]): Promise<void> {
this._imageDirectory = args[0] ?? "/home/pietervdvn/data/imgur-image-backup" this._imageDirectory = args[0] ?? "/home/pietervdvn/data/imgur-image-backup"
this._licenseDirectory = args[1] ?? "/home/pietervdvn/git/MapComplete-data/ImageLicenseInfo" this._licenseDirectory = args[1] ?? "/home/pietervdvn/git/MapComplete-data/ImageLicenseInfo"
const bounds = new BBox([ // await this.panoramax.panoramax.createCollection("CC0 - part 2")
[3.6984301050112833, 51.06715570450848], // return
[3.7434328399847914, 51.039379568816145], /* for (const panohash in this.alreadyUploadedInv) {
]) await this.patchDate(panohash)
const maxcount = 500 break
const filter = new RegexTag("image", /^https:\/\/i.imgur.com\/.*/) }*/
const overpass = new Overpass(filter, [], Constants.defaultOverpassUrls[0])
const features = (await overpass.queryGeoJson(bounds))[0].features
const bounds = new BBox([
[
1.7217767788980893,
41.00219164121438
],
[
2.7238939148245436,
41.9258679932085
]
])
const maxcount = 10000
const overpassfilters: RegexTag[] = []
const r = /^https:\/\/i.imgur.com\/.*/
for (const k of ["image", "image:menu", "image:streetsign"]) {
overpassfilters.push(new RegexTag(k, r))
for (let i = 0; i < 20; i++) {
overpassfilters.push(new RegexTag(k + ":" + i, r))
}
}
const overpass = new Overpass(new Or(overpassfilters), [], Constants.defaultOverpassUrls[0], new ImmutableStore(500) )
const features = (await overpass.queryGeoJson(bounds))[0].features
const featuresCopy = [...features]
let converted = 0 let converted = 0
const pano = this.panoramax.panoramax const total = features.length
const sequences = await pano.mySequences()
const changes: ChangeDescription[] = [] const changes: ChangeDescription[] = []
do { do {
const f = features.shift() const f = features.shift()
if (!f) { if (!f) {
break break
} }
if (converted % 100 === 0) {
console.log("Converted:", converted, "total:", total, "progress:", Math.round(converted * 100 / total) + "%")
}
const changedTags: (UploadableTag | undefined)[] = [] let changedTags: (UploadableTag | undefined)[] = []
console.log("Handling "+f.properties.id)
for (const k of ["image", "image:menu", "image:streetsign"]) { for (const k of ["image", "image:menu", "image:streetsign"]) {
changedTags.push(await this.uploadImage(k, f, sequences)) changedTags.push(await this.uploadImage(k, f))
for (let i = 0; i < 20; i++) { for (let i = 0; i < 20; i++) {
changedTags.push(await this.uploadImage(k + ":" + i, f, sequences)) changedTags.push(await this.uploadImage(k + ":" + i, f))
} }
} }
changedTags = Utils.NoNull(changedTags)
if (changedTags.length > 0) {
const action = new ChangeTagAction( const action = new ChangeTagAction(
f.properties.id, f.properties.id,
new And(Utils.NoNull(changedTags)), new And(changedTags),
f.properties, f.properties,
{ {
theme: "image-mover", theme: "image-mover",
changeType: "link-image", changeType: "link-image"
} }
) )
changes.push(...(await action.CreateChangeDescriptions())) changes.push(...(await action.CreateChangeDescriptions()))
}
converted++ converted++
} while (converted < maxcount) } while (converted < maxcount)
console.log("Uploaded images for", converted, "items; now creating the changeset")
const modif: string[] = Utils.Dedup(changes.map((ch) => ch.type + "/" + ch.id)) const modif: string[] = Utils.Dedup(changes.map((ch) => ch.type + "/" + ch.id))
const modifiedObjectsFresh = <OsmObject[]>( const modifiedObjectsFresh: OsmObject[] = []
( const dloader = new OsmObjectDownloader()
await Promise.all( for (let i = 0; i < modif.length; i++) {
modif.map((id) => new OsmObjectDownloader().DownloadObjectAsync(id)) if (i % 100 === 0) {
) console.log("Downloaded osm object", i, "/", modif.length, "(" + Math.round(i * 100 / modif.length) + "%)")
).filter((m) => m !== "deleted") }
) const id = modif[i]
const obj = await dloader.DownloadObjectAsync(id)
if (obj === "deleted") {
continue
}
modifiedObjectsFresh.push(obj)
}
const modifiedObjects = Changes.createChangesetObjectsStatic( const modifiedObjects = Changes.createChangesetObjectsStatic(
changes, changes,
modifiedObjectsFresh, modifiedObjectsFresh,
@ -177,6 +317,14 @@ export class ImgurToPanoramax extends Script {
) )
const cs = Changes.buildChangesetXML("0", modifiedObjects) const cs = Changes.buildChangesetXML("0", modifiedObjects)
writeFileSync("imgur_to_panoramax.osc", cs, "utf8") writeFileSync("imgur_to_panoramax.osc", cs, "utf8")
const usernames = featuresCopy.map(f => f.properties.user)
const hist : Record<string,number> = {}
for (const username of usernames) {
hist[username] = (hist[username] ?? 0)+ 1
}
console.log(hist)
} }
} }

View file

@ -11,6 +11,7 @@ export class Imgur extends ImageProvider {
public readonly defaultKeyPrefixes: string[] = ["image"] public readonly defaultKeyPrefixes: string[] = ["image"]
public static readonly apiUrl = "https://api.imgur.com/3/image" public static readonly apiUrl = "https://api.imgur.com/3/image"
public static readonly supportingUrls = ["https://i.imgur.com"] public static readonly supportingUrls = ["https://i.imgur.com"]
private constructor() { private constructor() {
super() super()
} }
@ -30,13 +31,44 @@ export class Imgur extends ImageProvider {
url: value, url: value,
key: key, key: key,
provider: this, provider: this,
id: value, id: value
}, }
] ]
} }
return undefined return undefined
} }
/**
 * Parses the description of an imgur image into a LicenseInfo.
 *
 * The description is either exactly "cc0" (case-insensitive; the artist is then "Unknown")
 * or consists of `key:value` lines, of which `license` and `author` are used.
 * Only the first colon of a line separates key and value, so a value may contain colons itself
 * (e.g. `author:https://example.org/me`).
 *
 * @param descr the description as stored on imgur
 * @returns the parsed license information; `undefined` if the description is empty or contains no `key:value` line
 */
public static parseLicense(descr: string): LicenseInfo | undefined {
    if (!descr) {
        return undefined
    }
    const data: Record<string, string> = {}
    if (descr.toLowerCase() === "cc0") {
        data.author = "Unknown"
        data.license = "CC0"
    } else {
        for (const line of descr.split("\n")) {
            const separator = line.indexOf(":")
            if (separator < 0) {
                // Not a key:value line
                continue
            }
            const k = line.substring(0, separator)
            // Descriptions may use Windows line endings, drop the carriage return
            data[k] = line.substring(separator + 1).replace(/\r/g, "")
        }
    }
    if (Object.keys(data).length === 0) {
        return undefined
    }

    const licenseInfo = new LicenseInfo()
    licenseInfo.licenseShortName = data.license
    licenseInfo.artist = data.author
    return licenseInfo
}
/** /**
* Download the attribution and license info for the picture at the given URL * Download the attribution and license info for the picture at the given URL
* *
@ -56,7 +88,9 @@ export class Imgur extends ImageProvider {
* *
* *
*/ */
public async DownloadAttribution(providedImage: { url: string }): Promise<LicenseInfo> { public async DownloadAttribution(providedImage: {
url: string
}, withResponse?: (obj) => void): Promise<LicenseInfo> {
const url = providedImage.url const url = providedImage.url
const hash = url.substr("https://i.imgur.com/".length).split(/\.jpe?g/i)[0] const hash = url.substr("https://i.imgur.com/".length).split(/\.jpe?g/i)[0]
@ -64,26 +98,17 @@ export class Imgur extends ImageProvider {
const response = await Utils.downloadJsonCached<{ const response = await Utils.downloadJsonCached<{
data: { description: string; datetime: string; views: number } data: { description: string; datetime: string; views: number }
}>(apiUrl, 365 * 24 * 60 * 60, { }>(apiUrl, 365 * 24 * 60 * 60, {
Authorization: "Client-ID " + Constants.ImgurApiKey, Authorization: "Client-ID " + Constants.ImgurApiKey
}) })
if (withResponse) {
const descr = response.data.description ?? "" withResponse(response)
const data: any = {}
const imgurData = response.data
for (const tag of descr.split("\n")) {
const kv = tag.split(":")
const k = kv[0]
data[k] = kv[1]?.replace(/\r/g, "")
} }
const licenseInfo = new LicenseInfo() const imgurData = response.data
const license= Imgur.parseLicense(imgurData.description ?? "")
license.views = imgurData.views
license.date = new Date(Number(imgurData.datetime) * 1000)
licenseInfo.licenseShortName = data.license return license
licenseInfo.artist = data.author
licenseInfo.date = new Date(Number(imgurData.datetime) * 1000)
licenseInfo.views = imgurData.views
return licenseInfo
} }
} }

View file

@ -145,12 +145,7 @@ export default class PanoramaxImageProvider extends ImageProvider {
) )
} }
Stores.Chronic(1500, () => hasLoading(source.data)).addCallback((_) => { Stores.Chronic(1500, () => hasLoading(source.data)).addCallback(() => {
console.log(
"Testing panoramax URLS again as some were loading",
source.data,
hasLoading(source.data)
)
super.getRelevantUrlsFor(tags, prefixes).then((data) => { super.getRelevantUrlsFor(tags, prefixes).then((data) => {
source.set(data) source.set(data)
return !hasLoading(data) return !hasLoading(data)
@ -192,9 +187,9 @@ export default class PanoramaxImageProvider extends ImageProvider {
export class PanoramaxUploader implements ImageUploader { export class PanoramaxUploader implements ImageUploader {
public readonly panoramax: AuthorizedPanoramax public readonly panoramax: AuthorizedPanoramax
maxFileSizeInMegabytes = 100 * 1000 * 1000 // 100MB maxFileSizeInMegabytes = 100 * 1000 * 1000 // 100MB
private readonly _targetSequence: Store<string> private readonly _targetSequence?: Store<string>
constructor(url: string, token: string, targetSequence: Store<string>) { constructor(url: string, token: string, targetSequence?: Store<string>) {
this._targetSequence = targetSequence this._targetSequence = targetSequence
this.panoramax = new AuthorizedPanoramax(url, token) this.panoramax = new AuthorizedPanoramax(url, token)
} }
@ -204,7 +199,8 @@ export class PanoramaxUploader implements ImageUploader {
currentGps: [number, number], currentGps: [number, number],
author: string, author: string,
noblur: boolean = false, noblur: boolean = false,
sequenceId?: string sequenceId?: string,
datetime?: string
): Promise<{ ): Promise<{
key: string key: string
value: string value: string
@ -213,7 +209,7 @@ export class PanoramaxUploader implements ImageUploader {
// https://panoramax.openstreetmap.fr/api/docs/swagger#/ // https://panoramax.openstreetmap.fr/api/docs/swagger#/
let [lon, lat] = currentGps let [lon, lat] = currentGps
let datetime = new Date().toISOString() datetime ??= new Date().toISOString()
try { try {
const tags = await ExifReader.load(blob) const tags = await ExifReader.load(blob)
const [[latD], [latM], [latS, latSDenom]] = < const [[latD], [latM], [latS, latSDenom]] = <

View file

@ -1291,6 +1291,19 @@ In the case that MapComplete is pointed to the testing grounds, the edit will be
return newD return newD
} }
/**
 * Swaps the keys and the values of a string-to-string record.
 * If several keys map onto the same value, the key that is visited last wins.
 *
 * Utils.transposeMapSimple({"a": "b", "c":"d"}) // => {"b":"a", "d":"c"}
 */
public static transposeMapSimple<K extends string, V extends string>(d: Record<K, V>): Record<V, K>{
    const transposed = <Record<V, K>> {}
    for (const key in d) {
        transposed[d[key]] = key
    }
    return transposed
}
/** /**
* Utils.colorAsHex({r: 255, g: 128, b: 0}) // => "#ff8000" * Utils.colorAsHex({r: 255, g: 128, b: 0}) // => "#ff8000"
* Utils.colorAsHex(undefined) // => undefined * Utils.colorAsHex(undefined) // => undefined