// Source: MapComplete/scripts/importscripts/openbenches.ts
// (listing metadata: 465 lines, 16 KiB, TypeScript)

import Script from "../Script"
import { existsSync, promises as fs, readFileSync, writeFile, writeFileSync } from "fs"
import { Feature, FeatureCollection, Point, Polygon } from "geojson"
import { join } from "path"
import sqlite3, { Database } from "sqlite3"
import { open } from "sqlite"
import { Lists } from "../../src/Utils/Lists"
import { Overpass } from "../../src/Logic/Osm/Overpass"
import { TagUtils } from "../../src/Logic/Tags/TagUtils"
import Constants from "../../src/Models/Constants"
import { BBox } from "../../src/Logic/BBox"
import LinkImageAction from "../../src/Logic/Osm/Actions/LinkImageAction"
import OsmChangeAction from "../../src/Logic/Osm/Actions/OsmChangeAction"
import ChangeTagAction from "../../src/Logic/Osm/Actions/ChangeTagAction"
import { Tag as OsmTag } from "../../src/Logic/Tags/Tag"
import { Changes } from "../../src/Logic/Osm/Changes"
import ScriptUtils from "../ScriptUtils"
import { GeoOperations } from "../../src/Logic/GeoOperations"
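/**
 * Note: this script needs the `sqlite` and `sqlite3` packages.
 * They are deliberately not listed in the project's dependencies, so install them manually first:
 *
 *   npm i sqlite sqlite3
 *
 * The corresponding dependency entries would be:
 *   "sqlite": "^5.1.1",
 *   "sqlite3": "^5.1.7",
 */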
/**
 * One row of the `benches` table of the OpenBenches database dump.
 */
interface Bench {
    /** Identifier of the bench; used to look up its media and tags and exported as `openbenches:id` */
    benchID: number;
    /** Latitude of the bench; exported as the second coordinate of the GeoJSON point */
    latitude: number;
    /** Longitude of the bench; exported as the first coordinate of the GeoJSON point */
    longitude: number;
    address: string;
    /** Text of the inscription; line breaks are stored as the literal characters `\r\n` */
    inscription: string;
    description: string;
    /** Benches with `present === 0` are not exported */
    present: 0 | 1;
    /** Benches with `published === 0` are not exported */
    published: 0 | 1;
    /** Time of creation (or possibly last edit?) */
    added: string;
    /** Refers to `User.userID` of the contributor */
    userID: number;
}
/**
 * One row of the `users` table of the OpenBenches database dump.
 */
interface User {
    /** Display name; exported as `lastModifiedBy` */
    name: string;
    // NOTE(review): presumably the account id at the login provider - confirm against the OpenBenches schema
    providerID: string;
    // NOTE(review): presumably the name of the login provider - confirm against the OpenBenches schema
    provider: string;
    /** Key on which `benches.userID` is joined */
    userID: number;
}
/**
 * One row of the `tags` table of the OpenBenches database dump.
 * Not to be confused with the OSM tag class, which this file imports as `OsmTag`.
 */
interface Tag {
    /** Identifier referenced by the `tag_map` table */
    tagID: number;
    /** Human readable label of the tag, e.g. "wooden" */
    tagText: string;
}
/**
 * Builds the URL of an image hosted on openbenches.org.
 *
 * @param sha either the hash of the image, or an object (e.g. a `media` row) carrying the hash in `sha1`
 * @returns `https://openbenches.org/image/<hash>.jpg`
 */
function mediaUrl(sha: string | { "sha1": string }): string {
    // Only unwrap the object if it actually carries a non-empty hash; otherwise the argument is used as is
    const hash = (typeof sha !== "string" && sha.sha1) || sha
    return "https://openbenches.org/image/" + hash + ".jpg"
}
/**
 * Coarse, 7-cornered polygon (coordinates in GeoJSON `[lon, lat]` order),
 * spanning roughly lon -12.6…3.1 and lat 49.4…61.3.
 */
const uk: Feature<Polygon> = {
    type: "Feature",
    properties: {},
    geometry: {
        coordinates: [
            [
                [3.139397666817615, 53.112746745001914],
                [0.12546232547020963, 61.34289409315957],
                [-5.193638926198332, 60.3858935023425],
                [-12.316831332595541, 56.76308878364702],
                [-12.586640816376246, 51.076733390490034],
                [-3.6443836396576046, 49.4256703574342],
                [1.0194660085441853, 50.442813369706585],
                // closes the ring: identical to the first position
                [3.139397666817615, 53.112746745001914],
            ],
        ],
        type: "Polygon",
    },
}
/**
 * Coarse, 7-cornered polygon (coordinates in GeoJSON `[lon, lat]` order),
 * spanning roughly lon -171.6…-32.9 and lat -3.5…79.1.
 */
const us: Feature<Polygon> = {
    type: "Feature",
    properties: {},
    geometry: {
        coordinates: [
            [
                [-171.55472370762342, 71.44263911390138],
                [-171.31347027402668, 33.24735774004321],
                [-105.9804086342826, -3.5292610992716362],
                [-57.00596161415962, 15.805666337324794],
                [-32.880618254493015, 49.584578264365916],
                [-47.35582427029317, 72.85409976292118],
                [-101.60890406091582, 79.0557752859543],
                // closes the ring: identical to the first position
                [-171.55472370762342, 71.44263911390138],
            ],
        ],
        type: "Polygon",
    },
}
/**
 * Axis-aligned rectangle (coordinates in GeoJSON `[lon, lat]` order),
 * spanning lon 107.6…177.6 and lat -48.7…-8.1.
 */
const australia: Feature<Polygon> = {
    type: "Feature",
    properties: {},
    geometry: {
        coordinates: [
            [
                [177.6309142850211, -48.72845301037672],
                [177.6309142850211, -8.050870320392335],
                [107.59695622498174, -8.050870320392335],
                [107.59695622498174, -48.72845301037672],
                // closes the ring: identical to the first position
                [177.6309142850211, -48.72845301037672],
            ],
        ],
        type: "Polygon",
    },
}
// The coarse region polygons, keyed by region name
const areas = { uk: uk, us: us, australia: australia }
/**
 * Import script for OpenBenches data.
 *
 * Reads a local SQLite copy of the OpenBenches database (seeding it from SQL dump files if needed) and writes:
 *  - `openbenches_export_maproulette*.geojson`: benches which are not yet linked from OSM via `openbenches:id`
 *  - `attributes_import_all.osc`: a change file (built with `Changes.createChangesetXMLForJosm`) which adds missing
 *    attributes and images to OSM benches that already carry an `openbenches:id`
 */
class Openbenches extends Script {
    /**
     * Maps an OpenBenches tag label onto the OSM `key=value` it translates to.
     *
     * Deliberately not mapped:
     *  - cat / dog: not always a pet; sometimes a dog walker, someone mentioning their cat, ...
     *  - emoji: very broad category, basically means that a little image is part of the inscription.
     *    Should be handled by adding the emoji to the inscription directly
     *  - twinned: two people are remembered, often a couple -> handled by the inscription and/or subject
     *  - picture: the plaque has a little picture -> subset of plaque
     *  - famous: someone "famous" is remembered; too subjective for OSM
     *  - funny: too subjective as well
     */
    private static readonly tagsToProperties: ReadonlyMap<string, string> = new Map([
        ["wooden", "material=wood"],
        ["metal", "material=metal"],
        ["indoors", "indoor=yes"],
        ["stone", "material=stone"],
        ["poem", "artwork=poem"],
        ["statue", "artwork=statue"],
        ["composite", "material=plastic"],
    ])

    /** Raw sqlite3 handle; only set once `main` has called `loadDb` */
    private db: Database

    constructor() {
        super("Creates the OpenBenches dataset to upload to maproulette")
    }

    /**
     * Creates the SQLite database `dbFile` by executing the SQL dump files found in `sqlDir`.
     * The files are named `openbenc_benches_table_<table>.sql` and are executed in a fixed order.
     *
     * The database handle is closed even if one of the files fails.
     * Note that a partially seeded `dbFile` is left on disk in that case.
     *
     * @param sqlDir directory containing the SQL dump files
     * @param dbFile path of the SQLite file to create
     */
    async buildDatabase(sqlDir: string, dbFile: string) {
        const db = await open({
            filename: dbFile,
            driver: sqlite3.Database,
        })
        const order = ["tags", "users", "tag_map", "media_types", "benches", "media"]
        try {
            for (const table of order) {
                console.log("Exec file", table)
                const file = "openbenc_benches_table_" + table + ".sql"
                const raw = await fs.readFile(join(sqlDir, file), "utf-8")
                const content = raw
                    // Table options from the dump which are stripped before handing the SQL to SQLite
                    .replaceAll("ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci", "")
                    // The dump escapes single quotes as \' whereas SQLite expects ''
                    .replaceAll("\\'", "''")
                await db.exec(content)
            }
        } finally {
            await db.close()
        }
        console.log("DB has been seeded")
    }

    /**
     * Runs `query` against the loaded database and resolves with all resulting rows.
     *
     * @param query the SQL to execute; may contain `?` placeholders
     * @param params values bound to the placeholders, in order (optional)
     * @returns the rows; the promise rejects with the sqlite error if the query fails
     */
    all<T>(query: string, params: unknown[] = []): Promise<T[]> {
        return new Promise<T[]>((resolve, reject) => {
            this.db.all(query, params, (err, rows) => {
                if (err) {
                    reject(err)
                } else {
                    resolve(rows as T[])
                }
            })
        })
    }

    /**
     * Opens `dbFile` and returns the underlying raw sqlite3 database, which is what `all` works on.
     */
    async loadDb(dbFile: string): Promise<Database> {
        const db = await open({
            filename: dbFile,
            driver: sqlite3.Database,
        })
        // `.db` is the raw handle wrapped by the `sqlite` package
        return <any>db.db
    }

    /**
     * Converts a bench (joined with the user who contributed it) into a GeoJSON point with OSM-style properties.
     *
     * @param benchWithUser the row from `benches` joined with `users`
     * @param tags the OpenBenches tag labels of this bench (may be undefined).
     *        Labels which were translated into a property are removed from this array.
     */
    async createBenchInfo(benchWithUser: Bench & User, tags: string[]): Promise<Feature<Point>> {
        const id = benchWithUser.benchID
        const media = await this.all<{
            sha1: string,
            media_type: string
        }>("SELECT * FROM media WHERE media.benchID = ?", [id])
        const mediaBench = media.filter(m => m.media_type === "bench")
        const mediaInscr = media.filter(m => m.media_type === "inscription")
        const mediaView = media.filter(m => m.media_type === "view")

        // Line breaks are stored as the literal characters \r\n
        const inscription = benchWithUser.inscription.replaceAll("\\r\\n", "\n")
        const properties: Record<string, string | number> = {
            lastModifiedTime: benchWithUser.added,
            "openbenches:id": id,
            // OSM values are limited to 255 characters; the remainder goes into `inscription:0`
            inscription: inscription.slice(0, 255),
            amenity: "bench",
            lastModifiedBy: benchWithUser.name,
        }
        if (inscription.length > 255) {
            properties["inscription:0"] = inscription.slice(255)
        }

        // Pictures of the bench and of the inscription become image, image:0, image:1, ...
        const mediaMerged = Lists.dedup(mediaBench.concat(mediaInscr).map(m => mediaUrl(m)))
        mediaMerged.forEach((url, i) => {
            properties[i === 0 ? "image" : "image:" + (i - 1)] = url
        })
        // Pictures of the view become image:view, image:view:0, image:view:1, ...
        mediaView.forEach((m, i) => {
            properties[i === 0 ? "image:view" : "image:view:" + (i - 1)] = mediaUrl(m)
        })

        // Iterate over a snapshot: handled tags are spliced out of `tags`,
        // which would make the iteration skip the next element otherwise
        for (const tag of [...(tags ?? [])]) {
            const match = Openbenches.tagsToProperties.get(tag)
            if (!match) {
                continue
            }
            const [k, v] = match.split("=")
            properties[k] = v
            tags.splice(tags.indexOf(tag), 1)
        }

        return {
            type: "Feature",
            properties,
            geometry: {
                type: "Point",
                coordinates: [benchWithUser.longitude, benchWithUser.latitude],
            },
        }
    }

    /**
     * Gets all OSM objects which carry an `openbenches:id` tag.
     *
     * The result of the Overpass query is cached in `openbenches_linked_in_osm.geojson`;
     * if this file exists, it is used as is. Delete it to fetch fresh data.
     */
    async getAlreadyImported(): Promise<FeatureCollection> {
        const alreadyImportedPath = "openbenches_linked_in_osm.geojson"
        if (existsSync(alreadyImportedPath)) {
            return JSON.parse(readFileSync(alreadyImportedPath, "utf-8"))
        }
        const overpass = new Overpass(Constants.defaultOverpassUrls[0], TagUtils.Tag("openbenches:id~*"))
        // The first element of the result is the data itself
        const [data] = await overpass.queryGeoJson(BBox.global)
        writeFileSync(alreadyImportedPath, JSON.stringify(data), "utf-8")
        return data
    }

    /**
     * Matches OSM benches with OpenBenches entries on `openbenches:id` and collects the tag changes
     * needed to add the attributes and images which are missing in OSM.
     * Existing OSM values are never overwritten.
     *
     * If there is at least one change, they are written to `attributes_import<area>.osc`.
     *
     * @param osmData the OSM features carrying an `openbenches:id`; their properties are updated with newly assigned image keys
     * @param openBenchesData the features as created by `createBenchInfo`
     * @param area suffix for the name of the output file
     */
    async conflate(osmData: Feature[], openBenchesData: FeatureCollection, area: string = "") {
        const dict: Map<string, Feature> = new Map()
        for (const bench of openBenchesData.features) {
            dict.set("" + bench.properties["openbenches:id"], bench)
        }

        const changes: OsmChangeAction[] = []
        for (const bench of osmData) {
            const obid = bench.properties["openbenches:id"]
            const ob = dict.get("" + obid)
            if (!ob) {
                console.log("No match found for", obid, "https://osm.org/" + bench.properties.id)
                continue
            }
            for (const key in ob.properties) {
                if (key.startsWith("lastModified")) {
                    // Metadata of OpenBenches, not an OSM attribute
                    continue
                }
                if (key.startsWith("image")) {
                    const imgValue = ob.properties[key]
                    // Already linked under any key, possibly without the '.jpg' extension
                    const known = Object.values(bench.properties)
                        .some(v => v === imgValue || (v + ".jpg") === imgValue)
                    if (known) {
                        continue
                    }
                    // Search the first free key of image, image:0, image:1, ...
                    let ikey = "image"
                    let i = -1
                    while (bench.properties[ikey]) {
                        i++
                        ikey = "image:" + i
                    }
                    const li = new ChangeTagAction(bench.properties.id, new OsmTag(ikey, imgValue), bench.properties, {
                        theme: "openbenches",
                        changeType: "link-image",
                    })
                    changes.push(li)
                    // Mark the key as taken for the next image of this bench
                    bench.properties[ikey] = imgValue
                    console.log(` + ${ikey}=${imgValue}`)
                } else if (!bench.properties[key]) {
                    const raw = ob.properties[key]
                    if (raw === undefined || raw === null || raw === "") {
                        // Nothing to add
                        continue
                    }
                    // Values might be numbers (e.g. the id): stringify before using string methods
                    const v = String(raw)
                    // OSM accepts values of up to (and including) 255 characters
                    if (v.length > 255) {
                        console.log("Text too long:", v.replaceAll("\n", " "))
                        continue
                    }
                    changes.push(new ChangeTagAction(
                        bench.properties.id,
                        new OsmTag(key, v),
                        bench.properties,
                        {
                            theme: "openbenches",
                            changeType: "answer",
                        },
                    ))
                    console.log(` - ${key}=${v.replaceAll("\n", " ")}`)
                }
            }
        }

        if (changes.length === 0) {
            return
        }
        const xml = await Changes.createChangesetXMLForJosm(changes)
        writeFileSync(`attributes_import${area}.osc`, xml, "utf-8")
    }

    /**
     * Entry point: loads (or first builds) the database, converts every bench which is present and published,
     * writes the MapRoulette export with the benches not yet linked from OSM
     * and conflates the already linked ones.
     *
     * @param args command line arguments; currently unused
     */
    async main(args: string[]): Promise<void> {
        const dbFile = "openbenches.sqlite"
        // Set to true to stop after ~1000 benches and to write to a '_test' file
        const createTest = false
        const osmData = await this.getAlreadyImported()
        // rmSync(dbFile)
        if (!existsSync(dbFile)) {
            console.log("No database file found at "+dbFile+", recreating the database")
            await this.buildDatabase("/home/pietervdvn/git/openbenches.org/database", dbFile)
        }
        const alreadyLinked: Set<number> = new Set(osmData.features.map(f => Number(f.properties["openbenches:id"])))
        this.db = await this.loadDb(dbFile)

        // tagID -> label
        const tagTexts = new Map<number, string>()
        const tagRows = await this.all<Tag>("SELECT * FROM tags")
        for (const tag of tagRows) {
            tagTexts.set(tag.tagID, tag.tagText)
        }

        // benchID -> labels of the tags on this bench
        const tagsOnBenches = new Map<number, string[]>()
        const tagOnBench = await this.all<{ benchID: number, tagID: number }>("SELECT * from tag_map")
        for (const { benchID, tagID } of tagOnBench) {
            const text = tagTexts.get(tagID)
            if (text === undefined) {
                // tag_map refers to a tag which does not exist
                continue
            }
            let onBench = tagsOnBenches.get(benchID)
            if (!onBench) {
                onBench = []
                tagsOnBenches.set(benchID, onBench)
            }
            onBench.push(text)
        }

        const openbenches = await this.all<Bench & User>("SELECT * FROM benches INNER JOIN users ON benches.userID = users.userID")
        /** Every present and published bench; needed to conflate the benches which are already in OSM */
        const features: Feature<Point>[] = []
        /** Only the benches which are not linked from OSM yet; these are exported to MapRoulette */
        const notYetLinked: Feature<Point>[] = []
        let skipped = 0
        for (let i = 0; i < openbenches.length; i++) {
            const benchWithUser = openbenches[i]
            if (benchWithUser.present === 0 || benchWithUser.published === 0) {
                continue
            }
            if (i % 100 === 0) {
                ScriptUtils.erasableLog(`Processing bench ${i}/${openbenches.length} (${Math.round(100 * i / openbenches.length)}%) `)
            }
            const benchTags = tagsOnBenches.get(benchWithUser.benchID)
            const feature = await this.createBenchInfo(benchWithUser, benchTags)
            features.push(feature)
            // Compare on the id of the bench, not on its position in the query result
            if (alreadyLinked.has(benchWithUser.benchID)) {
                skipped++
            } else {
                notYetLinked.push(feature)
            }
            if (createTest && features.length > 1000) {
                break
            }
        }
        /*
        writeFileSync(`openbenches_export_josm_${createTest ? "_test" : ""}.geojson`, JSON.stringify({
        type: "FeatureCollection", features,
        }, null, " "), "utf-8")*/

        // Each feature carries its OSM tags as a JSON string in `tags`, plus an `id`
        const maproulette = notYetLinked
            .map(f => {
                const properties = { tags: JSON.stringify(f.properties) }
                properties["id"] = "openbenches/" + f.properties["openbenches:id"]
                return { ...f, properties }
            })
        console.log("Skipped",skipped,"benches as already linked/imported")
        writeFileSync(`openbenches_export_maproulette${createTest ? "_test" : ""}.geojson`, JSON.stringify({
            type: "FeatureCollection", features: maproulette,
        }, null, " "), "utf-8")
        await this.conflate(osmData.features, { type: "FeatureCollection", features }, "_all")
    }
}
// Script entry point: instantiate the importer and start it through `run()`, which is inherited from `Script` (defined in ../Script)
new Openbenches().run()