MapComplete/Docs/Tools/GenerateSeries.ts

215 lines
7.6 KiB
TypeScript
Raw Normal View History

import {existsSync, mkdirSync, readdirSync, readFileSync, writeFileSync} from "fs";
2021-08-22 15:53:05 +02:00
import ScriptUtils from "../../scripts/ScriptUtils";
import {Utils} from "../../Utils";
2022-02-14 01:15:20 +01:00
// One-time setup call executed at module load, before any downloader code runs.
// NOTE(review): presumably patches `Utils` so helpers such as `Utils.downloadJson` work outside the browser — confirm in ScriptUtils.
ScriptUtils.fixUtils()
2021-08-22 15:53:05 +02:00
/**
 * Downloads the changesets tagged with `#mapcomplete` from the OSMCha API and caches them on disk:
 * one `stats.<year>-<month>-<day>.day.json` file per day and one aggregated
 * `stats.<year>-<month>.json` file per month, all inside the target directory.
 */
class StatsDownloader {
    /** OSMCha query; `{start_date}`, `{end_date}` and `{page}` are substituted per request. */
    private readonly urlTemplate = "https://osmcha.org/api/v1/changesets/?date__gte={start_date}&date__lte={end_date}&page={page}&comment=%23mapcomplete&page_size=100"

    private readonly _targetDirectory: string;

    /**
     * @param targetDirectory directory in which the day and month files are read and written
     */
    constructor(targetDirectory = ".") {
        this._targetDirectory = targetDirectory;
    }

    /**
     * Walks over every month from the given start date up to (but excluding) today and makes sure
     * a month file exists for it. Months which already have a month file are skipped entirely;
     * days which already have a day file are loaded from disk instead of being downloaded.
     *
     * @param startYear first year to consider
     * @param startMonth first month (1 = January) to consider within `startYear`
     * @param startDay first day to consider within the start month only; later months start at day 1
     */
    public async DownloadStats(startYear = 2020, startMonth = 5, startDay = 1): Promise<void> {
        const today = new Date();
        const currentYear = today.getFullYear()
        const currentMonth = today.getMonth() + 1

        for (let year = startYear; year <= currentYear; year++) {
            for (let month = 1; month <= 12; month++) {
                if (year === startYear && month < startMonth) {
                    continue;
                }
                if (year === currentYear && month > currentMonth) {
                    break
                }

                const pathM = `${this._targetDirectory}/stats.${year}-${month}.json`
                if (existsSync(pathM)) {
                    continue;
                }

                // The start day only applies to the very first month of the requested range
                const firstDay = (year === startYear && month === startMonth) ? startDay : 1
                const features: any[] = []
                for (let day = firstDay; day <= 31; day++) {
                    // Today is still in progress (and later days do not exist yet): stop here
                    if (year === currentYear && month === currentMonth && day >= today.getDate()) {
                        break;
                    }

                    if (new Date(year, month - 1, day).getMonth() !== month - 1) {
                        // This day does not exist in this month: the date rolled over into a neighbouring month
                        continue
                    }

                    const path = `${this._targetDirectory}/stats.${year}-${month}-${(day < 10 ? "0" : "") + day}.day.json`
                    if (existsSync(path)) {
                        const loaded = JSON.parse(readFileSync(path, "utf-8"))
                        // day-stats are generally a list already, but in some ad-hoc cases might be a geojson-collection too
                        const cached = loaded?.features ?? loaded
                        if (Array.isArray(cached)) {
                            features.push(...cached)
                        } else {
                            console.error("Ignoring", path, ": it contains neither a list nor a feature collection")
                        }
                        console.log("Loaded ", path, "from disk, got", features.length, "features now")
                        continue
                    }

                    let dayFeatures: any[] = undefined
                    try {
                        dayFeatures = await this.DownloadStatsForDay(year, month, day, path)
                    } catch (e) {
                        console.error(e)
                        console.error("Could not download " + year + "-" + month + "-" + day + "... Trying again")
                        // A single retry; a second failure propagates to the caller
                        dayFeatures = await this.DownloadStatsForDay(year, month, day, path)
                    }
                    writeFileSync(path, JSON.stringify(dayFeatures))
                    features.push(...dayFeatures)
                }

                // NOTE(review): this also writes a month file for the month that is still running; because an
                // existing month file causes the month to be skipped, later days of that month are not picked up
                // on a subsequent run unless the month file is removed first.
                writeFileSync(pathM, JSON.stringify({features}))
            }
        }
    }

    /**
     * Downloads all changesets of a single day, following the pagination links of the API.
     * For every feature the `properties.metadata` object is flattened into `properties`
     * and the feature id is copied into `properties.id`.
     *
     * @param year the year of the day to download
     * @param month the month of the day to download (1 = January)
     * @param day the day of the month to download
     * @param path where the caller will store the result; only used for logging here
     * @returns the (non-null) features of that day
     * @throws Error if a response page does not contain a `features` list
     */
    public async DownloadStatsForDay(year: number, month: number, day: number, path: string): Promise<any[]> {
        let page = 1;
        let allFeatures = []

        const endDay = new Date(year, month - 1 /* Zero-indexed: 0 = january*/, day + 1);
        const endDate = `${endDay.getFullYear()}-${Utils.TwoDigits(endDay.getMonth() + 1)}-${Utils.TwoDigits(endDay.getDate())}`
        let url = this.urlTemplate.replace("{start_date}", year + "-" + Utils.TwoDigits(month) + "-" + Utils.TwoDigits(day))
            .replace("{end_date}", endDate)
            .replace("{page}", "" + page)

        // NOTE(review): the API token below is committed in plain text; consider loading it from the environment instead.
        const headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0',
            'Accept-Language': 'en-US,en;q=0.5',
            'Referer': 'https://osmcha.org/?filters=%7B%22date__gte%22%3A%5B%7B%22label%22%3A%222020-07-05%22%2C%22value%22%3A%222020-07-05%22%7D%5D%2C%22editor%22%3A%5B%7B%22label%22%3A%22mapcomplete%22%2C%22value%22%3A%22mapcomplete%22%7D%5D%7D',
            'Content-Type': 'application/json',
            'Authorization': 'Token 6e422e2afedb79ef66573982012000281f03dc91',
            'DNT': '1',
            'Connection': 'keep-alive',
            'TE': 'Trailers',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache'
        }

        // Every response carries the URL of the next page in `next`; the loop ends when that is empty
        while (url) {
            ScriptUtils.erasableLog(`Downloading stats for ${year}-${month}-${day}, page ${page} ${url}`)
            const result = await Utils.downloadJson(url, headers)
            page++;
            // Validate before spreading: spreading `undefined` would throw an unhelpful TypeError
            if (result.features === undefined) {
                console.log("ERROR", result)
                throw new Error(`No features in the response for ${year}-${month}-${day} (${url})`)
            }
            allFeatures.push(...result.features)
            url = result.next
        }

        console.log(`Writing ${allFeatures.length} features to `, path, Utils.Times(_ => " ", 80))
        allFeatures = Utils.NoNull(allFeatures)
        allFeatures.forEach(f => {
            f.properties = {...f.properties, ...f.properties.metadata}
            delete f.properties.metadata
            f.properties.id = f.id
        })
        return allFeatures
    }
}
/**
 * A single changeset as a GeoJSON feature, in the shape this script reads from the stats files.
 *
 * List-valued properties are typed as `unknown[]`: their element shape is not used by this file
 * (an empty-tuple annotation `[]` would only accept empty arrays).
 */
interface ChangeSetData {
    "id": number,
    "type": "Feature",
    "geometry": {
        "type": "Polygon",
        "coordinates": [number, number][][]
    },
    "properties": {
        "check_user": null,
        "reasons": unknown[],
        "tags": unknown[],
        "features": unknown[],
        "user": string,
        "uid": string,
        /** May be `null`; consumers in this file explicitly check for that. */
        "editor": string | null,
        "comment": string,
        "comments_count": number,
        "source": string,
        "imagery_used": string,
        "date": string,
        "reviewed_features": unknown[],
        "create": number,
        "modify": number,
        "delete": number,
        "area": number,
        "is_suspect": boolean,
        "harmful": any,
        "checked": boolean,
        "check_date": any,
        /** Absent on features whose metadata has been flattened into `properties` by the downloader. */
        "metadata"?: {
            "host": string,
            "theme": string,
            "imagery": string,
            "language": string
        }
    }
}
2022-03-17 16:40:53 +01:00
/**
 * Entry point of the script.
 *
 * Usage: `<script> [year] [month] [day] [--no-graphs]`. The numeric arguments give the first date
 * to download and default to 2020-05-01 when missing or not numeric.
 *
 * Downloads all missing statistics into `Docs/Tools/stats` (retrying until a run completes),
 * then, unless `--no-graphs` is passed, writes `file-overview.json` listing every JSON file
 * in that directory.
 */
async function main(): Promise<void> {
    if (!existsSync("graphs")) {
        mkdirSync("graphs")
    }

    const targetDir = "Docs/Tools/stats"
    // Without the target directory every write fails, which would keep the retry loop below spinning forever
    mkdirSync(targetDir, {recursive: true})

    /** Reads a positional command line argument as a number, or returns the fallback if it is not numeric */
    const numericArg = (index: number, fallback: number): number => {
        const parsed = Number(process.argv[index])
        return isNaN(parsed) ? fallback : parsed
    }
    const year = numericArg(2, 2020)
    const month = numericArg(3, 5)
    const day = numericArg(4, 1)

    // Finished days are cached on disk, so every retry resumes where the previous attempt failed.
    // NOTE(review): the number of retries is unbounded; a persistent failure loops forever.
    do {
        try {
            await new StatsDownloader(targetDir).DownloadStats(year, month, day)
            break
        } catch (e) {
            console.log(e)
        }
    } while (true)

    const allPaths = readdirSync(targetDir)
        .filter(p => p.startsWith("stats.") && p.endsWith(".json"));
    let allFeatures: ChangeSetData[] = [].concat(...allPaths
        .map(path => JSON.parse(readFileSync(targetDir + "/" + path, "utf-8")).features));
    // Files without a `features` member contribute `undefined`, which is dropped here as well
    allFeatures = allFeatures.filter(f => f?.properties !== undefined && (f.properties.editor === null || f.properties.editor.toLowerCase().startsWith("mapcomplete")))

    allFeatures = allFeatures.filter(f => f.properties.metadata?.theme !== "EMPTY CS")

    if (process.argv.includes("--no-graphs")) {
        return
    }

    const allFiles = readdirSync(targetDir).filter(p => p.endsWith(".json"))
    writeFileSync(targetDir + "/file-overview.json", JSON.stringify(allFiles))
}
2021-08-22 15:53:05 +02:00
2022-01-16 02:52:46 +01:00
// Run the script; report a failure explicitly and signal it through the exit code
main()
    .then(_ => console.log("All done!"))
    .catch(e => {
        console.error(e)
        process.exitCode = 1
    })
2021-08-22 15:53:05 +02:00