forked from MapComplete/MapComplete
New scripts to generate statistics
This commit is contained in:
parent
7b595e65c6
commit
a2f3b967b7
108 changed files with 336663 additions and 4921 deletions
620
Docs/Tools/GenerateSeries.ts
Normal file
620
Docs/Tools/GenerateSeries.ts
Normal file
|
@ -0,0 +1,620 @@
|
|||
import {existsSync, readdirSync, readFileSync, writeFileSync} from "fs";
|
||||
import ScriptUtils from "../../scripts/ScriptUtils";
|
||||
import {Utils} from "../../Utils";
|
||||
import {exec} from "child_process"
|
||||
import {GeoOperations} from "../../Logic/GeoOperations";
|
||||
|
||||
class StatsDownloader {
|
||||
|
||||
private readonly startYear = 2020
|
||||
private readonly startMonth = 5;
|
||||
private readonly urlTemplate = "https://osmcha.org/api/v1/changesets/?date__gte={start_date}&date__lte={end_date}&page={page}&editor=mapcomplete&page_size=100"
|
||||
private readonly _targetDirectory: string;
|
||||
|
||||
|
||||
constructor(targetDirectory = ".") {
|
||||
this._targetDirectory = targetDirectory;
|
||||
}
|
||||
|
||||
public async DownloadStats() {
|
||||
|
||||
const currentYear = new Date().getFullYear()
|
||||
const currentMonth = new Date().getMonth() + 1
|
||||
for (let year = this.startYear; year <= currentYear; year++) {
|
||||
for (let month = 1; month <= 12; month++) {
|
||||
|
||||
if (year === this.startYear && month < this.startMonth) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (year === currentYear && month > currentMonth) {
|
||||
continue
|
||||
}
|
||||
|
||||
const path = `${this._targetDirectory}/stats.${year}-${month}.json`
|
||||
if (existsSync(path)) {
|
||||
if ((month == currentMonth && year == currentYear)) {
|
||||
console.log(`Force downloading ${year}-${month}`)
|
||||
} else {
|
||||
console.log(`Skipping ${year}-${month}: already exists`)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
await this.DownloadStatsForMonth(year, month, path)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public async DownloadStatsForMonth(year: number, month: number, path: string) {
|
||||
|
||||
let page = 1;
|
||||
let allFeatures = []
|
||||
let endDate = `${year}-${Utils.TwoDigits(month + 1)}-01`
|
||||
if (month == 12) {
|
||||
endDate = `${year + 1}-01-01`
|
||||
}
|
||||
let url = this.urlTemplate.replace("{start_date}", year + "-" + Utils.TwoDigits(month) + "-01")
|
||||
.replace("{end_date}", endDate)
|
||||
.replace("{page}", "" + page)
|
||||
|
||||
|
||||
let headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0',
|
||||
'Accept-Language': 'en-US,en;q=0.5',
|
||||
'Referer': 'https://osmcha.org/?filters=%7B%22date__gte%22%3A%5B%7B%22label%22%3A%222020-07-05%22%2C%22value%22%3A%222020-07-05%22%7D%5D%2C%22editor%22%3A%5B%7B%22label%22%3A%22mapcomplete%22%2C%22value%22%3A%22mapcomplete%22%7D%5D%7D',
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': 'Token 6e422e2afedb79ef66573982012000281f03dc91',
|
||||
'DNT': '1',
|
||||
'Connection': 'keep-alive',
|
||||
'TE': 'Trailers',
|
||||
'Pragma': 'no-cache',
|
||||
'Cache-Control': 'no-cache'
|
||||
}
|
||||
|
||||
|
||||
while (url) {
|
||||
ScriptUtils.erasableLog(`Downloading stats for ${year}-${month}, page ${page} ${url}`)
|
||||
const result = await ScriptUtils.DownloadJSON(url, {
|
||||
headers: headers
|
||||
})
|
||||
page++;
|
||||
allFeatures.push(...result.features)
|
||||
if (result.features === undefined) {
|
||||
console.log("ERROR", result)
|
||||
return
|
||||
}
|
||||
url = result.next
|
||||
}
|
||||
console.log(`Writing ${allFeatures.length} features to `, path, " ")
|
||||
writeFileSync(path, JSON.stringify({
|
||||
features: allFeatures
|
||||
}, undefined, 2))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
interface ChangeSetData {
|
||||
"id": number,
|
||||
"type": "Feature",
|
||||
"geometry": {
|
||||
"type": "Polygon",
|
||||
"coordinates": [number, number][][]
|
||||
},
|
||||
"properties": {
|
||||
"check_user": null,
|
||||
"reasons": [],
|
||||
"tags": [],
|
||||
"features": [],
|
||||
"user": string,
|
||||
"uid": string,
|
||||
"editor": string,
|
||||
"comment": string,
|
||||
"comments_count": number,
|
||||
"source": string,
|
||||
"imagery_used": string,
|
||||
"date": string,
|
||||
"reviewed_features": [],
|
||||
"create": number,
|
||||
"modify": number,
|
||||
"delete": number,
|
||||
"area": number,
|
||||
"is_suspect": boolean,
|
||||
"harmful": any,
|
||||
"checked": boolean,
|
||||
"check_date": any,
|
||||
"metadata": {
|
||||
"host": string,
|
||||
"theme": string,
|
||||
"imagery": string,
|
||||
"language": string
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const theme_remappings = {
|
||||
"metamap": "maps",
|
||||
"groen": "buurtnatuur",
|
||||
"updaten van metadata met mapcomplete": "buurtnatuur",
|
||||
"Toevoegen of dit natuurreservaat toegangkelijk is": "buurtnatuur",
|
||||
"wiki:mapcomplete/fritures": "fritures",
|
||||
"wiki:MapComplete/Fritures": "fritures",
|
||||
"lits": "lit",
|
||||
"pomp": "cyclofix",
|
||||
"wiki:user:joost_schouppe/campersite": "campersite",
|
||||
"wiki-user-joost_schouppe-geveltuintjes": "geveltuintjes",
|
||||
"wiki-user-joost_schouppe-campersite": "campersite",
|
||||
"wiki-User-joost_schouppe-campersite": "campersite",
|
||||
"wiki-User-joost_schouppe-geveltuintjes": "geveltuintjes",
|
||||
"wiki:User:joost_schouppe/campersite": "campersite",
|
||||
"arbres": "arbres_llefia",
|
||||
"aed_brugge": "aed",
|
||||
"https://llefia.org/arbres/mapcomplete.json": "arbres_llefia",
|
||||
"https://llefia.org/arbres/mapcomplete1.json": "arbres_llefia",
|
||||
"toevoegen of dit natuurreservaat toegangkelijk is": "buurtnatuur",
|
||||
"testing mapcomplete 0.0.0": "buurtnatuur",
|
||||
"https://raw.githubusercontent.com/osmbe/play/master/mapcomplete/geveltuinen/geveltuinen.json": "geveltuintjes"
|
||||
}
|
||||
|
||||
class ChangesetDataTools {
|
||||
|
||||
public static cleanChangesetData(cs: ChangeSetData): ChangeSetData {
|
||||
if (cs.properties.metadata.theme === undefined) {
|
||||
cs.properties.metadata.theme = cs.properties.comment.substr(cs.properties.comment.lastIndexOf("#") + 1)
|
||||
}
|
||||
cs.properties.metadata.theme = cs.properties.metadata.theme.toLowerCase()
|
||||
const remapped = theme_remappings[cs.properties.metadata.theme]
|
||||
cs.properties.metadata.theme = remapped ?? cs.properties.metadata.theme
|
||||
if (cs.properties.metadata.theme.startsWith("https://raw.githubusercontent.com/")) {
|
||||
cs.properties.metadata.theme = "gh://" + cs.properties.metadata.theme.substr("https://raw.githubusercontent.com/".length)
|
||||
}
|
||||
if (cs.properties.modify + cs.properties.delete + cs.properties.create == 0) {
|
||||
cs.properties.metadata.theme = "EMPTY CS"
|
||||
}
|
||||
return cs
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
interface PlotSpec {
|
||||
name: string,
|
||||
interpetKeysAs: "date" | "number" | "string" | string
|
||||
plot: {
|
||||
type: "pie" | "bar"
|
||||
count: { key: string, value: number }[]
|
||||
} | {
|
||||
type: "stacked-bar"
|
||||
count: {
|
||||
label: string,
|
||||
values: { key: string | Date, value: number }[]
|
||||
}[]
|
||||
},
|
||||
|
||||
render()
|
||||
}
|
||||
|
||||
|
||||
function createGraph(
|
||||
title: string,
|
||||
...options: PlotSpec[]) {
|
||||
const process = exec("python GenPlot.py \"graphs/" + title + "\"", ((error, stdout, stderr) => {
|
||||
console.log("Python: ", stdout)
|
||||
if (error !== null) {
|
||||
console.error(error)
|
||||
}
|
||||
if (stderr !== "") {
|
||||
console.error(stderr)
|
||||
}
|
||||
}))
|
||||
|
||||
for (const option of options) {
|
||||
process.stdin._write(JSON.stringify(option) + "\n", "utf-8", undefined)
|
||||
}
|
||||
process.stdin._write("\n", "utf-8", undefined)
|
||||
|
||||
}
|
||||
|
||||
class Histogram<K> {
|
||||
total(): number {
|
||||
let total = 0
|
||||
Array.from(this.counts.values()).forEach(i => total = total + i)
|
||||
return total
|
||||
}
|
||||
|
||||
public counts: Map<K, number> = new Map<K, number>()
|
||||
private sortAtEnd: K[] = []
|
||||
|
||||
constructor(keys?: K[]) {
|
||||
const self = this
|
||||
keys?.forEach(key => self.bump(key))
|
||||
}
|
||||
|
||||
|
||||
public bump(key: K, increase = 1) {
|
||||
if (this.counts.has(key)) {
|
||||
this.counts.set(key, increase + this.counts.get(key))
|
||||
} else {
|
||||
this.counts.set(key, increase)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds all the values of the given histogram to this histogram
|
||||
* @param hist
|
||||
*/
|
||||
public bumpHist(hist: Histogram<K>) {
|
||||
const self = this
|
||||
hist.counts.forEach((value, key) => {
|
||||
self.bump(key, value)
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new histogram. All entries with less then 'cutoff' count are lumped together into the 'other' category
|
||||
*/
|
||||
public createOthersCategory(otherName: K, cutoff: number | ((key: K, value: number) => boolean) = 15): Histogram<K> {
|
||||
const hist = new Histogram<K>()
|
||||
hist.sortAtEnd.push(otherName)
|
||||
|
||||
if (typeof cutoff === "number") {
|
||||
this.counts.forEach((value, key) => {
|
||||
if (value <= cutoff) {
|
||||
hist.bump(otherName, value)
|
||||
} else {
|
||||
hist.bump(key, value)
|
||||
}
|
||||
})
|
||||
} else {
|
||||
this.counts.forEach((value, key) => {
|
||||
if (cutoff(key, value)) {
|
||||
hist.bump(otherName, value)
|
||||
} else {
|
||||
hist.bump(key, value)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return hist;
|
||||
}
|
||||
|
||||
public addCountToName(): Histogram<string> {
|
||||
const self = this;
|
||||
const hist = new Histogram<string>()
|
||||
hist.sortAtEnd = this.sortAtEnd.map(name => `${name} (${self.counts.get(name)})`)
|
||||
|
||||
this.counts.forEach((value, key) => {
|
||||
hist.bump(`${key} (${value})`, value)
|
||||
})
|
||||
|
||||
return hist;
|
||||
}
|
||||
|
||||
public Clone(): Histogram<K> {
|
||||
const hist = new Histogram<K>()
|
||||
hist.bumpHist(this)
|
||||
hist.sortAtEnd = [...this.sortAtEnd];
|
||||
return hist;
|
||||
}
|
||||
|
||||
public keyToDate(addMissingDays: boolean = false): Histogram<Date> {
|
||||
const hist = new Histogram<Date>()
|
||||
hist.sortAtEnd = this.sortAtEnd.map(name => new Date("" + name))
|
||||
|
||||
let earliest = undefined;
|
||||
let latest = undefined;
|
||||
this.counts.forEach((value, key) => {
|
||||
const d = new Date("" + key);
|
||||
if (earliest === undefined) {
|
||||
earliest = d
|
||||
} else if (d < earliest) {
|
||||
earliest = d
|
||||
}
|
||||
if (latest === undefined) {
|
||||
latest = d
|
||||
} else if (d > latest) {
|
||||
latest = d
|
||||
}
|
||||
hist.bump(d, value)
|
||||
})
|
||||
|
||||
if (addMissingDays) {
|
||||
while (earliest < latest) {
|
||||
earliest.setDate(earliest.getDate() + 1)
|
||||
hist.bump(earliest, 0)
|
||||
}
|
||||
}
|
||||
return hist
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a histogram:
|
||||
* 'a': 3
|
||||
* 'b': 5
|
||||
* 'c': 3
|
||||
* 'd': 1
|
||||
*
|
||||
* This will create a new histogram, which counts how much every count occurs, thus:
|
||||
* 5: 1 // as only 'b' had 5 counts
|
||||
* 3: 2 // as both 'a' and 'c' had 3 counts
|
||||
* 1: 1 // as only 'd' has 1 count
|
||||
*/
|
||||
public binPerCount(): Histogram<number> {
|
||||
const hist = new Histogram<number>()
|
||||
this.counts.forEach((value) => {
|
||||
hist.bump(value)
|
||||
})
|
||||
return hist;
|
||||
}
|
||||
|
||||
public stringifyName(): Histogram<string> {
|
||||
const hist = new Histogram<string>()
|
||||
this.counts.forEach((value, key) => {
|
||||
hist.bump("" + key, value)
|
||||
})
|
||||
return hist;
|
||||
}
|
||||
|
||||
public asPie(options: {
|
||||
name: string
|
||||
compare?: (a: K, b: K) => number
|
||||
}): PlotSpec {
|
||||
const self = this
|
||||
const entriesArray = Array.from(this.counts.entries())
|
||||
let type: string = (typeof entriesArray[0][0])
|
||||
if (entriesArray[0][0] instanceof Date) {
|
||||
type = "date"
|
||||
}
|
||||
const entries = entriesArray.map(kv => {
|
||||
return ({key: kv[0], value: kv[1]});
|
||||
})
|
||||
|
||||
if (options.compare) {
|
||||
entries.sort((a, b) => options.compare(a.key, b.key))
|
||||
} else {
|
||||
entries.sort((a, b) => b.value - a.value)
|
||||
}
|
||||
entries.sort((a, b) => self.sortAtEnd.indexOf(a.key) - self.sortAtEnd.indexOf(b.key))
|
||||
|
||||
|
||||
const graph: PlotSpec = {
|
||||
name: options.name,
|
||||
interpetKeysAs: type,
|
||||
plot: {
|
||||
|
||||
type: "pie",
|
||||
count: entries.map(kv => {
|
||||
if (kv.key instanceof Date) {
|
||||
return ({key: kv.key.toISOString(), value: kv.value})
|
||||
}
|
||||
return ({key: kv.key + "", value: kv.value});
|
||||
})
|
||||
},
|
||||
render: undefined
|
||||
}
|
||||
graph.render = () => createGraph(graph.name, graph)
|
||||
return graph;
|
||||
}
|
||||
|
||||
public asBar(options: {
|
||||
name: string
|
||||
compare?: (a: K, b: K) => number
|
||||
}): PlotSpec {
|
||||
const spec = this.asPie(options)
|
||||
spec.plot.type = "bar"
|
||||
return spec;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class Group<K, V> {
|
||||
|
||||
public groups: Map<K, V[]> = new Map<K, V[]>()
|
||||
|
||||
constructor(features?: any[], fkey?: (feature: any) => K, fvalue?: (feature: any) => V) {
|
||||
const self = this;
|
||||
features?.forEach(f => {
|
||||
self.bump(fkey(f), fvalue(f))
|
||||
})
|
||||
}
|
||||
|
||||
public static createStackedBarChartPerDay(name: string, features: any, extractV: (feature: any) => string, minNeededTotal = 1): void {
|
||||
const perDay = new Group<string, string>(
|
||||
features,
|
||||
f => f.properties.date.substr(0, 10),
|
||||
extractV
|
||||
)
|
||||
|
||||
createGraph(
|
||||
name,
|
||||
...Array.from(
|
||||
stackHists<string, string>(
|
||||
perDay.asGroupedHists()
|
||||
.filter(tpl => tpl[1].total() > minNeededTotal)
|
||||
.map(tpl => [`${tpl[0]} (${tpl[1].total()})`, tpl[1]])
|
||||
)
|
||||
)
|
||||
.map(
|
||||
tpl => {
|
||||
const [name, hist] = tpl
|
||||
return hist
|
||||
.keyToDate(true)
|
||||
.asBar({
|
||||
name: name
|
||||
});
|
||||
}
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
public bump(key: K, value: V) {
|
||||
if (!this.groups.has(key)) {
|
||||
this.groups.set(key, [])
|
||||
}
|
||||
this.groups.get(key).push(value)
|
||||
}
|
||||
|
||||
public asHist(dedup = false): Histogram<K> {
|
||||
const hist = new Histogram<K>()
|
||||
this.groups.forEach((values, key) => {
|
||||
if (dedup) {
|
||||
hist.bump(key, new Set(values).size)
|
||||
} else {
|
||||
hist.bump(key, values.length)
|
||||
}
|
||||
})
|
||||
return hist
|
||||
}
|
||||
|
||||
asGroupedHists(): [V, Histogram<K>][] {
|
||||
|
||||
const allHists = new Map<V, Histogram<K>>()
|
||||
|
||||
const allValues = new Set<V>();
|
||||
Array.from(this.groups.values()).forEach(vs =>
|
||||
vs.forEach(v => {
|
||||
allValues.add(v)
|
||||
})
|
||||
)
|
||||
|
||||
allValues.forEach(v => allHists.set(v, new Histogram<K>()))
|
||||
|
||||
this.groups.forEach((values, key) => {
|
||||
values.forEach(v => {
|
||||
allHists.get(v).bump(key)
|
||||
})
|
||||
})
|
||||
|
||||
return Array.from(allHists.entries())
|
||||
}
|
||||
}
|
||||
|
||||
function stackHists<K, V>(hists: [V, Histogram<K>][]): [V, Histogram<K>][] {
|
||||
const runningTotals = new Histogram<K>()
|
||||
const result: [V, Histogram<K>][] = []
|
||||
hists.forEach(vhist => {
|
||||
const hist = vhist[1]
|
||||
const clone = hist.Clone()
|
||||
clone.bumpHist(runningTotals)
|
||||
runningTotals.bumpHist(hist)
|
||||
result.push([vhist[0], clone])
|
||||
})
|
||||
result.reverse()
|
||||
return result
|
||||
}
|
||||
|
||||
|
||||
function createGraphs(allFeatures: ChangeSetData[], appliedFilterDescription: string) {
|
||||
const hist = new Histogram<string>(allFeatures.map(f => f.properties.metadata.theme))
|
||||
hist
|
||||
.addCountToName()
|
||||
.createOthersCategory("other", 40)
|
||||
.asPie({
|
||||
name: "Changesets per theme" + appliedFilterDescription
|
||||
}).render()
|
||||
|
||||
hist
|
||||
.createOthersCategory("other", 20)
|
||||
.addCountToName()
|
||||
.asBar({name: "Changesets per theme (bar)" + appliedFilterDescription}).render()
|
||||
|
||||
|
||||
new Histogram<string>(allFeatures.map(f => f.properties.user))
|
||||
.binPerCount()
|
||||
.stringifyName()
|
||||
.createOthersCategory("25 or more", (key, _) => Number(key) >= 25).asBar(
|
||||
{
|
||||
compare: (a, b) => Number(a) - Number(b),
|
||||
name: "Contributors per changeset count" + appliedFilterDescription
|
||||
}).render()
|
||||
|
||||
new Histogram<string>(allFeatures.map(f => f.properties.metadata.host))
|
||||
.asPie({
|
||||
name: "Changesets per host"
|
||||
}).render()
|
||||
|
||||
|
||||
Group.createStackedBarChartPerDay(
|
||||
"Changesets per theme" + appliedFilterDescription,
|
||||
allFeatures,
|
||||
f => f.properties.metadata.theme,
|
||||
25
|
||||
)
|
||||
|
||||
Group.createStackedBarChartPerDay(
|
||||
"Changesets per version number" + appliedFilterDescription,
|
||||
allFeatures,
|
||||
f => f.properties.editor.substr("MapComplete ".length, 6).replace(/[a-zA-Z-/]/g, ''),
|
||||
1
|
||||
)
|
||||
|
||||
{
|
||||
// Contributors (unique + unique new) per day
|
||||
const contributorCountPerDay = new Group<string, string>()
|
||||
const newContributorsPerDay = new Group<string, string>()
|
||||
const seenContributors = new Set<string>()
|
||||
allFeatures.forEach(f => {
|
||||
const user = f.properties.user
|
||||
const day = f.properties.date.substr(0, 10)
|
||||
contributorCountPerDay.bump(day, user)
|
||||
if (!seenContributors.has(user)) {
|
||||
seenContributors.add(user)
|
||||
newContributorsPerDay.bump(day, user)
|
||||
}
|
||||
})
|
||||
const total = new Set(allFeatures.map(f => f.properties.user)).size
|
||||
createGraph(
|
||||
`Contributors per day${appliedFilterDescription} (${total} total contributors)`,
|
||||
contributorCountPerDay
|
||||
.asHist(true)
|
||||
.keyToDate(true)
|
||||
.asBar({
|
||||
name: "Unique contributors per day"
|
||||
}),
|
||||
newContributorsPerDay
|
||||
.asHist(true)
|
||||
.keyToDate(true)
|
||||
.asBar({
|
||||
name: "New, unique contributors per day"
|
||||
}),
|
||||
)
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
// new StatsDownloader("stats").DownloadStats()
|
||||
const allPaths = readdirSync("stats")
|
||||
.filter(p => p.startsWith("stats.") && p.endsWith(".json"));
|
||||
let allFeatures: ChangeSetData[] = [].concat(...allPaths
|
||||
.map(path => JSON.parse(readFileSync("stats/" + path, "utf-8")).features
|
||||
.map(cs => ChangesetDataTools.cleanChangesetData(cs))));
|
||||
|
||||
const emptyCS = allFeatures.filter(f => f.properties.metadata.theme === "EMPTY CS")
|
||||
allFeatures = allFeatures.filter(f => f.properties.metadata.theme !== "EMPTY CS")
|
||||
|
||||
new Histogram(emptyCS.map(f => f.properties.date)).keyToDate().asBar({
|
||||
name: "Empty changesets by date"
|
||||
}).render()
|
||||
|
||||
|
||||
writeFileSync("centerpoints.geojson", JSON.stringify({
|
||||
type: "FeatureCollection",
|
||||
features: allFeatures.map(f => {
|
||||
try {
|
||||
return GeoOperations.centerpoint(f.geometry);
|
||||
} catch (e) {
|
||||
console.error("Could not create center point: ", e, f)
|
||||
}
|
||||
})
|
||||
}))
|
||||
|
||||
|
||||
createGraphs(allFeatures, "")
|
||||
createGraphs(allFeatures.filter(f => f.properties.date.startsWith("2020")), " in 2020")
|
||||
createGraphs(allFeatures.filter(f => f.properties.date.startsWith("2021")), " in 2021")
|
||||
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue