From d952b73e9e872ea288b2a1c372c2503ea4d6cfb4 Mon Sep 17 00:00:00 2001 From: pietervdvn Date: Thu, 17 Mar 2022 16:40:53 +0100 Subject: [PATCH] New graph generation --- Docs/Tools/GenPlot.py | 7 +- Docs/Tools/GenerateSeries.ts | 213 +++++++++++++++++++++++++---------- 2 files changed, 161 insertions(+), 59 deletions(-) diff --git a/Docs/Tools/GenPlot.py b/Docs/Tools/GenPlot.py index 60601b980..bea0e098a 100644 --- a/Docs/Tools/GenPlot.py +++ b/Docs/Tools/GenPlot.py @@ -35,7 +35,10 @@ def createBar(options): keys = genKeys(data, options["interpetKeysAs"]) values = list(map(lambda kv: kv["value"], data)) - pyplot.bar(keys, values, label=options["name"]) + color = None + if "color" in options["plot"]: + color = options["plot"]["color"] + pyplot.bar(keys, values, label=options["name"], color=color) pyplot.legend() @@ -72,4 +75,4 @@ while (True): createLine(options) else: print("Unkown type: " + options.type) -print("Plot generated") \ No newline at end of file +print("Plot generated") diff --git a/Docs/Tools/GenerateSeries.ts b/Docs/Tools/GenerateSeries.ts index 9d64a5ff6..1c0a4d043 100644 --- a/Docs/Tools/GenerateSeries.ts +++ b/Docs/Tools/GenerateSeries.ts @@ -14,7 +14,6 @@ class StatsDownloader { private readonly _targetDirectory: string; - constructor(targetDirectory = ".") { this._targetDirectory = targetDirectory; @@ -177,6 +176,11 @@ class ChangesetDataTools { } if (cs.properties.modify + cs.properties.delete + cs.properties.create == 0) { cs.properties.metadata.theme = "EMPTY CS" + } + try { + cs.properties.metadata.host = new URL(cs.properties.metadata.host).host + } catch (e) { + } return cs } @@ -193,18 +197,18 @@ interface PlotSpec { type: "stacked-bar" count: { label: string, - values: { key: string | Date, value: number }[] + values: { key: string | Date, value: number }[], + color?: string }[] }, - render() + render(): Promise } - function createGraph( title: string, - ...options: PlotSpec[]) { - console.log("Creating graph",title,"...") + ...options: PlotSpec[]): Promise { + console.log("Creating graph", title, "...") const process = exec("python3 GenPlot.py \"graphs/" + title + "\"", ((error, stdout, stderr) => { console.log("Python: ", stdout) if (error !== null) { @@ -221,6 +225,11 @@ function createGraph( } process.stdin._write("\n", "utf-8", undefined) + + return new Promise((resolve) => { + process.on("exit", () => resolve()) + }) + } class Histogram { @@ -414,16 +423,18 @@ class Histogram { }, render: undefined } - graph.render = () => createGraph(graph.name, graph) + graph.render = async () => await createGraph(graph.name, graph) return graph; } public asBar(options: { name: string - compare?: (a: K, b: K) => number + compare?: (a: K, b: K) => number, + color?: string }): PlotSpec { const spec = this.asPie(options) spec.plot.type = "bar" + spec.plot["color"] = options.color return spec; } @@ -438,6 +449,10 @@ class Histogram { } +/** + * A group keeps track of a matrix of changes, e.g. + * 'All contributors per day'. This will be stored internally, e.g. as {'2022-03-16' --> ['Pieter Vander Vennet', 'Pieter Vander Vennet', 'Joost Schouppe', 'Pieter Vander Vennet', 'dentonny', ...]} + */ class Group { public groups: Map = new Map() @@ -497,6 +512,11 @@ class Group { return hist } + /** + * Given a group, creates a kind of histogram. + * E.g: if the Group is {'2022-03-16' --> ['Pieter Vander Vennet', 'Pieter Vander Vennet', 'Seppe Santens']}, the resulting 'groupedHists' will be: + * [['Pieter Vander Vennet', {'2022-03-16' --> 2}],['Seppe Santens', {'2022-03-16' --> 1}]] + */ asGroupedHists(): [V, Histogram][] { const allHists = new Map>() @@ -520,6 +540,10 @@ class Group { } } +/** + * + * @param hists + */ function stackHists(hists: [V, Histogram][]): [V, Histogram][] { const runningTotals = new Histogram() const result: [V, Histogram][] = [] @@ -534,25 +558,93 @@ function stackHists(hists: [V, Histogram][]): [V, Histogram][] { return result } +/** + * Given histograms which should be shown as bars on top of each other, creates a new list of histograms with adjusted heights in order to create a coherent sum + * e.g.: for a given day, there are 2 deletions, 3 additions and 5 answers, this will be ordered as 2, 5 and 10 in order to mimic a coherent bar + * @param hists + */ +function stackHistsSimple(hists: Histogram[]): Histogram[] { + const runningTotals = new Histogram() + const result: Histogram[] = [] + for (const hist of hists) { + const clone = hist.Clone() + clone.bumpHist(runningTotals) // "Copies" one histogram into the other + runningTotals.bumpHist(hist) + result.push(clone) + } + result.reverse(/* Changes in place, safe copy*/) + return result +} -function createGraphs(allFeatures: ChangeSetData[], appliedFilterDescription: string, cutoff = undefined) { +function createActualChangesGraph(allFeatures: ChangeSetData[], appliedFilterDescription: string) { + const metadataOptions = { + "answer": "#5b5bdc", + "create": "#46ea46", + "move": "#ffa600", + "deletion": "#ff0000", + "soft-delete": "#ff8888", + "add-image": "#8888ff", + "import": "#00ff00", + "conflation": "#ffff00", + "split": "#000000", + "relation-fix": "#cccccc", + "delete-image": "#ff00ff" + } + + const metadataKeys: string[] = Object.keys(metadataOptions) + const histograms: Map> = new Map>() // {metakey --> Histogram} + allFeatures.forEach(f => { + const day = f.properties.date.substr(0, 10) + + for (const key of metadataKeys) { + const v = f.properties.metadata[key] + if (v === undefined) { + continue + } + const count = Number(v) + if (isNaN(count)) { + continue + } + if (!histograms.has(key)) { + histograms.set(key, new Histogram()) + } + histograms.get(key).bump(day, count) + } + + }) + + + const entries = stackHists(Array.from(histograms.entries())) + + const allGraphs = entries.map(([name, stackedHist]) => { + const hist = histograms.get(name) + return stackedHist + .keyToDate(true) + .asBar({name: `${name} (${hist.total()})`, color: metadataOptions[name]}); + } + ) + + createGraph("Actual changes" + appliedFilterDescription, ...allGraphs) +} + +async function createGraphs(allFeatures: ChangeSetData[], appliedFilterDescription: string, cutoff = undefined) { const hist = new Histogram(allFeatures.map(f => f.properties.metadata.theme)) - hist + await hist .createOthersCategory("other", cutoff ?? 20) .addCountToName() .asBar({name: "Changesets per theme (bar)" + appliedFilterDescription}) - .render() + .render() - new Histogram(allFeatures.map(f => f.properties.user)) + await new Histogram(allFeatures.map(f => f.properties.user)) .binPerCount() .stringifyName() - .createOthersCategory("25 or more", (key, _) => Number(key) >=(cutoff ?? 25)).asBar( - { - compare: (a, b) => Number(a) - Number(b), - name: "Contributors per changeset count" + appliedFilterDescription - }) - .render() + .createOthersCategory("25 or more", (key, _) => Number(key) >= (cutoff ?? 25)).asBar( + { + compare: (a, b) => Number(a) - Number(b), + name: "Contributors per changeset count" + appliedFilterDescription + }) + .render() const csPerDay = new Histogram(allFeatures.map(f => f.properties.date.substr(0, 10))) @@ -572,12 +664,12 @@ function createGraphs(allFeatures: ChangeSetData[], appliedFilterDescription: st } return keys }) - .keyToDate() + .keyToDate(true) .asLine({ - compare: (a, b) => a.getTime() - b.getTime(), - name: "Rolling 7 day average" + appliedFilterDescription - }) - + compare: (a, b) => a.getTime() - b.getTime(), + name: "Rolling 7 day average" + appliedFilterDescription + }) + const perDayAvgMonth = csPerDay.asRunningAverages(key => { const keys = [] for (let i = 0; i < 31; i++) { @@ -588,18 +680,19 @@ function createGraphs(allFeatures: ChangeSetData[], appliedFilterDescription: st }) .keyToDate() .asLine({ - compare: (a, b) => a.getTime() - b.getTime(), - name: "Rolling 31 day average" + appliedFilterDescription - }) - - createGraph("Changesets per day (line)" + appliedFilterDescription, perDayLine, perDayAvg, perDayAvgMonth) + compare: (a, b) => a.getTime() - b.getTime(), + name: "Rolling 31 day average" + appliedFilterDescription + }) - new Histogram(allFeatures.map(f => f.properties.metadata.host)) + await createGraph("Changesets per day (line)" + appliedFilterDescription, perDayLine, perDayAvg, perDayAvgMonth) + + + await new Histogram(allFeatures.map(f => f.properties.metadata.host)) .asPie({ name: "Changesets per host" + appliedFilterDescription }).render() - new Histogram(allFeatures.map(f => f.properties.metadata.theme)) + await new Histogram(allFeatures.map(f => f.properties.metadata.theme)) .createOthersCategory("< 25 changesets", (cutoff ?? 25)) .addCountToName() .asPie({ @@ -613,23 +706,24 @@ function createGraphs(allFeatures: ChangeSetData[], appliedFilterDescription: st cutoff ?? 25 ) + Group.createStackedBarChartPerDay( "Changesets per version number" + appliedFilterDescription, allFeatures, f => f.properties.editor?.substr("MapComplete ".length, 6)?.replace(/[a-zA-Z-/]/g, '') ?? "UNKNOWN", cutoff ?? 1 ) - + Group.createStackedBarChartPerDay( "Changesets per minor version number" + appliedFilterDescription, allFeatures, f => { - const base = f.properties.editor?.substr("MapComplete ".length)?.replace(/[a-zA-Z-/]/g, '') ?? "UNKNOWN" - const [major, minor, patch] = base.split(".") - return major+"."+minor - + const base = f.properties.editor?.substr("MapComplete ".length)?.replace(/[a-zA-Z-/]/g, '') ?? "UNKNOWN" + const [major, minor, patch] = base.split(".") + return major + "." + minor + }, - cutoff ??1 + cutoff ?? 1 ) Group.createStackedBarChartPerDay( @@ -654,7 +748,7 @@ function createGraphs(allFeatures: ChangeSetData[], appliedFilterDescription: st } }) const total = new Set(allFeatures.map(f => f.properties.user)).size - createGraph( + await createGraph( `Contributors per day${appliedFilterDescription}`, contributorCountPerDay .asHist(true) @@ -670,14 +764,14 @@ function createGraphs(allFeatures: ChangeSetData[], appliedFilterDescription: st }), ) - + await createActualChangesGraph(allFeatures, appliedFilterDescription); } } -function createMiscGraphs(allFeatures: ChangeSetData[], emptyCS: ChangeSetData[]) { - new Histogram(emptyCS.map(f => f.properties.date)).keyToDate().asBar({ +async function createMiscGraphs(allFeatures: ChangeSetData[], emptyCS: ChangeSetData[]) { + await new Histogram(emptyCS.map(f => f.properties.date)).keyToDate().asBar({ name: "Empty changesets by date" }).render() const geojson = { @@ -691,47 +785,52 @@ function createMiscGraphs(allFeatures: ChangeSetData[], emptyCS: ChangeSetData[] for (const key in f.properties.metadata) { point.properties[key] = f.properties.metadata[key] } - + return point } catch (e) { console.error("Could not create center point: ", e, f) return undefined } - })) + })) } writeFileSync("centerpoints.geojson", JSON.stringify(geojson, undefined, 2)) } -async function main(): Promise{ - if(!existsSync("graphs")){ +async function main(): Promise { + if (!existsSync("graphs")) { mkdirSync("graphs") } - - if(process.argv.indexOf("--no-download") < 0){ + + if (process.argv.indexOf("--no-download") < 0) { await new StatsDownloader("stats").DownloadStats() } const allPaths = readdirSync("stats") .filter(p => p.startsWith("stats.") && p.endsWith(".json")); let allFeatures: ChangeSetData[] = [].concat(...allPaths - .map(path => JSON.parse(readFileSync("stats/" + path, "utf-8")).features + .map(path => JSON.parse(readFileSync("stats/" + path, "utf-8")).features .map(cs => ChangesetDataTools.cleanChangesetData(cs)))); allFeatures = allFeatures.filter(f => f.properties.editor === null || f.properties.editor.toLowerCase().startsWith("mapcomplete")) const emptyCS = allFeatures.filter(f => f.properties.metadata.theme === "EMPTY CS") allFeatures = allFeatures.filter(f => f.properties.metadata.theme !== "EMPTY CS") - - const noEditor = allFeatures.filter(f => f.properties.editor === null).map(f =>"https://www.osm.org/changeset/"+ f.id) + + const noEditor = allFeatures.filter(f => f.properties.editor === null).map(f => "https://www.osm.org/changeset/" + f.id) writeFileSync("missing_editor.json", JSON.stringify(noEditor, null, " ")); - if(process.argv.indexOf("--no-graphs") >= 0){ - return + if (process.argv.indexOf("--no-graphs") >= 0) { + return } - createMiscGraphs(allFeatures, emptyCS) - createGraphs(allFeatures, "") - // createGraphs(allFeatures.filter(f => f.properties.date.startsWith("2020")), " in 2020") - // createGraphs(allFeatures.filter(f => f.properties.date.startsWith("2021")), " in 2021") - createGraphs(allFeatures.filter(f => f.properties.date.startsWith("2022")), " in 2022"), - createGraphs(allFeatures.filter(f => f.properties.metadata.theme==="toerisme_vlaanderen"), " met pin je punt", 0) + await createMiscGraphs(allFeatures, emptyCS) + + const grbOnly = allFeatures.filter(f => f.properties.metadata.theme === "grb") + allFeatures = allFeatures.filter(f => f.properties.metadata.theme !== "grb") + await createGraphs(allFeatures, "") + await createGraphs(allFeatures.filter(f => f.properties.date.startsWith("2020")), " in 2020") + await createGraphs(allFeatures.filter(f => f.properties.date.startsWith("2021")), " in 2021") + await createGraphs(allFeatures.filter(f => f.properties.date.startsWith("2022")), " in 2022") + await createGraphs(allFeatures.filter(f => f.properties.metadata.theme === "toerisme_vlaanderen"), " met pin je punt", 0) + await createGraphs(grbOnly, " with the GRB import tool", 0) + } main().then(_ => console.log("All done!"))