| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  | import csv | 
					
						
							|  |  |  | from datetime import datetime | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from matplotlib import pyplot | 
					
						
							| 
									
										
										
										
											2021-03-16 19:27:06 +01:00
										 |  |  | import re | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-10 16:02:19 +02:00
										 |  |  | useLegend = True | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  | def counts(lst): | 
					
						
							|  |  |  |     counts = {} | 
					
						
							|  |  |  |     for v in lst: | 
					
						
							|  |  |  |         if not v in counts: | 
					
						
							|  |  |  |             counts[v] = 0 | 
					
						
							|  |  |  |         counts[v] += 1 | 
					
						
							|  |  |  |     return counts | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class Hist: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self, firstcolumn): | 
					
						
							|  |  |  |         self.key = "\"" + firstcolumn + "\"" | 
					
						
							|  |  |  |         self.dictionary = {} | 
					
						
							|  |  |  |         self.key = "" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def add(self, key, value): | 
					
						
							|  |  |  |         if not key in self.dictionary: | 
					
						
							|  |  |  |             self.dictionary[key] = [] | 
					
						
							|  |  |  |         self.dictionary[key].append(value) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def values(self): | 
					
						
							|  |  |  |         allV = [] | 
					
						
							|  |  |  |         for v in self.dictionary.values(): | 
					
						
							|  |  |  |             allV += list(set(v)) | 
					
						
							|  |  |  |         return list(set(allV)) | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |     def keys(self): | 
					
						
							|  |  |  |         return self.dictionary.keys() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get(self, key): | 
					
						
							|  |  |  |         if key in self.dictionary: | 
					
						
							|  |  |  |             return self.dictionary[key] | 
					
						
							|  |  |  |         return None | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-03 21:48:44 +02:00
										 |  |  |     # Returns values.map(f).  | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |     def map(self, f): | 
					
						
							|  |  |  |         vals = [] | 
					
						
							|  |  |  |         keys = self.keys() | 
					
						
							|  |  |  |         for key in keys: | 
					
						
							|  |  |  |             vals.append(f(self.get(key))) | 
					
						
							|  |  |  |         return vals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def mapcumul(self, f, add, zero): | 
					
						
							|  |  |  |         vals = [] | 
					
						
							|  |  |  |         running_value = zero | 
					
						
							|  |  |  |         keys = self.keys() | 
					
						
							|  |  |  |         for key in keys: | 
					
						
							|  |  |  |             v = f(self.get(key)) | 
					
						
							|  |  |  |             running_value = add(running_value, v) | 
					
						
							|  |  |  |             vals.append(running_value) | 
					
						
							|  |  |  |         return vals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-03 21:48:44 +02:00
										 |  |  |     # Returns [(key, flatten(values))] To be used with e.g. pyplot.plot | 
					
						
							| 
									
										
										
										
											2021-03-29 00:18:32 +02:00
										 |  |  |     def flatten(self, flatten): | 
					
						
							|  |  |  |         result = [] | 
					
						
							|  |  |  |         keys = self.keys() | 
					
						
							|  |  |  |         for key in keys: | 
					
						
							|  |  |  |             v = flatten(self.get(key)) | 
					
						
							|  |  |  |             result.append((key, v)) | 
					
						
							|  |  |  |         return result | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |     def csv(self): | 
					
						
							|  |  |  |         csv = self.key + "," + ",".join(self.values()) | 
					
						
							|  |  |  |         header = self.values() | 
					
						
							|  |  |  |         for k in self.dictionary.keys(): | 
					
						
							|  |  |  |             csv += k | 
					
						
							|  |  |  |             values = counts(self.dictionary[k]) | 
					
						
							|  |  |  |             for head in header: | 
					
						
							|  |  |  |                 if head in values: | 
					
						
							|  |  |  |                     csv += "," + str(values[head]) | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     csv += ",0" | 
					
						
							|  |  |  |             csv += "\n" | 
					
						
							|  |  |  |         return csv | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     def __str__(self): | 
					
						
							|  |  |  |         return str(self.dictionary) | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | def build_hist(stats, keyIndex, valueIndex): | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |     hist = Hist("date") | 
					
						
							|  |  |  |     c = 0 | 
					
						
							|  |  |  |     for row in stats: | 
					
						
							|  |  |  |         c += 1 | 
					
						
							|  |  |  |         hist.add(row[keyIndex], row[valueIndex]) | 
					
						
							|  |  |  |     return hist | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  | def as_date(str): | 
					
						
							|  |  |  |     return datetime.strptime(str, "%Y-%m-%d") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def cumulative_users(stats): | 
					
						
							|  |  |  |     users_hist = build_hist(stats, 0, 1) | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |     all_users_per_day = users_hist.mapcumul( | 
					
						
							|  |  |  |         lambda users: set(users), | 
					
						
							|  |  |  |         lambda a, b: a.union(b), | 
					
						
							|  |  |  |         set([]) | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     cumul_uniq = list(map(len, all_users_per_day)) | 
					
						
							|  |  |  |     unique_per_day = users_hist.map(lambda users: len(set(users))) | 
					
						
							|  |  |  |     new_users = [0] | 
					
						
							|  |  |  |     for i in range(len(cumul_uniq) - 1): | 
					
						
							|  |  |  |         new_users.append(cumul_uniq[i + 1] - cumul_uniq[i]) | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     dates = map(as_date, users_hist.keys()) | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |     return list(dates), cumul_uniq, list(unique_per_day), list(new_users) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def pyplot_init(): | 
					
						
							| 
									
										
										
										
											2021-06-06 15:35:14 +02:00
										 |  |  |     pyplot.close('all') | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |     pyplot.figure(figsize=(14, 8), dpi=200) | 
					
						
							|  |  |  |     pyplot.xticks(rotation='vertical') | 
					
						
							| 
									
										
										
										
											2021-04-03 00:16:04 +02:00
										 |  |  |     pyplot.grid() | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  | def create_usercount_graphs(stats, extra_text=""): | 
					
						
							|  |  |  |     print("Creating usercount graphs " + extra_text) | 
					
						
							|  |  |  |     dates, cumul_uniq, unique_per_day, new_users = cumulative_users(stats) | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |     total = cumul_uniq[-1] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     pyplot_init() | 
					
						
							| 
									
										
										
										
											2021-03-29 00:18:32 +02:00
										 |  |  |     pyplot.bar(dates, unique_per_day, label='Unique contributors') | 
					
						
							|  |  |  |     pyplot.bar(dates, new_users, label='First time contributor via MapComplete') | 
					
						
							| 
									
										
										
										
											2021-05-10 16:02:19 +02:00
										 |  |  |     if (useLegend): | 
					
						
							|  |  |  |         pyplot.legend() | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     pyplot.title("Unique contributors" + extra_text + ' with MapComplete (' + str(total) + ' contributors)') | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |     pyplot.ylabel("Number of unique contributors") | 
					
						
							|  |  |  |     pyplot.xlabel("Date") | 
					
						
							| 
									
										
										
										
											2021-03-16 19:27:06 +01:00
										 |  |  |     pyplot.savefig("Contributors" + extra_text + ".png", dpi=400, facecolor='w', edgecolor='w') | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     pyplot_init() | 
					
						
							|  |  |  |     pyplot.plot(dates, cumul_uniq, label='Cumulative unique contributors') | 
					
						
							| 
									
										
										
										
											2021-05-10 16:02:19 +02:00
										 |  |  |     if (useLegend): | 
					
						
							|  |  |  |         pyplot.legend() | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     pyplot.title("Cumulative unique contributors" + extra_text + " with MapComplete - " + str(total) + " contributors") | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |     pyplot.ylabel("Number of unique contributors") | 
					
						
							|  |  |  |     pyplot.xlabel("Date") | 
					
						
							| 
									
										
										
										
											2021-03-16 19:27:06 +01:00
										 |  |  |     pyplot.savefig("CumulativeContributors" + extra_text + ".png", dpi=400, facecolor='w', edgecolor='w') | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  | def create_contributors_per_total_cs(contents, extra_text="", cutoff=25, per_day=False): | 
					
						
							| 
									
										
										
										
											2021-06-03 21:48:44 +02:00
										 |  |  |     hist = Hist("contributor") | 
					
						
							|  |  |  |     for cs in contents: | 
					
						
							|  |  |  |         hist.add(cs[1], cs[0]) | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     count_per_contributor = hist.map(lambda dates: len(set(dates))) if per_day else hist.map(len) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-03 21:48:44 +02:00
										 |  |  |     per_count = Hist("per cs count") | 
					
						
							|  |  |  |     for cs_count in count_per_contributor: | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |         per_count.add(min(cs_count, cutoff), 1) | 
					
						
							| 
									
										
										
										
											2021-06-03 21:48:44 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     to_plot = per_count.flatten(len) | 
					
						
							|  |  |  |     to_plot.sort(key=lambda a: a[0]) | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |     to_plot[- 1] = (str(cutoff) + " or more", to_plot[-1][1]) | 
					
						
							| 
									
										
										
										
											2021-06-03 21:48:44 +02:00
										 |  |  |     pyplot_init() | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |     pyplot.bar(list(map(lambda a: str(a[0]), to_plot)), list(map(lambda a: a[1], to_plot))) | 
					
						
							|  |  |  |     pyplot.title("Contributors per total number of changesets" + extra_text) | 
					
						
							| 
									
										
										
										
											2021-06-03 21:48:44 +02:00
										 |  |  |     pyplot.ylabel("Number of contributors") | 
					
						
							|  |  |  |     pyplot.xlabel("Mapping days with MapComplete" if per_day else "Number of changesets with MapComplete") | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |     pyplot.savefig( | 
					
						
							|  |  |  |         "Contributors per total number of " + ("mapping days" if per_day else "changesets") + extra_text + ".png", | 
					
						
							|  |  |  |         dpi=400) | 
					
						
							| 
									
										
										
										
											2021-06-03 21:48:44 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-25 12:08:27 +02:00
										 |  |  | def create_theme_breakdown(stats, fileExtra="", cutoff=15): | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     print("Creating theme breakdown " + fileExtra) | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |     themeCounts = {} | 
					
						
							|  |  |  |     for row in stats: | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |         theme = row[3].lower() | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |         if theme in theme_remappings: | 
					
						
							|  |  |  |             theme = theme_remappings[theme] | 
					
						
							|  |  |  |         if theme in themeCounts: | 
					
						
							|  |  |  |             themeCounts[theme] += 1 | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             themeCounts[theme] = 1 | 
					
						
							|  |  |  |     themes = list(themeCounts.items()) | 
					
						
							|  |  |  |     if len(themes) == 0: | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |         print("No entries found for theme breakdown (extra: " + str(fileExtra) + ")") | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |         return | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     themes.sort(key=lambda kv: kv[1], reverse=True) | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |     other_count = sum([theme[1] for theme in themes if theme[1] < cutoff]) | 
					
						
							|  |  |  |     themes_filtered = [theme for theme in themes if theme[1] >= cutoff] | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     keys = list(map(lambda kv: kv[0] + " (" + str(kv[1]) + ")", themes_filtered)) | 
					
						
							|  |  |  |     values = list(map(lambda kv: kv[1], themes_filtered)) | 
					
						
							|  |  |  |     total = sum(map(lambda kv: kv[1], themes)) | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |     first_pct = themes[0][1] / total; | 
					
						
							|  |  |  |     if other_count > 0: | 
					
						
							|  |  |  |         keys.append("other") | 
					
						
							|  |  |  |         values.append(other_count) | 
					
						
							|  |  |  |     pyplot_init() | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     pyplot.pie(values, labels=keys, startangle=(90 - 360 * first_pct / 2)) | 
					
						
							|  |  |  |     pyplot.title("MapComplete changes per theme" + fileExtra + " - " + str(total) + " total changes") | 
					
						
							|  |  |  |     pyplot.savefig("Theme distribution" + fileExtra + ".png", dpi=400, facecolor='w', edgecolor='w', | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |                    bbox_inches='tight') | 
					
						
							|  |  |  |     return themes | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-29 00:18:32 +02:00
										 |  |  | def summed_changes_per(contents, extraText, sum_column=5): | 
					
						
							|  |  |  |     newPerDay = build_hist(contents, 0, 5) | 
					
						
							|  |  |  |     kv = newPerDay.flatten(sum) | 
					
						
							|  |  |  |     keysNew = list(map(lambda kv: as_date(kv[0]), kv)) | 
					
						
							|  |  |  |     valuesNew = list(map(lambda kv: kv[1], kv)) | 
					
						
							|  |  |  |     changedPerDay = build_hist(contents, 0, 6) | 
					
						
							|  |  |  |     kv = changedPerDay.flatten(sum) | 
					
						
							|  |  |  |     keysChanged = list(map(lambda kv: as_date(kv[0]), kv)) | 
					
						
							|  |  |  |     valuesChanged = list(map(lambda kv: kv[1], kv)) | 
					
						
							|  |  |  |     if len(keysChanged) == 0 and len(keysNew) == 0: | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     pyplot_init() | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |     text = "New and changed nodes per day " + extraText | 
					
						
							| 
									
										
										
										
											2021-03-29 00:18:32 +02:00
										 |  |  |     pyplot.title(text) | 
					
						
							|  |  |  |     if len(keysChanged) > 0: | 
					
						
							|  |  |  |         pyplot.bar(keysChanged, valuesChanged, label="Changed") | 
					
						
							|  |  |  |     if len(keysNew) > 0: | 
					
						
							|  |  |  |         pyplot.bar(keysNew, valuesNew, label="New") | 
					
						
							| 
									
										
										
										
											2021-05-10 16:02:19 +02:00
										 |  |  |     if (useLegend): | 
					
						
							|  |  |  |         pyplot.legend() | 
					
						
							| 
									
										
										
										
											2021-03-29 00:18:32 +02:00
										 |  |  |     pyplot.savefig(text) | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-15 15:53:54 +01:00
										 |  |  | def cumulative_changes_per(contents, index, subject, filenameextra="", cutoff=5, cumulative=True, sort=True): | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     print("Creating graph about " + subject + filenameextra) | 
					
						
							|  |  |  |     themes = Hist("date") | 
					
						
							|  |  |  |     dates_per_theme = Hist("theme") | 
					
						
							|  |  |  |     all_themes = set() | 
					
						
							|  |  |  |     for row in contents: | 
					
						
							|  |  |  |         th = row[index] | 
					
						
							|  |  |  |         all_themes.add(th) | 
					
						
							|  |  |  |         themes.add(as_date(row[0]), th) | 
					
						
							|  |  |  |         dates_per_theme.add(th, row[0]) | 
					
						
							|  |  |  |     per_theme_count = list(zip(dates_per_theme.keys(), dates_per_theme.map(len))) | 
					
						
							|  |  |  |     # PerThemeCount gives the most popular theme first | 
					
						
							| 
									
										
										
										
											2021-03-15 15:53:54 +01:00
										 |  |  |     if sort == True: | 
					
						
							|  |  |  |         per_theme_count.sort(key=lambda kv: kv[1], reverse=False) | 
					
						
							|  |  |  |     elif sort is not None: | 
					
						
							|  |  |  |         per_theme_count.sort(key=sort) | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     values_to_show = []  # (theme name, value to fill between - this is stacked, with the first layer to print last) | 
					
						
							|  |  |  |     running_totals = None | 
					
						
							|  |  |  |     other_total = 0 | 
					
						
							|  |  |  |     other_theme_count = 0 | 
					
						
							|  |  |  |     other_cumul = None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for kv in per_theme_count: | 
					
						
							|  |  |  |         theme = kv[0] | 
					
						
							|  |  |  |         total_for_this_theme = kv[1] | 
					
						
							|  |  |  |         if cumulative: | 
					
						
							|  |  |  |             edits_per_day_cumul = themes.mapcumul( | 
					
						
							|  |  |  |                 lambda themes_for_date: len([x for x in themes_for_date if theme == x]), | 
					
						
							|  |  |  |                 lambda a, b: a + b, 0) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             edits_per_day_cumul = themes.map(lambda themes_for_date: len([x for x in themes_for_date if theme == x])) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-13 17:25:21 +01:00
										 |  |  |         if (not cumulative) or (running_totals is None): | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |             running_totals = edits_per_day_cumul | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |         else: | 
					
						
							|  |  |  |             running_totals = list(map(lambda ab: ab[0] + ab[1], zip(running_totals, edits_per_day_cumul))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if total_for_this_theme >= cutoff: | 
					
						
							|  |  |  |             values_to_show.append((theme, running_totals)) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             other_total += total_for_this_theme | 
					
						
							|  |  |  |             other_theme_count += 1 | 
					
						
							|  |  |  |             if other_cumul is None: | 
					
						
							|  |  |  |                 other_cumul = edits_per_day_cumul | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 other_cumul = list(map(lambda ab: ab[0] + ab[1], zip(other_cumul, edits_per_day_cumul))) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-13 17:25:21 +01:00
										 |  |  |     keys = list(themes.keys()) | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     values_to_show.reverse() | 
					
						
							|  |  |  |     values_to_show.append(("other", other_cumul)) | 
					
						
							|  |  |  |     totals = dict(per_theme_count) | 
					
						
							|  |  |  |     total = sum(totals.values()) | 
					
						
							|  |  |  |     totals["other"] = other_total | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     pyplot_init() | 
					
						
							|  |  |  |     for kv in values_to_show: | 
					
						
							|  |  |  |         if kv[1] is None: | 
					
						
							|  |  |  |             continue  # No 'other' graph | 
					
						
							|  |  |  |         msg = kv[0] + " (" + str(totals[kv[0]]) + ")" | 
					
						
							|  |  |  |         if kv[0] == "other": | 
					
						
							|  |  |  |             msg = str(other_theme_count) + " small " + subject + "s (" + str(other_total) + " changes)" | 
					
						
							| 
									
										
										
										
											2021-03-13 17:25:21 +01:00
										 |  |  |         if cumulative: | 
					
						
							|  |  |  |             pyplot.fill_between(keys, kv[1], label=msg) | 
					
						
							|  |  |  |         else: | 
					
						
							| 
									
										
										
										
											2021-06-06 15:35:14 +02:00
										 |  |  |             pyplot.bar(keys, kv[1], label=msg) | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if cumulative: | 
					
						
							|  |  |  |         cumulative_txt = "Cumulative changesets" | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         cumulative_txt = "Changesets" | 
					
						
							|  |  |  |     pyplot.title(cumulative_txt + " per " + subject + filenameextra + " (" + str(total) + " changesets)") | 
					
						
							| 
									
										
										
										
											2021-05-10 16:02:19 +02:00
										 |  |  |     if (useLegend): | 
					
						
							|  |  |  |         pyplot.legend(loc="upper left", ncol=3) | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     pyplot.savefig(cumulative_txt + " per " + subject + filenameextra + ".png") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def contents_where(contents, index, starts_with, invert=False): | 
					
						
							|  |  |  |     for row in contents: | 
					
						
							|  |  |  |         if row[index].startswith(starts_with) is not invert: | 
					
						
							|  |  |  |             yield row | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-16 19:27:06 +01:00
										 |  |  | def sortable_user_number(kv): | 
					
						
							|  |  |  |     str = kv[0] | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |     ls = list(map(lambda str: "0" + str if len(str) < 2 else str, re.findall("[0-9]+", str))) | 
					
						
							| 
									
										
										
										
											2021-03-16 19:27:06 +01:00
										 |  |  |     return ".".join(ls) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  | def create_graphs(contents): | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |     # summed_changes_per(contents, "") | 
					
						
							| 
									
										
										
										
											2021-06-03 21:48:44 +02:00
										 |  |  |     create_contributors_per_total_cs(contents) | 
					
						
							|  |  |  |     create_contributors_per_total_cs(contents, per_day=True) | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-16 19:27:06 +01:00
										 |  |  |     cumulative_changes_per(contents, 4, "version number", cutoff=1, sort=sortable_user_number) | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     create_usercount_graphs(contents) | 
					
						
							|  |  |  |     create_theme_breakdown(contents) | 
					
						
							| 
									
										
										
										
											2021-03-29 00:18:32 +02:00
										 |  |  |     cumulative_changes_per(contents, 3, "created element", cutoff=10) | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     cumulative_changes_per(contents, 3, "theme", cutoff=10) | 
					
						
							| 
									
										
										
										
											2021-03-13 17:25:21 +01:00
										 |  |  |     cumulative_changes_per(contents, 3, "theme", cutoff=10, cumulative=False) | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     cumulative_changes_per(contents, 1, "contributor", cutoff=15) | 
					
						
							|  |  |  |     cumulative_changes_per(contents, 2, "language", cutoff=1) | 
					
						
							| 
									
										
										
										
											2021-03-13 15:49:14 +01:00
										 |  |  |     cumulative_changes_per(contents, 8, "host", cutoff=1) | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |     currentYear = datetime.now().year | 
					
						
							|  |  |  |     for year in range(2020, currentYear + 1): | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |         contents_filtered = list(contents_where(contents, 0, str(year))) | 
					
						
							|  |  |  |         extratext = " in " + str(year) | 
					
						
							| 
									
										
										
										
											2021-06-03 21:48:44 +02:00
										 |  |  |         create_contributors_per_total_cs(contents_filtered, extratext) | 
					
						
							|  |  |  |         create_contributors_per_total_cs(contents_filtered, extratext, per_day=True) | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |         create_usercount_graphs(contents_filtered, extratext) | 
					
						
							|  |  |  |         create_theme_breakdown(contents_filtered, extratext) | 
					
						
							|  |  |  |         cumulative_changes_per(contents_filtered, 3, "theme", extratext, cutoff=5) | 
					
						
							| 
									
										
										
										
											2021-03-13 17:25:21 +01:00
										 |  |  |         cumulative_changes_per(contents_filtered, 3, "theme", extratext, cutoff=5, cumulative=False) | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |         cumulative_changes_per(contents_filtered, 1, "contributor", extratext, cutoff=10) | 
					
						
							|  |  |  |         cumulative_changes_per(contents_filtered, 2, "language", extratext, cutoff=1) | 
					
						
							| 
									
										
										
										
											2021-03-16 19:27:06 +01:00
										 |  |  |         cumulative_changes_per(contents_filtered, 4, "version number", extratext, cutoff=1, cumulative=False, | 
					
						
							|  |  |  |                                sort=sortable_user_number) | 
					
						
							|  |  |  |         cumulative_changes_per(contents_filtered, 4, "version number", extratext, cutoff=1, sort=sortable_user_number) | 
					
						
							| 
									
										
										
										
											2021-03-13 17:25:21 +01:00
										 |  |  |         cumulative_changes_per(contents_filtered, 8, "host", extratext, cutoff=1) | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |         # summed_changes_per(contents_filtered, "for year " + str(year)) | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-23 13:19:39 +01:00
										 |  |  | def create_per_theme_graphs(contents, cutoff=10): | 
					
						
							| 
									
										
										
										
											2021-03-16 19:27:06 +01:00
										 |  |  |     all_themes = set(map(lambda row: row[3], contents)) | 
					
						
							|  |  |  |     for theme in all_themes: | 
					
						
							|  |  |  |         filtered = list(contents_where(contents, 3, theme)) | 
					
						
							| 
									
										
										
										
											2021-03-23 13:19:39 +01:00
										 |  |  |         if len(filtered) < cutoff: | 
					
						
							| 
									
										
										
										
											2021-03-16 19:27:06 +01:00
										 |  |  |             # less then 10 changesets - we do not map it | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         contributors = set(map(lambda row: row[1], filtered)) | 
					
						
							| 
									
										
										
										
											2021-03-29 00:18:32 +02:00
										 |  |  |         if len(contributors) >= 2: | 
					
						
							|  |  |  |             cumulative_changes_per(filtered, 1, "contributor", " for theme " + theme, cutoff=1) | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |         # if len(filtered) > 25: | 
					
						
							|  |  |  |         #    summed_changes_per(filtered, "for theme " + theme) | 
					
						
							| 
									
										
										
										
											2021-06-03 21:48:44 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-16 19:27:06 +01:00
										 |  |  | def create_per_contributor_graphs(contents, least_needed_changesets): | 
					
						
							|  |  |  |     all_contributors = set(map(lambda row: row[1], contents)) | 
					
						
							|  |  |  |     for contrib in all_contributors: | 
					
						
							|  |  |  |         filtered = list(contents_where(contents, 1, contrib)) | 
					
						
							|  |  |  |         if len(filtered) < least_needed_changesets: | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |             print("Skipping " + contrib + " - too little changesets"); | 
					
						
							| 
									
										
										
										
											2021-03-16 19:27:06 +01:00
										 |  |  |             continue | 
					
						
							|  |  |  |         themes = set(map(lambda row: row[3], filtered)) | 
					
						
							| 
									
										
										
										
											2021-03-29 00:18:32 +02:00
										 |  |  |         if len(themes) >= 2: | 
					
						
							|  |  |  |             cumulative_changes_per(filtered, 3, "theme", " for contributor " + contrib, cutoff=1) | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |         # if len(filtered) > 25: | 
					
						
							|  |  |  |         #     summed_changes_per(filtered, "for contributor " + contrib) | 
					
						
							| 
									
										
										
										
											2021-03-16 19:27:06 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  | theme_remappings = { | 
					
						
							|  |  |  |     "metamap": "maps", | 
					
						
							|  |  |  |     "groen": "buurtnatuur", | 
					
						
							| 
									
										
										
										
											2021-03-29 00:18:32 +02:00
										 |  |  |     "updaten van metadata met mapcomplete": "buurtnatuur", | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |     "Toevoegen of dit natuurreservaat toegangkelijk is": "buurtnatuur", | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     "wiki:mapcomplete/fritures": "fritures", | 
					
						
							|  |  |  |     "wiki:MapComplete/Fritures": "fritures", | 
					
						
							|  |  |  |     "lits": "lit", | 
					
						
							|  |  |  |     "pomp": "cyclofix", | 
					
						
							|  |  |  |     "wiki:user:joost_schouppe/campersite": "campersite", | 
					
						
							|  |  |  |     "wiki-user-joost_schouppe-geveltuintjes": "geveltuintjes", | 
					
						
							| 
									
										
										
										
											2021-03-16 19:27:06 +01:00
										 |  |  |     "wiki-user-joost_schouppe-campersite": "campersite", | 
					
						
							|  |  |  |     "wiki-User-joost_schouppe-campersite": "campersite", | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     "wiki-User-joost_schouppe-geveltuintjes": "geveltuintjes", | 
					
						
							| 
									
										
										
										
											2021-03-16 19:27:06 +01:00
										 |  |  |     "wiki:User:joost_schouppe/campersite": "campersite", | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |     "arbres": "arbres_llefia", | 
					
						
							| 
									
										
										
										
											2021-06-03 21:48:44 +02:00
										 |  |  |     "aed_brugge": "aed", | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |     "https://llefia.org/arbres/mapcomplete.json": "arbres_llefia", | 
					
						
							|  |  |  |     "https://llefia.org/arbres/mapcomplete1.json": "arbres_llefia", | 
					
						
							|  |  |  |     "toevoegen of dit natuurreservaat toegangkelijk is": "buurtnatuur", | 
					
						
							|  |  |  |     "testing mapcomplete 0.0.0": "buurtnatuur", | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |     "https://raw.githubusercontent.com/osmbe/play/master/mapcomplete/geveltuinen/geveltuinen.json": "geveltuintjes" | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def clean_input(contents): | 
					
						
							| 
									
										
										
										
											2021-03-12 13:48:49 +01:00
										 |  |  |     for row in contents: | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |         theme = row[3].strip().strip("\"").lower() | 
					
						
							|  |  |  |         if theme == "null": | 
					
						
							|  |  |  |             # The theme metadata has only been set later on - we fetch this from the comment | 
					
						
							|  |  |  |             i = row[7].rfind("#") | 
					
						
							|  |  |  |             theme = row[7][i + 1:-1].lower() | 
					
						
							|  |  |  |         if theme in theme_remappings: | 
					
						
							|  |  |  |             theme = theme_remappings[theme] | 
					
						
							| 
									
										
										
										
											2021-06-06 15:35:14 +02:00
										 |  |  |         if theme.rfind('/') > 0: | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |             theme = theme[theme.rfind('/') + 1:] | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |         row[3] = theme | 
					
						
							| 
									
										
										
										
											2021-03-16 19:27:06 +01:00
										 |  |  |         row[4] = row[4].strip().strip("\"")[len("MapComplete "):] | 
					
						
							|  |  |  |         row[4] = re.findall("[0-9]*\.[0-9]*\.[0-9]*", row[4])[0] | 
					
						
							| 
									
										
										
										
											2021-03-29 00:18:32 +02:00
										 |  |  |         row = [data.strip().strip("\"") for data in row] | 
					
						
							|  |  |  |         row[5] = int(row[5]) | 
					
						
							|  |  |  |         row[6] = int(row[6]) | 
					
						
							|  |  |  |         yield row | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  | # Merges changesets of the same theme and the samecontributos within the same hour, so that the stats are comparable | 
					
						
							|  |  |  | def mergeChangesets(contents): | 
					
						
							|  |  |  |     open_changesets = dict()  # {contributor --> {theme --> hour of last change}} | 
					
						
							|  |  |  |     for row in contents: | 
					
						
							|  |  |  |         theme = row[3] | 
					
						
							|  |  |  |         contributor = row[1] | 
					
						
							|  |  |  |         date = datetime.strptime(row[0], "%Y-%m-%dT%H:%M:%SZ") | 
					
						
							|  |  |  |         if (contributor not in open_changesets): | 
					
						
							|  |  |  |             open_changesets[contributor] = dict() | 
					
						
							|  |  |  |         perTheme = open_changesets[contributor] | 
					
						
							|  |  |  |         if (theme in perTheme): | 
					
						
							|  |  |  |             lastChange = perTheme[theme] | 
					
						
							|  |  |  |             diff = (date - lastChange).total_seconds() | 
					
						
							|  |  |  |             if(diff > 60*60): | 
					
						
							|  |  |  |                 yield row | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             yield row | 
					
						
							|  |  |  |         perTheme[theme] = date | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Removes the time from the date component     | 
					
						
							|  |  |  | def datesOnly(contents): | 
					
						
							|  |  |  |     for row in contents: | 
					
						
							|  |  |  |         row[0] = row[0].split("T")[0] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def contributor_count(stats, index=1, item="contributor"): | 
					
						
							| 
									
										
										
										
											2021-04-17 14:46:04 +02:00
										 |  |  |     seen_contributors = set() | 
					
						
							|  |  |  |     for line in stats: | 
					
						
							| 
									
										
										
										
											2021-06-24 01:17:53 +02:00
										 |  |  |         contributor = line[index] | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |         if (contributor in seen_contributors): | 
					
						
							| 
									
										
										
										
											2021-04-17 14:46:04 +02:00
										 |  |  |             continue | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |         print("New " + item + " " + str(len(seen_contributors) + 1) + ": " + contributor) | 
					
						
							| 
									
										
										
										
											2021-04-17 14:46:04 +02:00
										 |  |  |         seen_contributors.add(contributor) | 
					
						
							|  |  |  |         print(line) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  | def main(): | 
					
						
							| 
									
										
										
										
											2021-03-12 13:48:49 +01:00
										 |  |  |     print("Creating graphs...") | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |     with open('stats.csv', newline='') as csvfile: | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |         stats = list(clean_input(csv.reader(csvfile, delimiter=',', quotechar='"'))) | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  |         stats = list(mergeChangesets(stats)) | 
					
						
							|  |  |  |         datesOnly(stats) | 
					
						
							| 
									
										
										
										
											2021-03-13 15:31:28 +01:00
										 |  |  |         print("Found " + str(len(stats)) + " changesets") | 
					
						
							| 
									
										
										
										
											2021-06-27 01:47:24 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         #  contributor_count(stats, 3, "theme") | 
					
						
							| 
									
										
										
										
											2021-06-24 15:27:01 +02:00
										 |  |  |         create_graphs(stats) | 
					
						
							|  |  |  |         create_per_theme_graphs(stats, 15) | 
					
						
							| 
									
										
										
										
											2021-06-27 02:03:54 +02:00
										 |  |  |         # create_per_contributor_graphs(stats, 25) | 
					
						
							| 
									
										
										
										
											2021-03-10 12:55:27 +01:00
										 |  |  |     print("All done!") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | main() |