Add script

This commit is contained in:
Pieter Vander Vennet 2023-02-26 18:00:38 +01:00
parent 83cee14312
commit 7a3928a428
8 changed files with 1750 additions and 2 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
cache/*
node_modules/*

View file

@ -1,2 +1 @@
# weekly-OSM-link-analysis
A quick and dirty analysis of microblogging-links used in the Weekly-OSM
How many Mastodon posts does the weeklyOSM link to?

BIN
Result.png Normal file

Binary file not shown.

After

(image error) Size: 70 KiB

39
entries.csv Normal file
View file

@ -0,0 +1,39 @@
issue, twitter and nitter, en.osm.town, other mastodon
2020-1,36,0,0
2020-2,17,0,0
2020-3,29,1,1
2020-4,19,0,0
2020-5,21,0,0
2020-6,22,0,0
2020-7,13,0,0
2020-8,23,0,0
2020-9,17,0,0
2020-10,12,0,0
2020-11,20,0,0
2020-12,11,0,0
2021-1,7,0,0
2021-2,6,0,0
2021-3,14,0,0
2021-4,10,0,0
2021-5,21,0,0
2021-6,16,0,0
2021-7,21,0,0
2021-8,18,0,0
2021-9,23,0,0
2021-10,16,0,0
2021-11,44,0,0
2021-12,20,0,0
2022-1,18,0,0
2022-2,17,4,4
2022-3,19,0,1
2022-4,13,2,2
2022-5,18,2,2
2022-6,17,2,4
2022-7,16,2,3
2022-8,12,3,4
2022-9,9,1,2
2022-10,26,2,4
2022-11,79,25,37
2022-12,41,6,14
2023-1,10,6,12
2023-2,6,7,10
1 issue twitter and nitter en.osm.town other mastodon
2 2020-1 36 0 0
3 2020-2 17 0 0
4 2020-3 29 1 1
5 2020-4 19 0 0
6 2020-5 21 0 0
7 2020-6 22 0 0
8 2020-7 13 0 0
9 2020-8 23 0 0
10 2020-9 17 0 0
11 2020-10 12 0 0
12 2020-11 20 0 0
13 2020-12 11 0 0
14 2021-1 7 0 0
15 2021-2 6 0 0
16 2021-3 14 0 0
17 2021-4 10 0 0
18 2021-5 21 0 0
19 2021-6 16 0 0
20 2021-7 21 0 0
21 2021-8 18 0 0
22 2021-9 23 0 0
23 2021-10 16 0 0
24 2021-11 44 0 0
25 2021-12 20 0 0
26 2022-1 18 0 0
27 2022-2 17 4 4
28 2022-3 19 0 1
29 2022-4 13 2 2
30 2022-5 18 2 2
31 2022-6 17 2 4
32 2022-7 16 2 3
33 2022-8 12 3 4
34 2022-9 9 1 2
35 2022-10 26 2 4
36 2022-11 79 25 37
37 2022-12 41 6 14
38 2023-1 10 6 12
39 2023-2 6 7 10

145
index.ts Normal file
View file

@ -0,0 +1,145 @@
import * as https from "https";
import * as dom from "fake-dom"
import * as fs from "fs";
/** Link totals for one archive page or one month. */
type LinkCounts = { twitter: number, osmtown: number, mastodon: number }

/**
 * Walks the weeklyOSM monthly archive pages from January 2020 up to the
 * current month, counts the links per host category and writes one CSV row
 * per month to `entries.csv`.
 */
class Main {
    /** Directory in which downloaded pages are kept between runs. */
    private static readonly cacheDir = "./cache/"

    /**
     * Analyses every month from 2020-01 up to the current UTC month and writes
     * the per-month totals to `entries.csv` in the working directory.
     */
    public static async main() {
        // Using `dom` as a value presumably keeps the "fake-dom" import from being
        // dropped as unused; `analyse` relies on a global `document` being present.
        if (dom === undefined) {
            console.log("Fakedom not loaded")
        }
        const csvEntries: string[] = ["issue, twitter and nitter, en.osm.town, other mastodon"]
        const now = new Date()
        const targetYear = now.getUTCFullYear()
        const currentMonth = now.getUTCMonth() + 1
        for (let year = 2020; year <= targetYear; year++) {
            for (let month = 1; month <= 12; month++) {
                console.log("Analyzing", year, month)
                if (year === targetYear && month > currentMonth) {
                    console.log("Stopping now")
                    break
                }
                // The archive URL uses a two-digit month; the CSV row below keeps the unpadded one.
                const paddedMonth = month < 10 ? "0" + month : "" + month
                const baseUrl = `https://weeklyosm.eu/archives/date/${year}/${paddedMonth}`

                // The first archive page, followed by pages 5 down to 2.
                const pageUrls: string[] = [baseUrl]
                for (let page = 5; page >= 2; page--) {
                    pageUrls.push(baseUrl + "/page/" + page)
                }

                let monthTotal: LinkCounts = {
                    twitter: 0,
                    osmtown: 0,
                    mastodon: 0
                }
                for (const pageUrl of pageUrls) {
                    try {
                        monthTotal = Main.sum(monthTotal, await Main.analyse(pageUrl))
                    } catch (e) {
                        // Pages that cannot be fetched or parsed simply do not contribute to the month.
                        console.log("SKipping ", pageUrl)
                    }
                }
                const entry = year + "-" + month + "," + monthTotal.twitter + "," + monthTotal.osmtown + "," + monthTotal.mastodon
                csvEntries.push(entry)
            }
        }
        fs.writeFileSync("entries.csv", csvEntries.join("\n"))
    }

    /**
     * Adds two count records key by key.
     *
     * @param a first record; left untouched
     * @param b second record; left untouched
     * @returns a new record holding, for every key of `a` and `b`, the sum of both values
     *          (a key missing on one side counts as 0)
     */
    private static sum<X extends Record<string, number>>(a: X, b: X): X {
        const total: Record<string, number> = {...a}
        for (const key of Object.keys(b)) {
            total[key] = (total[key] ?? 0) + (b[key] ?? 0)
        }
        // `total` starts as a copy of `a` and only gains numeric keys taken from `b`.
        return total as X
    }

    /**
     * Fetches `url` over HTTPS, using a file in the cache directory when one exists.
     *
     * The response status is not inspected: whatever body the server sends is
     * cached and returned.
     *
     * @param url absolute https URL to fetch
     * @param headers extra request headers; `accept` is always set to "application/json"
     * @returns the response body (or the cached copy) as text
     */
    private static async Download(url: string, headers?: Record<string, string>): Promise<{ content: string }> {
        const cache = Main.cacheDir + url.replace(/[./\\:?]/g, "_")
        if (fs.existsSync(cache)) {
            return {content: fs.readFileSync(cache, {encoding: "utf-8"})}
        }
        console.log("> Downloading", url)
        return new Promise((resolve, reject) => {
            // Copy instead of mutating the caller's object.
            const requestHeaders = {...(headers ?? {}), accept: "application/json"}
            // An invalid `url` makes `new URL` throw, which rejects this promise.
            const request = https.get(new URL(url), {headers: requestHeaders}, (res) => {
                const parts: string[] = []
                res.setEncoding("utf8")
                res.on("data", (chunk: string) => {
                    parts.push(chunk)
                })
                res.on("error", reject)
                res.on("end", () => {
                    const content = parts.join("")
                    try {
                        // The cache directory may not exist yet on a fresh checkout.
                        fs.mkdirSync(Main.cacheDir, {recursive: true})
                        fs.writeFileSync(cache, content)
                    } catch (e) {
                        // Caching is best effort; the downloaded content is still usable.
                        console.log("Could not write cache file", cache, e)
                    }
                    resolve({content})
                })
            })
            // Without this listener a connection failure would never settle the promise.
            request.on("error", reject)
        })
    }

    /**
     * Downloads one archive page and counts the links inside its first
     * `<article>` element (with all `<aside>` elements removed), grouped by host.
     *
     * @param url archive page to analyse
     * @returns link counts for the page
     * @throws Error when the page contains no `<article>` element
     */
    private static async analyse(url: string): Promise<LinkCounts> {
        const data = await Main.Download(url)
        const doc = document.createElement("html")
        doc.innerHTML = data.content
        const article = doc.getElementsByTagName("article")[0]
        if (article === undefined) {
            throw new Error("No <article> element found in " + url)
        }
        for (const aside of Array.from(article.getElementsByTagName("aside"))) {
            aside.parentElement?.removeChild(aside)
        }

        const hosts: Record<string, number> = {}
        for (const link of Array.from(article.getElementsByTagName("a"))) {
            let linkHost: string
            try {
                linkHost = new URL(link.href).host
            } catch (e) {
                // `new URL` throws on hrefs it cannot parse (e.g. empty ones);
                // ignore just that link instead of discarding the whole page.
                continue
            }
            hosts[linkHost] = 1 + (hosts[linkHost] ?? 0)
        }

        const result: LinkCounts = {
            twitter: (hosts["twitter.com"] ?? 0) + (hosts["nitter.net"] ?? 0),
            osmtown: hosts["en.osm.town"] ?? 0,
            mastodon: 0
        }
        // NOTE(review): "en.osm.town" also matches the ".town" rule below, so it is
        // counted in `mastodon` as well as in `osmtown` — confirm this is intended,
        // given the CSV column is titled "other mastodon".
        for (const rawHost of Object.keys(hosts)) {
            const host = rawHost.toLowerCase()
            if (host.endsWith("translate.goog")) {
                continue
            }
            const looksLikeMastodon =
                host.includes("masto") ||
                host.includes("mapstodon") ||
                host.endsWith(".social") ||
                host.endsWith(".town") ||
                host.endsWith("botsin.space")
            if (looksLikeMastodon) {
                console.log(host)
                result.mastodon += hosts[rawHost]
            }
        }
        return result
    }
}
// Entry point: run the analysis and report a failure with a non-zero exit code
// instead of leaving the rejection unhandled.
Main.main()
    .then(() => console.log("All done"))
    .catch((e: unknown) => {
        console.error("Analysis failed:", e)
        process.exitCode = 1
    })

1542
package-lock.json generated Normal file

File diff suppressed because it is too large Load diff

16
package.json Normal file
View file

@ -0,0 +1,16 @@
{
"name": "weeklyanalysis",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "ts-node index.ts"
},
"author": "",
"license": "ISC",
"dependencies": {
"fake-dom": "^1.0.4",
"ts-node": "^10.9.1",
"typescript": "^4.9.4"
}
}

5
tsconfig.json Normal file
View file

@ -0,0 +1,5 @@
{
"compilerOptions": {
"lib": ["DOM"]
}
}