Add scraper to handle the new social links on the website

This commit is contained in:
Pieter Vander Vennet 2025-06-14 00:58:40 +02:00
parent 3977e3ca99
commit d5e20d6c82
6 changed files with 50 additions and 13 deletions

View file

@ -29,7 +29,7 @@ There are currently two bot accounts using this code:
If you want to be mentioned by this bot:
- [Edit your OSM profile], make it include `<a href='https://<mastodon-host>/@<your-username/' rel='me'>My fediverse acount</a>`
- [Edit your OSM profile], add your Mastodon account to the social links (or include `<a href='https://<mastodon-host>/@<your-username>/' rel='me'>My fediverse account</a>` in the description)
- Make edits with [MapComplete](https://mapcomplete.org)
### Optional: add a verified link
@ -47,7 +47,7 @@ You can indicate this to the bot in the following way:
### On Mastodon
- You can mute or block the bot so that you won't see the posts. Your user account on OpenStreetMap or your Mastodon-username will still be mentioned in the posts
- You can add the hashtag `#nobot` or `#no-mapcomplete-bot` to your profile description. The bot will not mention you with your Mastodon-handle, but it will still post your OSM-username, your pictures and a report of your contributions
- You can add the hashtag `#nobot` or `#no-mapcomplete-bot` to your profile description. The bot will not mention you with your Mastodon-handle, but it will **still post your OSM-username, your pictures and a report of your contributions**
### On your OSM-user profile:

View file

@ -16,7 +16,8 @@
"scripts": {
"build": "tsc",
"lint": "tslint --project ./tsconfig.json -t stylish",
"start": "ts-node src/index.ts"
"start": "ts-node src/index.ts",
"test": "ts-node src/test.ts"
},
"dependencies": {
"@types/node-fetch": "^2.6.2",

View file

@ -185,4 +185,17 @@ ${text.split("\n").map(txt => " > " + txt).join("\n")}`)
/**
 * Computes the number of characters of `overview`, plus the characters of all
 * `rest` entries joined by newlines, plus one extra character
 * (presumably the newline separating the overview from the rest; confirm with callers).
 *
 * @param overview the leading text
 * @param rest the remaining lines
 * @returns the combined character count
 */
static totalLength(overview: string, rest: string[]) {
    const joinedRest = rest.join("\n")
    return 1 + overview.length + joinedRest.length
}
/** Substrings of links which are known NOT to point to a Mastodon account. */
private static readonly notMastodon = ["wiki.openstreetmap.org", "hdyc.neis-one.org", "matrix.to"]
/** Substrings which strongly hint that a link points to a Mastodon account. */
private static readonly isMastodon = ["en.osm.town", ".social", "mstdn", "mastodon", "mapstodon"]

/**
 * Heuristically decides whether a link points to a Mastodon (fediverse) account.
 *
 * The decision is made on substrings of the link, compared case-insensitively:
 *  1. a link containing any `isMastodon` hint is accepted;
 *  2. otherwise, a link containing any `notMastodon` entry is rejected;
 *  3. any other link is assumed to be a Mastodon link.
 *
 * @param link the URL to inspect
 * @returns `true` if the link is probably a Mastodon link, `false` if it is known not to be one
 */
static isProbablyMastodonLink(link: string) {
    const normalized = link.toLowerCase()
    if (this.isMastodon.some(hint => normalized.includes(hint))) {
        return true
    }
    // Previously this check was inverted: known non-Mastodon sites were accepted
    // and every unknown host was rejected.
    if (this.notMastodon.some(blocked => normalized.includes(blocked))) {
        return false
    }
    return true // unknown host: probably a Mastodon instance
}
}

View file

@ -62,11 +62,12 @@ export default class OsmUserInfo {
if (nomention) {
return undefined
}
const mastodonLinks = await this.getMeLinks()
const meLinks = await this.getMeLinks()
const mastodonLinks = meLinks.filter(link => MastodonPoster.isProbablyMastodonLink(link))
if (mastodonLinks.length <= 0) {
return undefined
}
console.log("Got probable mastodon links", mastodonLinks,"down from", meLinks)
let mastodonlink = mastodonLinks[0]
while(mastodonlink.endsWith("/")){
@ -89,13 +90,30 @@ export default class OsmUserInfo {
return useraccount
}
/**
 * Downloads the HTML profile page of this user from the OSM website.
 * The HTML page is used because it contains the new social links section.
 *
 * @returns the raw response body of the user page as text
 */
public async scrapeDescription(): Promise<string> {
    const { display_name: displayName } = await this.getUserInfo()
    const profileUrl = this._backend + "user/" + encodeURIComponent(displayName)
    // Note: no identifying headers (user-agent, contact details) are sent with this request
    const page = await fetch(profileUrl)
    return page.text()
}
/**
* Gets the 'href' of every link with `rel=me`
*/
public async getMeLinks(): Promise<string[]> {
const userdata = await this.getUserInfo()
const div = document.createElement("div")
div.innerHTML = userdata.description
const fullPageStr = await this.scrapeDescription()
const fullPage = document.createElement("div")
fullPage.innerHTML = fullPageStr
const contentbody = fullPage.getElementsByClassName("content-body")
const content = contentbody.item(0)
const div = Array.from(content.getElementsByClassName("row"))[0]
if(!div){
return []
}
const links = Array.from(div.getElementsByTagName("a"))
const meLinks = links.filter(link => link.getAttribute("rel")?.split(" ")?.indexOf("me") >= 0)
return meLinks.map(link => link.href.toString())
@ -114,14 +132,13 @@ export default class OsmUserInfo {
try {
this._userData = JSON.parse(fs.readFileSync(this._cachingPath, "utf8"))
return this._userData
// return this._userData
} catch (e) {
fs.unlinkSync(this._cachingPath)
}
}
}
const url = `${this._backend}api/0.6/user/${this._userId}.json`
console.log("Looking up OSM user info about ", this._userId)
const res = await Utils.DownloadJson(url);
this._userData = res.user
if (this._cachingPath !== undefined) {

View file

@ -277,11 +277,11 @@ export class Postbuilder {
}
private async createOverviewForContributor(uid: string, changesetsMade: ChangeSetData[]): Promise<string> {
const userinfo = new OsmUserInfo(Number(uid), this._globalConfig)
const inf = await userinfo.getUserInfo()
const themes = new Histogram(changesetsMade, cs => cs.properties.theme)
const userinfo = new OsmUserInfo(Number(uid), this._globalConfig)
const inf = await userinfo.getUserInfo()
let username = await userinfo.GetMastodonUsername(this._poster) ?? inf.display_name
const statistics = this.getStatisticsFor(changesetsMade)

6
src/test.ts Normal file
View file

@ -0,0 +1,6 @@
import OsmUserInfo from "./OsmUserInfo";
// Manual smoke test: fetches the `rel=me` links of OSM user id 3818858 and prints them.
console.log("Hello world")
new OsmUserInfo(3818858)
    .getMeLinks()
    .then(links => console.log("Got links:", links))
    .catch(e => {
        // Report the failure explicitly and make the script exit with a failing status
        console.error("Could not fetch the me-links:", e)
        process.exitCode = 1
    })