From f246afe3db95299fc06d8207ee8922d53ba9e940 Mon Sep 17 00:00:00 2001
From: pietervdvn
Date: Mon, 13 Jun 2022 17:48:29 +0200
Subject: [PATCH] Add and process school data

---
Review notes:
 - both scripts were revised during review; the blob ids on the 'index' lines of
   the new files still refer to the unrevised versions
 - the indentation of the JSON context lines was reconstructed; apply with
   'git apply --ignore-whitespace' if a hunk is rejected

 package-lock.json                  |  11 ++
 package.json                       |   4 +-
 scripts/csvToGeojson.ts            |  95 ++++++++++++++
 scripts/schools/README.md          |   3 +
 scripts/schools/amendSchoolData.ts | 191 +++++++++++++++++++++++++++++
 5 files changed, 303 insertions(+), 1 deletion(-)
 create mode 100644 scripts/csvToGeojson.ts
 create mode 100644 scripts/schools/README.md
 create mode 100644 scripts/schools/amendSchoolData.ts

diff --git a/package-lock.json b/package-lock.json
index 2df8f425f..827c62ae0 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -27,6 +27,7 @@
         "@types/wikidata-sdk": "^6.1.0",
         "@types/xml2js": "^0.4.9",
         "country-language": "^0.1.7",
+        "csv-parse": "^5.1.0",
         "doctest-ts-improved": "^0.8.8",
         "email-validator": "^2.0.4",
         "escape-html": "^1.0.3",
@@ -5490,6 +5491,11 @@
         "cssom": "0.3.x"
       }
     },
+    "node_modules/csv-parse": {
+      "version": "5.1.0",
+      "resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-5.1.0.tgz",
+      "integrity": "sha512-JL+Q6YEikT2uoe57InjFFa6VejhSv0tDwOxeQ1bVQKeUC/NCnLAAZ8n3PzowPQQLuZ37fysDYZipB2UJkH9C6A=="
+    },
     "node_modules/currently-unhandled": {
       "version": "0.4.1",
       "resolved": "https://registry.npmjs.org/currently-unhandled/-/currently-unhandled-0.4.1.tgz",
@@ -21041,6 +21047,11 @@
         "cssom": "0.3.x"
       }
     },
+    "csv-parse": {
+      "version": "5.1.0",
+      "resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-5.1.0.tgz",
+      "integrity": "sha512-JL+Q6YEikT2uoe57InjFFa6VejhSv0tDwOxeQ1bVQKeUC/NCnLAAZ8n3PzowPQQLuZ37fysDYZipB2UJkH9C6A=="
+    },
     "currently-unhandled": {
       "version": "0.4.1",
       "resolved": "https://registry.npmjs.org/currently-unhandled/-/currently-unhandled-0.4.1.tgz",
diff --git a/package.json b/package.json
index 266b05722..3f1505cd5 100644
--- a/package.json
+++ b/package.json
@@ -50,7 +50,8 @@
     "weblate-add-upstream": "git remote add weblate-github git@github.com:weblate/MapComplete.git",
     "weblate-fix": "git remote update weblate-github ; git merge weblate-github/weblate-mapcomplete-core; git merge weblate-github/weblate-mapcomplete-layers ; git merge weblate-github/weblate-mapcomplete-layer-translations",
     "weblate-fix-heavy": "git remote rm weblate-layers; git remote add weblate-layers https://hosted.weblate.org/git/mapcomplete/layers/; git remote update weblate-layers; git merge weblate-layers/master",
-    "housekeeping": "npm run generate && npm run generate:docs && npm run generate:contributor-list && git commit assets/ langs/ Docs/ -m 'Housekeeping...'"
+    "housekeeping": "npm run generate && npm run generate:docs && npm run generate:contributor-list && git commit assets/ langs/ Docs/ -m 'Housekeeping...'",
+    "parseSchools": "cd scripts/schools && ts-node amendSchoolData.ts"
   },
   "keywords": [
     "OpenStreetMap",
@@ -83,6 +84,7 @@
     "@types/wikidata-sdk": "^6.1.0",
     "@types/xml2js": "^0.4.9",
     "country-language": "^0.1.7",
+    "csv-parse": "^5.1.0",
     "doctest-ts-improved": "^0.8.8",
     "email-validator": "^2.0.4",
     "escape-html": "^1.0.3",
diff --git a/scripts/csvToGeojson.ts b/scripts/csvToGeojson.ts
new file mode 100644
index 000000000..d0a7fe09f
--- /dev/null
+++ b/scripts/csvToGeojson.ts
@@ -0,0 +1,95 @@
+import {parse} from 'csv-parse/sync';
+import {readFileSync} from "fs";
+
+/**
+ * Converts Lambert 72 coordinates into a [longitude, latitude] pair expressed in degrees.
+ *
+ * The longitude follows directly from the projection formula; the latitude is
+ * approximated by five rounds of fixed-point iteration, starting from zero.
+ *
+ * @param x the Lambert 72 x-coordinate
+ * @param y the Lambert 72 y-coordinate
+ * @returns the converted position as [longitude, latitude]
+ */
+function lambert72toWGS84(x: number, y: number): [number, number] {
+    // Projection constants
+    const n = 0.77164219
+    const F = 1.81329763
+    const thetaFudge = 0.00014204
+    const e = 0.08199189
+    const a = 6378388
+    const xDiff = 149910
+    const yDiff = 5400150
+    const theta0 = 0.07604294
+
+    const xReal = xDiff - x
+    const yReal = yDiff - y
+
+    const rho = Math.sqrt(xReal * xReal + yReal * yReal)
+    const theta = Math.atan(xReal / -yReal)
+
+    const longitude = (theta0 + (theta + thetaFudge) / n) * 180 / Math.PI
+
+    let latitude = 0
+    for (let i = 0; i < 5; ++i) {
+        latitude = (2 * Math.atan(Math.pow(F * a / rho, 1 / n) * Math.pow((1 + e * Math.sin(latitude)) / (1 - e * Math.sin(latitude)), e / 2))) - Math.PI / 2
+    }
+    latitude *= 180 / Math.PI
+    return [longitude, latitude]
+}
+
+/**
+ * Reads a CSV file and prints a GeoJSON FeatureCollection to stdout, containing one point per row.
+ * All columns of a row are kept as properties of its feature.
+ *
+ * @param args the path of the CSV file (optionally prefixed with "file://"),
+ *             followed by the names of the lat field and the lon field
+ * @throws Error if a row has a non-numeric value in one of both fields
+ */
+function main(args: string[]): void {
+    // Without the field names, every row would be rejected with a confusing NaN error
+    if (args.length < 3) {
+        console.log("Usage: csvToGeojson input.csv name-of-lat-field name-of-lon-field")
+        return
+    }
+
+    let file = args[0]
+    if (file.startsWith("file://")) {
+        file = file.substring("file://".length)
+    }
+    const latField = args[1]
+    const lonField = args[2]
+
+    const csvOptions = {
+        columns: true,
+        skip_empty_lines: true,
+        trim: true
+    }
+
+    const csv: Record<string, string>[] = parse(readFileSync(file), csvOptions)
+
+    const features = csv.map((csvElement, i) => {
+        const lat = Number(csvElement[latField])
+        const lon = Number(csvElement[lonField])
+        if (isNaN(lat) || isNaN(lon)) {
+            throw new Error(`Not a valid lat or lon for entry ${i}: ${JSON.stringify(csvElement)}`)
+        }
+
+        return {
+            type: "Feature",
+            properties: csvElement,
+            geometry: {
+                type: "Point",
+                // The value of the lon field is used as x, the value of the lat field as y
+                coordinates: lambert72toWGS84(lon, lat)
+            }
+        }
+    })
+
+    console.log(JSON.stringify({
+        type: "FeatureCollection",
+        features
+    }))
+}
+
+main(process.argv.slice(2))
diff --git a/scripts/schools/README.md b/scripts/schools/README.md
new file mode 100644
index 000000000..f2e3d1a02
--- /dev/null
+++ b/scripts/schools/README.md
@@ -0,0 +1,3 @@
+# Little scripts to parse Belgian school data
+
+
diff --git a/scripts/schools/amendSchoolData.ts b/scripts/schools/amendSchoolData.ts
new file mode 100644
index 000000000..e41d0f642
--- /dev/null
+++ b/scripts/schools/amendSchoolData.ts
@@ -0,0 +1,191 @@
+import {parse} from 'csv-parse/sync';
+import {readFileSync, writeFileSync} from "fs";
+import {Utils} from "../../Utils";
+
+/**
+ * Parses a CSV file into one record per row, keyed by column name.
+ * Every value ending with "niet van toepassing" (Dutch for "not applicable") is dropped from its record.
+ *
+ * @param filename path of the CSV file to read
+ * @returns the cleaned rows
+ */
+function parseAndClean(filename: string): Record<string, string>[] {
+    const csvOptions = {
+        columns: true,
+        skip_empty_lines: true,
+        trim: true
+    }
+    const records: Record<string, string>[] = parse(readFileSync(filename), csvOptions)
+    for (const record of records) {
+        for (const key of Object.keys(record)) {
+            if (record[key].endsWith("niet van toepassing")) {
+                delete record[key]
+            }
+        }
+    }
+    return records
+}
+
+/**
+ * Indexes rows by a key, so that all rows of one school can be looked up
+ * without rescanning the entire list for every school.
+ * Rows for which no key is available are left out of the index.
+ *
+ * @param rows the rows to index
+ * @param keyOf extracts the key of a row
+ * @returns the rows grouped per key, in their original order
+ */
+function groupBy<T>(rows: T[], keyOf: (row: T) => string | undefined): Map<string, T[]> {
+    const index = new Map<string, T[]>()
+    for (const row of rows) {
+        const key = keyOf(row)
+        if (key === undefined) {
+            continue
+        }
+        const group = index.get(key)
+        if (group === undefined) {
+            index.set(key, [row])
+        } else {
+            group.push(row)
+        }
+    }
+    return index
+}
+
+/**
+ * Maps the value of the 'hoofdstructuur' column onto the value used for the 'school' property
+ */
+const structuren: Record<string, string> = {
+    "Voltijds Gewoon Secundair Onderwijs": "secondary",
+    "Gewoon Lager Onderwijs": "primary",
+    "Gewoon Kleuteronderwijs": "kindergarten",
+    "Kleuteronderwijs": "kindergarten",
+    "Buitengewoon Lager Onderwijs": "primary",
+    "Buitengewoon Secundair Onderwijs": "secondary",
+    "Buitengewoon Kleuteronderwijs": "kindergarten",
+    "Deeltijds Beroepssecundair Onderwijs": "secondary"
+}
+
+/**
+ * Properties which are removed from every feature before the result is written
+ */
+const rmKeys = ["schoolnummer", "instellingstype",
+    "adres", "begindatum", "hoofdzetel", "huisnummer", "kbo-nummer",
+    "beheerder(s)", "bestuur", "clb", "ingerichte hoofdstructuren", "busnummer", "crab-code", "crab-huisnr",
+    "einddatum", "fax", "gemeente", "intern_vplnummer", "kbo_nummer", "lx", "ly", "niscode",
+    "onderwijsniveau", "onderwijsvorm", "scholengemeenschap",
+    "postcode", "provincie",
+    "provinciecode", "soort instelling", "status erkenning", "straat", "VWO-vestigingsplaatscode", "taalstelsel",
+    "net"]
+
+/**
+ * Properties which are renamed: the original key maps onto the new key
+ */
+const rename: Record<string, string> = {
+    "e-mail": "email",
+    "naam": "name",
+    "telefoon": "phone"
+}
+
+/**
+ * Amends every school of 'scholen.geojson' (read from the working directory) with data from
+ * the two CSV files and writes the result to 'amended_schools.geojson'.
+ *
+ * @param args optionally the path of the CSV with the detailed counts ('aantallen'), followed by the path
+ *             of the CSV with the counts per school; both default to the hard-coded locations below
+ * @throws Error if more than one row of the per-school CSV matches a single school
+ */
+function main(args: string[] = []): void {
+    const aantallen = args.length > 0 ? args[0] : "/home/pietervdvn/Downloads/Scholen/aantallen.csv"
+    const perSchool = args.length > 1 ? args[1] : "/home/pietervdvn/Downloads/Scholen/perschool.csv"
+
+    const schoolfields = ["schoolnummer", "intern_vplnummer", "net", "naam", "hoofdzetel", "adres", "straat", "huisnummer", "busnummer", "postcode", "gemeente", "niscode", "provinciecode", "provincie", "VWO-vestigingsplaatscode", "crab-code", "crab-huisnr", "lx", "ly", "kbo-nummer", "telefoon", "fax", "e-mail", "website", "beheerder(s)", "soort instelling", "onderwijsniveau", "instellingstype", "begindatum", "einddatum", "status erkenning", "clb", "bestuur", "scholengemeenschap", "taalstelsel", "ingerichte hoofdstructuren"] as const
+
+    const schoolGeojson: {
+        features: {
+            properties: Record<(typeof schoolfields)[number], string>
+        }[]
+    } = JSON.parse(readFileSync("scholen.geojson", "utf8"))
+
+    const aantallenFields = ["schooljaar", "nr koepel", "koepel", "instellingscode", "intern volgnr vpl", "volgnr vpl", "naam instelling", "GON-school", "GOK-school", "instellingsnummer scholengemeenschap", "scholengemeenschap", "code schoolbestuur", "schoolbestuur", "type vestigingsplaats", "fusiegemeente hoofdvestigingsplaats", "straatnaam vestigingsplaats", "huisnr vestigingsplaats", "bus vestigingsplaats", "postcode vestigingsplaats", "deelgemeente vestigingsplaats", "fusiegemeente vestigingsplaats", "hoofdstructuur (code)", "hoofdstructuur", "administratieve groep (code)", "administratieve groep", "graad lager onderwijs", "pedagogische methode", "graad secundair onderwijs", "leerjaar", "A of B-stroom", "basisopties", "beroepenveld", "onderwijsvorm", "studiegebied", "studierichting", "stelsel", "okan cluster", "type buitengewoon onderwijs", "opleidingsvorm (code)", "opleidingsvorm", "fase", "opleidingen", "geslacht", "aantal inschrijvingen"] as const
+    const aantallenParsed: Record<(typeof aantallenFields)[number], string>[] = parseAndClean(aantallen)
+    const perschoolFields = ["schooljaar", "nr koepel", "koepel", "instellingscode", "naam instelling", "straatnaam", "huisnr", "bus", "postcode", "deelgemeente", "fusiegemeente", "aantal inschrijvingen"] as const
+    const perschoolParsed: Record<(typeof perschoolFields)[number], string>[] = parseAndClean(perSchool)
+
+    // Index both tables once, instead of filtering them again for every single school
+    const aantallenPerSchool = groupBy(aantallenParsed, row => row.instellingscode)
+    const infoPerSchool = groupBy(perschoolParsed, row => row.instellingscode)
+
+    schoolGeojson.features = schoolGeojson.features
+        .filter(sch => sch.properties.lx != "0" && sch.properties.ly != "0")
+        .filter(sch => sch.properties.instellingstype !== "Universiteit")
+
+    for (const feature of schoolGeojson.features) {
+        const props = feature.properties
+        // Looked up as a string, since both indexes are keyed by the string values of the CSV
+        const schoolnummer = String(props.schoolnummer)
+
+        const schoolRows = aantallenPerSchool.get(schoolnummer) || []
+        if (schoolRows.length > 0) {
+            const distinctValues = (key: (typeof aantallenFields)[number]) => Utils.NoNull(Utils.Dedup(schoolRows.map(x => x[key])))
+
+            // NOTE(review): "onderwijsvorm" is listed in rmKeys as well, so this value is deleted again at the end of the loop - confirm which of both is intended
+            props["onderwijsvorm"] = distinctValues("onderwijsvorm").join(";")
+
+            let specialEducation = false
+            const classification: string[] = []
+            for (const s of distinctValues("hoofdstructuur")) {
+                if (s.startsWith("Buitengewoon")) {
+                    specialEducation = true
+                }
+                const v = structuren[s]
+                if (v === undefined) {
+                    // Unknown structures are reported and skipped, so that they leave no empty entries in the joined value
+                    console.error("Type not found: " + s)
+                    continue
+                }
+                classification.push(v)
+            }
+            props["school"] = Utils.Dedup(classification).join("; ")
+            if (specialEducation) {
+                props["school:for"] = "special_education"
+            }
+            if (props.taalstelsel === "Nederlandstalig") {
+                props["language:nl"] = "yes"
+            }
+
+            if (props.instellingstype === "Instelling voor deeltijds kunstonderwijs") {
+                props["amenity"] = "college"
+                props["school:subject"] = "art"
+            }
+        }
+
+        const schoolinfo = infoPerSchool.get(schoolnummer) || []
+        if (schoolinfo.length > 1) {
+            throw new Error("Multiple schoolinfo's found for " + props.schoolnummer)
+        }
+        if (schoolinfo.length == 1) {
+            // The count can be absent, as parseAndClean drops the values which are not applicable
+            const inschrijvingen = schoolinfo[0]["aantal inschrijvingen"]
+            if (inschrijvingen !== undefined) {
+                props["capacity"] = inschrijvingen.split(";").map(i => Number(i)).reduce((sum, i) => sum + i, 0)
+            }
+        }
+
+        props["source:ref"] = props.schoolnummer
+
+        for (const renameKey of Object.keys(rename)) {
+            if (props[renameKey] !== undefined) {
+                props[rename[renameKey]] = props[renameKey]
+                delete props[renameKey]
+            }
+        }
+
+        for (const rmKey of rmKeys) {
+            delete props[rmKey]
+        }
+    }
+    writeFileSync("amended_schools.geojson", JSON.stringify(schoolGeojson), "utf8")
+}
+
+main(process.argv.slice(2))