Add and process school data

This commit is contained in:
pietervdvn 2022-06-13 17:48:29 +02:00
parent 0de20c40a8
commit f246afe3db
5 changed files with 250 additions and 1 deletions

11
package-lock.json generated
View file

@ -27,6 +27,7 @@
"@types/wikidata-sdk": "^6.1.0", "@types/wikidata-sdk": "^6.1.0",
"@types/xml2js": "^0.4.9", "@types/xml2js": "^0.4.9",
"country-language": "^0.1.7", "country-language": "^0.1.7",
"csv-parse": "^5.1.0",
"doctest-ts-improved": "^0.8.8", "doctest-ts-improved": "^0.8.8",
"email-validator": "^2.0.4", "email-validator": "^2.0.4",
"escape-html": "^1.0.3", "escape-html": "^1.0.3",
@ -5490,6 +5491,11 @@
"cssom": "0.3.x" "cssom": "0.3.x"
} }
}, },
"node_modules/csv-parse": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-5.1.0.tgz",
"integrity": "sha512-JL+Q6YEikT2uoe57InjFFa6VejhSv0tDwOxeQ1bVQKeUC/NCnLAAZ8n3PzowPQQLuZ37fysDYZipB2UJkH9C6A=="
},
"node_modules/currently-unhandled": { "node_modules/currently-unhandled": {
"version": "0.4.1", "version": "0.4.1",
"resolved": "https://registry.npmjs.org/currently-unhandled/-/currently-unhandled-0.4.1.tgz", "resolved": "https://registry.npmjs.org/currently-unhandled/-/currently-unhandled-0.4.1.tgz",
@ -21041,6 +21047,11 @@
"cssom": "0.3.x" "cssom": "0.3.x"
} }
}, },
"csv-parse": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-5.1.0.tgz",
"integrity": "sha512-JL+Q6YEikT2uoe57InjFFa6VejhSv0tDwOxeQ1bVQKeUC/NCnLAAZ8n3PzowPQQLuZ37fysDYZipB2UJkH9C6A=="
},
"currently-unhandled": { "currently-unhandled": {
"version": "0.4.1", "version": "0.4.1",
"resolved": "https://registry.npmjs.org/currently-unhandled/-/currently-unhandled-0.4.1.tgz", "resolved": "https://registry.npmjs.org/currently-unhandled/-/currently-unhandled-0.4.1.tgz",

View file

@ -50,7 +50,8 @@
"weblate-add-upstream": "git remote add weblate-github git@github.com:weblate/MapComplete.git", "weblate-add-upstream": "git remote add weblate-github git@github.com:weblate/MapComplete.git",
"weblate-fix": "git remote update weblate-github ; git merge weblate-github/weblate-mapcomplete-core; git merge weblate-github/weblate-mapcomplete-layers ; git merge weblate-github/weblate-mapcomplete-layer-translations", "weblate-fix": "git remote update weblate-github ; git merge weblate-github/weblate-mapcomplete-core; git merge weblate-github/weblate-mapcomplete-layers ; git merge weblate-github/weblate-mapcomplete-layer-translations",
"weblate-fix-heavy": "git remote rm weblate-layers; git remote add weblate-layers https://hosted.weblate.org/git/mapcomplete/layers/; git remote update weblate-layers; git merge weblate-layers/master", "weblate-fix-heavy": "git remote rm weblate-layers; git remote add weblate-layers https://hosted.weblate.org/git/mapcomplete/layers/; git remote update weblate-layers; git merge weblate-layers/master",
"housekeeping": "npm run generate && npm run generate:docs && npm run generate:contributor-list && git commit assets/ langs/ Docs/ -m 'Housekeeping...'" "housekeeping": "npm run generate && npm run generate:docs && npm run generate:contributor-list && git commit assets/ langs/ Docs/ -m 'Housekeeping...'",
"parseSchools": "cd scripts/schools && ts-node amendSchoolData.ts"
}, },
"keywords": [ "keywords": [
"OpenStreetMap", "OpenStreetMap",
@ -83,6 +84,7 @@
"@types/wikidata-sdk": "^6.1.0", "@types/wikidata-sdk": "^6.1.0",
"@types/xml2js": "^0.4.9", "@types/xml2js": "^0.4.9",
"country-language": "^0.1.7", "country-language": "^0.1.7",
"csv-parse": "^5.1.0",
"doctest-ts-improved": "^0.8.8", "doctest-ts-improved": "^0.8.8",
"email-validator": "^2.0.4", "email-validator": "^2.0.4",
"escape-html": "^1.0.3", "escape-html": "^1.0.3",

89
scripts/csvToGeojson.ts Normal file
View file

@ -0,0 +1,89 @@
import {parse} from 'csv-parse/sync';
import {readFileSync} from "fs";
/**
 * Converts a pair of projected grid coordinates into longitude/latitude in degrees.
 *
 * Judging by the name, the input is Belgian Lambert 72 and the output is meant to be WGS84.
 * NOTE(review): no datum shift is visible in this code, so the result is presumably only an
 * approximation of true WGS84 - confirm the accuracy is sufficient for the use case.
 *
 * @param x the first grid coordinate (subtracted from the x-offset of the projection origin)
 * @param y the second grid coordinate (subtracted from the y-offset of the projection origin)
 * @returns a `[longitude, latitude]` tuple in degrees, i.e. in GeoJSON coordinate order
 */
const lambert72toWGS84 = function (x: number, y: number): [number, number] {
    // Projection parameters. NOTE(review): presumably the cone constant (n), scale factor (F),
    // eccentricity (e) and semi-major axis (a) of the Lambert 72 definition - verify against the spec.
    const n = 0.77164219;
    const F = 1.81329763;
    const thetaFudge = 0.00014204;
    const e = 0.08199189;
    const a = 6378388;
    const xDiff = 149910;
    const yDiff = 5400150;
    const theta0 = 0.07604294;

    // Offset of the point relative to the projection origin
    const xReal = xDiff - x;
    const yReal = yDiff - y;

    // Polar coordinates of that offset
    const rho = Math.sqrt(xReal * xReal + yReal * yReal);
    const theta = Math.atan(xReal / -yReal);

    const newLongitude = (theta0 + (theta + thetaFudge) / n) * 180 / Math.PI;

    // The latitude appears on both sides of the equation, so it is refined by
    // a fixed number of fixed-point iterations starting from zero.
    let newLatitude = 0;
    for (let i = 0; i < 5; ++i) {
        newLatitude = (2 * Math.atan(Math.pow(F * a / rho, 1 / n) * Math.pow((1 + e * Math.sin(newLatitude)) / (1 - e * Math.sin(newLatitude)), e / 2))) - Math.PI / 2;
    }
    newLatitude *= 180 / Math.PI;

    return [newLongitude, newLatitude];
}
/**
 * Command-line entry point: reads a CSV file and prints a GeoJSON FeatureCollection
 * of points to stdout.
 *
 * Every CSV row becomes one feature which carries all columns of the row as properties.
 * The values of the two named columns are passed through `lambert72toWGS84` (the "lon" column
 * as `x`, the "lat" column as `y`), so they are expected to hold projected grid coordinates
 * rather than degrees.
 *
 * @param args `[input.csv, name-of-lat-field, name-of-lon-field]`; the input path may
 *             carry a `file://` prefix, which is stripped
 * @throws Error if one of the coordinate columns of a row is not numeric
 */
function main(args: string[]): void {
    // All three arguments are needed: without the field names every row would be rejected
    if (args.length < 3) {
        // stdout is reserved for the generated GeoJSON, so the usage goes to stderr
        console.error("Usage: csvToGeojson input.csv name-of-lat-field name-of-lon-field")
        return
    }

    const [rawFile, latField, lonField] = args
    const filePrefix = "file://"
    const file = rawFile.startsWith(filePrefix) ? rawFile.slice(filePrefix.length) : rawFile

    const csvOptions = {
        columns: true,
        skip_empty_lines: true,
        trim: true
    }
    const csv: Record<string, string>[] = parse(readFileSync(file), csvOptions)

    const features = csv.map((csvElement, i) => {
        const lat = Number(csvElement[latField])
        const lon = Number(csvElement[lonField])
        if (isNaN(lat) || isNaN(lon)) {
            throw new Error(`Not a valid lat or lon for entry ${i}: ${JSON.stringify(csvElement)}`)
        }

        return {
            type: "Feature",
            properties: csvElement,
            geometry: {
                type: "Point",
                coordinates: lambert72toWGS84(lon, lat)
            }
        }
    })

    console.log(JSON.stringify({
        type: "FeatureCollection",
        features
    }))
}
// Run the script, skipping the first two argv entries (per Node convention the executable and the script path)
main(process.argv.slice(2))

View file

@ -0,0 +1,3 @@
# Little scripts to parse Belgian school data

View file

@ -0,0 +1,144 @@
import {parse} from 'csv-parse/sync';
import {readFileSync, writeFileSync} from "fs";
import {Utils} from "../../Utils";
/**
 * Loads a CSV file (first line = column names, values trimmed, empty lines skipped)
 * and strips every field whose value ends with "niet van toepassing".
 *
 * @param filename path of the CSV file to read
 * @returns one record per CSV row, keyed by column name, without the stripped fields
 */
function parseAndClean(filename: string): Record<any, string>[] {
    const parserSettings = {
        columns: true,
        skip_empty_lines: true,
        trim: true
    }
    const rows: Record<any, string>[] = parse(readFileSync(filename), parserSettings)

    return rows.map(row => {
        // The entries are snapshotted first, so deleting while walking over them is safe
        Object.entries(row)
            .filter(([, value]) => value.endsWith("niet van toepassing"))
            .forEach(([column]) => {
                delete row[column]
            })
        return row
    })
}
/**
 * Lookup table from a "hoofdstructuur" value of the enrolment data
 * to the school level that is written into the `school` property.
 */
const structuren = {
    // secondary
    "Voltijds Gewoon Secundair Onderwijs": "secondary",
    // primary
    "Gewoon Lager Onderwijs": "primary",
    // kindergarten
    "Gewoon Kleuteronderwijs": "kindergarten",
    "Kleuteronderwijs": "kindergarten",
    // the "Buitengewoon" variants map onto the same levels
    "Buitengewoon Lager Onderwijs": "primary",
    "Buitengewoon Secundair Onderwijs": "secondary",
    "Buitengewoon Kleuteronderwijs": "kindergarten",
    "Deeltijds Beroepssecundair Onderwijs": "secondary"
}

/**
 * Property keys which are dropped from every feature before the result is written.
 */
const rmKeys = [
    "schoolnummer",
    "instellingstype",
    "adres",
    "begindatum",
    "hoofdzetel",
    "huisnummer",
    "kbo-nummer",
    "beheerder(s)",
    "bestuur",
    "clb",
    "ingerichte hoofdstructuren",
    "busnummer",
    "crab-code",
    "crab-huisnr",
    "einddatum",
    "fax",
    "gemeente",
    "intern_vplnummer",
    "kbo_nummer",
    "lx",
    "ly",
    "niscode",
    "onderwijsniveau",
    "onderwijsvorm",
    "scholengemeenschap",
    "postcode",
    "provincie",
    "provinciecode",
    "soort instelling",
    "status erkenning",
    "straat",
    "VWO-vestigingsplaatscode",
    "taalstelsel",
    "net"
]

/**
 * Property keys which are renamed: original key -> new key.
 */
const rename = {
    "e-mail": "email",
    naam: "name",
    telefoon: "phone"
}
/**
 * Indexes rows by their "instellingscode" so that all rows of one school can be
 * looked up directly instead of rescanning the whole table for every school.
 * Rows without an "instellingscode" are left out of the index.
 */
function groupByInstellingscode<T extends { instellingscode: string }>(rows: T[]): Map<string, T[]> {
    const grouped = new Map<string, T[]>()
    for (const row of rows) {
        if (row.instellingscode === undefined) {
            continue
        }
        const code = String(row.instellingscode)
        const bucket = grouped.get(code)
        if (bucket === undefined) {
            grouped.set(code, [row])
        } else {
            bucket.push(row)
        }
    }
    return grouped
}

/**
 * Translates "hoofdstructuur" values via the `structuren` table.
 * Values missing from the table are reported on stderr and skipped, so that they
 * do not show up as empty entries in the joined result.
 *
 * @returns `school`: the distinct school levels joined with "; " (empty if none was recognised);
 *          `specialEducation`: true if at least one value starts with "Buitengewoon"
 */
function classifyHoofdstructuren(hoofdstructuren: string[]): { school: string, specialEducation: boolean } {
    const levelOf: Record<string, string> = structuren
    const levels: string[] = []
    let specialEducation = false
    for (const hoofdstructuur of hoofdstructuren) {
        if (hoofdstructuur.startsWith("Buitengewoon")) {
            specialEducation = true
        }
        const level = levelOf[hoofdstructuur]
        if (level === undefined) {
            console.error("Type not found: " + hoofdstructuur)
            continue
        }
        levels.push(level)
    }
    return {school: Utils.Dedup(levels).join("; "), specialEducation}
}

/**
 * Amends the school features with information from the enrolment CSVs,
 * renames/removes properties (see `rename` and `rmKeys`) and writes the result as GeoJSON.
 *
 * Features with `lx` or `ly` equal to "0" and features of instellingstype "Universiteit" are dropped.
 *
 * @param aantallen path of the CSV with the detailed enrolment rows
 * @param perSchool path of the CSV with one enrolment total per school
 * @param schoolsGeojson path of the GeoJSON with the schools to amend
 * @param output path the amended GeoJSON is written to
 * @throws Error if the per-school CSV has more than one row for the same schoolnummer
 */
function main(aantallen: string = "/home/pietervdvn/Downloads/Scholen/aantallen.csv",
              perSchool: string = "/home/pietervdvn/Downloads/Scholen/perschool.csv",
              schoolsGeojson: string = "scholen.geojson",
              output: string = "amended_schools.geojson"): void {

    const schoolfields = ["schoolnummer", "intern_vplnummer", "net", "naam", "hoofdzetel", "adres", "straat", "huisnummer", "busnummer", "postcode", "gemeente", "niscode", "provinciecode", "provincie", "VWO-vestigingsplaatscode", "crab-code", "crab-huisnr", "lx", "ly", "kbo-nummer", "telefoon", "fax", "e-mail", "website", "beheerder(s)", "soort instelling", "onderwijsniveau", "instellingstype", "begindatum", "einddatum", "status erkenning", "clb", "bestuur", "scholengemeenschap", "taalstelsel", "ingerichte hoofdstructuren"] as const

    const schoolGeojson: {
        features: {
            properties: Record<(typeof schoolfields)[number], string>
        }[]
    } = JSON.parse(readFileSync(schoolsGeojson, "utf8"))

    const aantallenFields = ["schooljaar", "nr koepel", "koepel", "instellingscode", "intern volgnr vpl", "volgnr vpl", "naam instelling", "GON-school", "GOK-school", "instellingsnummer scholengemeenschap", "scholengemeenschap", "code schoolbestuur", "schoolbestuur", "type vestigingsplaats", "fusiegemeente hoofdvestigingsplaats", "straatnaam vestigingsplaats", "huisnr vestigingsplaats", "bus vestigingsplaats", "postcode vestigingsplaats", "deelgemeente vestigingsplaats", "fusiegemeente vestigingsplaats", "hoofdstructuur (code)", "hoofdstructuur", "administratieve groep (code)", "administratieve groep", "graad lager onderwijs", "pedagogische methode", "graad secundair onderwijs", "leerjaar", "A of B-stroom", "basisopties", "beroepenveld", "onderwijsvorm", "studiegebied", "studierichting", "stelsel", "okan cluster", "type buitengewoon onderwijs", "opleidingsvorm (code)", "opleidingsvorm", "fase", "opleidingen", "geslacht", "aantal inschrijvingen"] as const
    type AantallenField = (typeof aantallenFields)[number]
    const aantallenParsed: Record<AantallenField, string>[] = parseAndClean(aantallen)
    const aantallenByCode = groupByInstellingscode(aantallenParsed)

    const perschoolFields = ["schooljaar", "nr koepel", "koepel", "instellingscode", "naam instelling", "straatnaam", "huisnr", "bus", "postcode", "deelgemeente", "fusiegemeente", "aantal inschrijvingen"] as const
    const perschoolParsed: Record<(typeof perschoolFields)[number], string>[] = parseAndClean(perSchool)
    const perschoolByCode = groupByInstellingscode(perschoolParsed)

    schoolGeojson.features = schoolGeojson.features
        .filter(sch => String(sch.properties.lx) !== "0" && String(sch.properties.ly) !== "0")
        .filter(sch => sch.properties.instellingstype !== "Universiteit")

    for (const feature of schoolGeojson.features) {
        // 'source' is the typed view on the original columns,
        // 'tags' is the same object seen as a free-form bag in which new keys are written
        const source = feature.properties
        const tags: Record<string, unknown> = feature.properties
        const schoolnummer = String(source.schoolnummer)

        const enrolmentRows = aantallenByCode.get(schoolnummer) ?? []
        if (enrolmentRows.length > 0) {
            const distinctValues = (key: AantallenField) => Utils.NoNull(Utils.Dedup(enrolmentRows.map(row => row[key])))

            // NOTE(review): "onderwijsvorm" is also listed in rmKeys, so this value is deleted again below - confirm which of the two is intended
            tags["onderwijsvorm"] = distinctValues("onderwijsvorm").join(";")

            const {school, specialEducation} = classifyHoofdstructuren(distinctValues("hoofdstructuur"))
            if (school !== "") {
                tags["school"] = school
            }
            if (specialEducation) {
                tags["school:for"] = "special_education"
            }

            // NOTE(review): the two checks below only look at the school itself, yet they are skipped for schools without enrolment rows - confirm this is intended
            if (source.taalstelsel === "Nederlandstalig") {
                tags["language:nl"] = "yes"
            }

            if (source.instellingstype === "Instelling voor deeltijds kunstonderwijs") {
                tags["amenity"] = "college"
                tags["school:subject"] = "art"
            }
        }

        const schoolinfo = perschoolByCode.get(schoolnummer) ?? []
        if (schoolinfo.length > 1) {
            throw new Error("Multiple schoolinfo's found for " + source.schoolnummer)
        }
        if (schoolinfo.length === 1) {
            // The field is absent if parseAndClean stripped it
            const inschrijvingen = schoolinfo[0]["aantal inschrijvingen"]
            if (inschrijvingen !== undefined) {
                // The field can hold several ';'-separated counts, which are summed
                tags["capacity"] = inschrijvingen.split(";").map(i => Number(i)).reduce((sum, i) => sum + i, 0)
            }
        }

        tags["source:ref"] = source.schoolnummer

        for (const [renameKey, into] of Object.entries(rename)) {
            if (tags[renameKey] !== undefined) {
                tags[into] = tags[renameKey]
                delete tags[renameKey]
            }
        }

        for (const rmKey of rmKeys) {
            delete tags[rmKey]
        }
    }

    writeFileSync(output, JSON.stringify(schoolGeojson), "utf8")
}
// Run the amendment as soon as the script is loaded
main()