MapComplete/scripts/schools/amendSchoolData.ts

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

365 lines
11 KiB
TypeScript
Raw Normal View History

2022-09-08 21:40:48 +02:00
import { parse } from "csv-parse/sync"
import { readFileSync, writeFileSync } from "fs"
import { Utils } from "../../Utils"
import { GeoJSONObject, geometry } from "@turf/turf"
2022-06-13 17:48:29 +02:00
/**
 * Reads a CSV file and returns its rows as objects keyed by the column header.
 *
 * The file is parsed with header detection, empty lines skipped and cell values trimmed.
 * Every cell whose value ends with "niet van toepassing" (Dutch for "not applicable")
 * is removed from its row, so that such columns simply read as `undefined` afterwards.
 *
 * @param filename path of the CSV file to load
 * @returns one record per CSV row; the row objects are cleaned in place
 */
function parseAndClean(filename: string): Record<any, string>[] {
    const rows: Record<any, string>[] = parse(readFileSync(filename), {
        columns: true,
        skip_empty_lines: true,
        trim: true,
    })
    return rows.map((row) => {
        Object.keys(row)
            .filter((column) => row[column].endsWith("niet van toepassing"))
            .forEach((column) => {
                delete row[column]
            })
        return row
    })
}
/**
 * Maps the Dutch "hoofdstructuur" label of the enrolment CSV onto a school classification.
 * Typed with a string index signature because it is looked up with arbitrary CSV values.
 */
const structuren: Record<string, string> = {
    "Voltijds Gewoon Secundair Onderwijs": "secondary",
    "Gewoon Lager Onderwijs": "primary",
    "Gewoon Kleuteronderwijs": "kindergarten",
    Kleuteronderwijs: "kindergarten",
    "Buitengewoon Lager Onderwijs": "primary",
    "Buitengewoon Secundair Onderwijs": "secondary",
    "Buitengewoon Kleuteronderwijs": "kindergarten",
    "Deeltijds Beroepssecundair Onderwijs": "secondary",
}

/**
 * Maps the Dutch label of a secondary-education degree ("graad") onto a classification.
 */
const degreesMapping: Record<string, string> = {
    "Derde graad": "upper_secondary",
    "Tweede graad": "middle_secondary",
    "Eerste graad": "lower_secondary",
}

/**
 * The order in which classifications are listed; used as sort key by `sortClassifications`.
 */
const classificationOrder: readonly string[] = [
    "kindergarten",
    "primary",
    "secondary",
    "lower_secondary",
    "middle_secondary",
    "upper_secondary",
]

/**
 * Maps the Dutch "stelsel" label onto course-system values.
 * Only referenced from commented-out code in `main` at the moment.
 */
const stelselsMapping: Record<string, string> = {
    "Beide stelsels": "linear_courses;modular_courses",
    "Lineair stelsel": "linear_courses",
    "Modulair stelsel": "modular_courses",
}

/**
 * Property keys which are deleted from every feature before the result is written.
 */
const rmKeys: readonly string[] = [
    "schoolnummer",
    "instellingstype",
    "adres",
    "begindatum",
    "hoofdzetel",
    "huisnummer",
    "kbo-nummer",
    "beheerder(s)",
    "bestuur",
    "clb",
    "ingerichte hoofdstructuren",
    "busnummer",
    "crab-code",
    "crab-huisnr",
    "einddatum",
    "fax",
    "gemeente",
    "intern_vplnummer",
    "kbo_nummer",
    "lx",
    "ly",
    "niscode",
    "onderwijsniveau",
    "onderwijsvorm",
    "scholengemeenschap",
    "postcode",
    "provincie",
    "provinciecode",
    "soort instelling",
    "status erkenning",
    "straat",
    "VWO-vestigingsplaatscode",
    "taalstelsel",
    "net",
]

/**
 * Property keys which are renamed (source key => target key) on every feature.
 */
const rename: Record<string, string> = {
    "e-mail": "email",
    naam: "name",
    telefoon: "phone",
}
2022-09-08 21:40:48 +02:00
/**
 * Makes sure that no two features share the exact same coordinate pair.
 *
 * Features are visited in order. When the coordinates of a feature were already taken by an
 * earlier feature, the first coordinate is increased by 0.00025 until the pair is unique.
 * The coordinate arrays are modified in place; nothing is returned.
 *
 * @param features the features to spread out; only `geometry.coordinates` is read and written
 */
function fuzzIdenticals(features: { geometry: { coordinates: [number, number] } }[]): void {
    // Offset added to the first coordinate for every collision
    const shift = 0.00025
    // Coordinate pairs (as "first,second") which are already in use
    const seen = new Set<string>()
    for (const feature of features) {
        const coors = feature.geometry.coordinates
        let key = coors[0] + "," + coors[1]
        while (seen.has(key)) {
            coors[0] += shift
            key = coors[0] + "," + coors[1]
        }
        seen.add(key)
    }
}
/**
 * Orders classifications by their position in `classificationOrder`.
 *
 * The given array is sorted in place and returned. Values which do not occur in
 * `classificationOrder` have position -1 and thus end up before all known values.
 *
 * sortClassifications(["primary","secondary","kindergarten"]) // => ["kindergarten", "primary", "secondary"]
 */
function sortClassifications(classification: string[]) {
    const rankOf = (value: string) => classificationOrder.indexOf(value)
    return classification.sort((left, right) => rankOf(left) - rankOf(right))
}
2022-06-13 17:48:29 +02:00
/**
 * Amends the school features of `scripts/schools/scholen.geojson` with information from two
 * CSV exports and writes the result to `scripts/schools/amended_schools.geojson`.
 *
 * Steps:
 * 1. load the geojson and move features with identical coordinates apart;
 * 2. load the enrolment CSV's (read from hard-coded absolute paths);
 * 3. drop features whose `lx` or `ly` is "0" and features which are a "Universiteit";
 * 4. per feature: derive `school`, `school:for`, `language:nl`, `school:subject`, `capacity`
 *    and `amenity`, rename a few keys and delete the keys listed in `rmKeys`;
 * 5. keep only the features tagged `amenity=kindergarten` and write them to disk.
 *
 * @throws Error when more than one row of the per-school CSV matches a single school number
 */
function main() {
    console.log("Parsing schools...")
    const aantallenPath = "/home/pietervdvn/Downloads/Scholen/aantallen.csv"
    const perSchoolPath = "/home/pietervdvn/Downloads/Scholen/perschool.csv"

    // Property keys of the features in scholen.geojson
    const schoolfields = [
        "schoolnummer",
        "intern_vplnummer",
        "net",
        "naam",
        "hoofdzetel",
        "adres",
        "straat",
        "huisnummer",
        "busnummer",
        "postcode",
        "gemeente",
        "niscode",
        "provinciecode",
        "provincie",
        "VWO-vestigingsplaatscode",
        "crab-code",
        "crab-huisnr",
        "lx",
        "ly",
        "kbo-nummer",
        "telefoon",
        "fax",
        "e-mail",
        "website",
        "beheerder(s)",
        "soort instelling",
        "onderwijsniveau",
        "instellingstype",
        "begindatum",
        "einddatum",
        "status erkenning",
        "clb",
        "bestuur",
        "scholengemeenschap",
        "taalstelsel",
        "ingerichte hoofdstructuren",
    ] as const

    const schoolGeojson: {
        features: {
            properties: Record<typeof schoolfields[number], string>
            geometry: {
                type: "Point"
                coordinates: [number, number]
            }
        }[]
    } = JSON.parse(readFileSync("scripts/schools/scholen.geojson", "utf8"))

    fuzzIdenticals(schoolGeojson.features)

    // Column names of the detailed enrolment CSV
    const aantallenFields = [
        "schooljaar",
        "nr koepel",
        "koepel",
        "instellingscode",
        "intern volgnr vpl",
        "volgnr vpl",
        "naam instelling",
        "GON-school",
        "GOK-school",
        "instellingsnummer scholengemeenschap",
        "scholengemeenschap",
        "code schoolbestuur",
        "schoolbestuur",
        "type vestigingsplaats",
        "fusiegemeente hoofdvestigingsplaats",
        "straatnaam vestigingsplaats",
        "huisnr vestigingsplaats",
        "bus vestigingsplaats",
        "postcode vestigingsplaats",
        "deelgemeente vestigingsplaats",
        "fusiegemeente vestigingsplaats",
        "hoofdstructuur (code)",
        "hoofdstructuur",
        "administratieve groep (code)",
        "administratieve groep",
        "graad lager onderwijs",
        "pedagogische methode",
        "graad secundair onderwijs",
        "leerjaar",
        "A of B-stroom",
        "basisopties",
        "beroepenveld",
        "onderwijsvorm",
        "studiegebied",
        "studierichting",
        "stelsel",
        "okan cluster",
        "type buitengewoon onderwijs",
        "opleidingsvorm (code)",
        "opleidingsvorm",
        "fase",
        "opleidingen",
        "geslacht",
        "aantal inschrijvingen",
    ] as const
    const aantallenParsed: Record<typeof aantallenFields[number], string>[] =
        parseAndClean(aantallenPath)

    // Column names of the per-school enrolment CSV
    const perschoolFields = [
        "schooljaar",
        "nr koepel",
        "koepel",
        "instellingscode",
        "naam instelling",
        "straatnaam",
        "huisnr",
        "bus",
        "postcode",
        "deelgemeente",
        "fusiegemeente",
        "aantal inschrijvingen",
    ] as const
    const perschoolParsed: Record<typeof perschoolFields[number], string>[] =
        parseAndClean(perSchoolPath)

    // NOTE(review): the loose comparisons (`!=`, `==`) in this function are kept as they were;
    // presumably some JSON values can be numbers instead of strings - confirm before tightening
    schoolGeojson.features = schoolGeojson.features
        .filter((sch) => sch.properties.lx != "0" && sch.properties.ly != "0")
        .filter((sch) => sch.properties.instellingstype !== "Universiteit")

    const total = schoolGeojson.features.length
    console.log("Got ", total, "items after filtering")
    let processed = 0
    let lastWrite = 0
    for (const feature of schoolGeojson.features) {
        processed++
        // Report progress at most once per second
        const now = Date.now()
        if (now - lastWrite > 1000) {
            lastWrite = now
            console.log("Processing " + processed + "/" + total)
        }

        const props = feature.properties
        const enrolmentRows = aantallenParsed.filter(
            (row) => row.instellingscode == props.schoolnummer
        )
        // Remembers that `amenity=college` was set, so that the default below does not undo it
        let isArtCollege = false

        if (enrolmentRows.length > 0) {
            // All distinct, non-null values of one column over the rows of this school
            const collect = (key: typeof aantallenFields[number]) =>
                Utils.NoNull(Utils.Dedup(enrolmentRows.map((row) => row[key])))

            props["onderwijsvorm"] = collect("onderwijsvorm").join(";")

            // Currently unused:
            // const gonSchool = aantallen.some(x => x["GON-school"] === "GON-school")
            // const gokSchool = aantallen.some(x => x["GOK-school"] === "GON-school")
            // const onderwijsvorm = fetch("onderwijsvorm")
            // const koepel = fetch("koepel")
            // const stelsel = fetch("stelsel").join(";")
            // const scholengemeenschap = fetch("scholengemeenschap")

            const hoofdstructuur = collect("hoofdstructuur")
            const specialEducation = hoofdstructuur.some((s) => s.startsWith("Buitengewoon"))
            let classification = hoofdstructuur.map((s) => {
                const mapped = structuren[s]
                if (mapped === undefined) {
                    console.error("Type not found: " + s)
                    return ""
                }
                return mapped
            })

            // A secondary school which does not offer all three degrees is described
            // by the degrees it does offer instead
            const graden = collect("graad secundair onderwijs")
            if (classification[0] === "secondary" && graden.length !== 3) {
                classification = graden.map((degree) => degreesMapping[degree])
            }
            sortClassifications(classification)
            props["school"] = Utils.Dedup(classification).join("; ")

            // props["koepel"] = koepel.join(";")
            // props["scholengemeenschap"] = scholengemeenschap.join(";")
            // props["stelsel"] = stelselsMapping[stelsel]

            if (specialEducation) {
                props["school:for"] = "special_education"
            }
            if (props.taalstelsel === "Nederlandstalig") {
                props["language:nl"] = "yes"
            }
            if (props.instellingstype === "Instelling voor deeltijds kunstonderwijs") {
                props["amenity"] = "college"
                props["school:subject"] = "art"
                isArtCollege = true
            }
        }

        const schoolinfo = perschoolParsed.filter(
            (row) => row.instellingscode == props.schoolnummer
        )
        if (schoolinfo.length > 1) {
            throw new Error("Multiple schoolinfo's found for " + props.schoolnummer)
        }
        if (schoolinfo.length === 1) {
            // The enrolment cell can hold several ';'-separated numbers; they are summed
            props["capacity"] = schoolinfo[0]["aantal inschrijvingen"]
                .split(";")
                .map((part) => Number(part))
                .reduce((sum, count) => sum + count, 0)
        }

        //props["source:ref"] = props.schoolnummer

        // Default to a plain school, but do not overwrite the 'college' tag set above
        if (!isArtCollege) {
            props["amenity"] = "school"
        }
        if (props["school"] === "kindergarten") {
            props["amenity"] = "kindergarten"
            props["isced:2011:level"] = "early_education"
            delete props["school"]
        }

        for (const [from, into] of Object.entries(rename)) {
            if (props[from] !== undefined) {
                props[into] = props[from]
                delete props[from]
            }
        }

        for (const rmKey of rmKeys) {
            delete props[rmKey]
        }
    }

    //schoolGeojson.features = schoolGeojson.features.filter(f => f.properties["capacity"] !== undefined)
    /*schoolGeojson.features.forEach((f, i) => {
        f.properties["id"] = "school/"+i
    })*/

    // NOTE(review): only kindergartens end up in the output file - confirm this is intended
    schoolGeojson.features = schoolGeojson.features.filter(
        (f) => f.properties["amenity"] === "kindergarten"
    )
    writeFileSync("scripts/schools/amended_schools.geojson", JSON.stringify(schoolGeojson), "utf8")
    console.log("Done")
}
2022-09-08 21:40:48 +02:00
// Run the conversion right away, except when the second argv entry ends with "mocha"
// (presumably: when this file is loaded by the mocha test runner)
const startedByMocha = process.argv[1].endsWith("mocha")
if (!startedByMocha) {
    main()
}