1
0
Fork 0

First step in downloading past ATP data.

This commit is contained in:
Mateusz Konieczny 2025-03-07 05:01:24 +01:00
parent 05e8565893
commit a489b32546

View file

@ -0,0 +1,37 @@
import os
import subprocess

import requests
import rich

import osm_bot_abstraction_layer.util_download_file
config = __import__("0_config")
def main():
    """List past All The Places run ids, newest-first source reversed to oldest-first.

    Fetches the ATP run history JSON and prints each run id. Exploration
    step toward downloading historical ATP data.
    """
    # TODO: cache that
    response = requests.get("https://data.alltheplaces.xyz/runs/history.json", timeout=10)
    # parse the JSON payload once instead of re-parsing it for every use
    history = response.json()
    rich.print(history)
    for entry in history[::-1]:
        print(entry['run_id'])
    # https://data.alltheplaces.xyz/runs/history.json
    # https://alltheplaces-data.openaddresses.io/runs/2025-01-11-13-32-30/output/zabka_pl.geojson
    # zabka is unstable - count past entries
    # {"type":"FeatureCollection","dataset_attributes":{"@spider":"zabka_pl","spider:collection_time":"2025-01-13T01:49:37.648114","spider:robots_txt":"ignored"}
    # filter to ones in Kraków
    #osm_bot_abstraction_layer.util_download_file.download_file_if_not_present_already(download_url, config.atp_cache_folder(), filename)
# TODO: copied existing code, modify it
def download_entire_atp_dataset():
    """Download and unpack the latest full All The Places dataset.

    Skips all work if the unpacked folder already exists. Otherwise fetches
    the latest run id, downloads its output.zip into the cache folder (if not
    already present) and unpacks it there.
    """
    full_atp_folder = config.atp_cache_folder()
    # exist_ok avoids the racy isdir()-then-makedirs() check
    os.makedirs(full_atp_folder, exist_ok=True)
    if not os.path.isdir(config.atp_unpacked_folder()):
        response = requests.get("https://data.alltheplaces.xyz/runs/latest.json", timeout=10)
        run_id = response.json()['run_id']
        print(run_id)
        download_url = "https://alltheplaces-data.openaddresses.io/runs/" + run_id + "/output.zip"
        filename = "entire_atp.zip"
        osm_bot_abstraction_layer.util_download_file.download_file_if_not_present_already(download_url, full_atp_folder, filename)
        # subprocess with an argument list avoids shell quoting/injection issues
        # of os.system, and os.path.join works whether or not the cache folder
        # path ends with a separator (plain "+" concatenation did not)
        subprocess.run(
            ["unzip", os.path.join(full_atp_folder, filename), "-d", full_atp_folder],
            check=True,
        )
# Script entry point: run the exploration step when executed directly.
if __name__ == "__main__":
    main()