mirror of https://codeberg.org/matkoniecz/list_how_openstreetmap_can_be_improved_with_alltheplaces_data.git
synced 2025-05-13 05:03:09 +02:00

WIP from dying laptop, OSM-ATP comparison for recently edited OHs

This commit is contained in:
parent 854a8c3002
commit 481fc44b9c

8 changed files with 602 additions and 12 deletions
0_config.py

@@ -529,6 +529,10 @@ def planet_download_folder():
    return cache_folder() + "planet_data/"


def changeset_list_download_folder():
    return cache_folder() + "openstreetmap_all_changeset_data/"


def output_folder():
    return os.getenv("OSM_ATM_MATCHER_OUTPUT_FOLDER") + "/"
1_obtain_osm_data.py

@@ -6,21 +6,16 @@ import os
import osm_bot_abstraction_layer.util_download_file
config = __import__("0_config")


def main():
    print(datetime.now().isoformat(timespec='minutes'))


def download_file_via_torrent(torrent_file_url, torrent_file_directory, torrent_file_name, file_download_folder):
    ses = libtorrent.session({'listen_interfaces': '0.0.0.0:6881'})

    torrent_file_directory = config.cache_folder()
    filename = 'planet-latest.osm.pbf.torrent'
    if os.path.isfile(torrent_file_directory + filename):
        os.remove(torrent_file_directory + filename)
    print(torrent_file_directory + filename)
    url = 'https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf.torrent'
    osm_bot_abstraction_layer.util_download_file.download_file_if_not_present_already(url, torrent_file_directory, filename)
    if os.path.isfile(torrent_file_directory + torrent_file_name):
        os.remove(torrent_file_directory + torrent_file_name)
    print(torrent_file_directory + torrent_file_name)
    osm_bot_abstraction_layer.util_download_file.download_file_if_not_present_already(torrent_file_url, torrent_file_directory, torrent_file_name)

    info = libtorrent.torrent_info(torrent_file_directory + filename)
    h = ses.add_torrent({'ti': info, 'save_path': config.planet_download_folder()})
    info = libtorrent.torrent_info(torrent_file_directory + torrent_file_name)
    h = ses.add_torrent({'ti': info, 'save_path': file_download_folder})
    s = h.status()
    print('starting', s.name)

@@ -41,6 +36,15 @@ def main():
    time.sleep(1)

    print(h.status().name, 'complete')


def main():
    print(datetime.now().isoformat(timespec='minutes'))
    download_file_via_torrent(
        torrent_file_url='https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf.torrent',
        torrent_file_directory=config.cache_folder(),
        torrent_file_name='planet-latest.osm.pbf.torrent',
        file_download_folder=config.planet_download_folder()
    )
    print(datetime.now().isoformat(timespec='minutes'))


if __name__ == "__main__":
    main()
85 changeset_parser_extracting_data_from_xml_line.php Normal file

@@ -0,0 +1,85 @@
<?php
// obtained from https://github.com/matkoniecz/StreetComplete_usage_changeset_analysis

// assumptions: changesets in the planet changeset file are formatted as follows,
// either (1)
//   the line begins with "<changeset" and ends with "/>"
//   and it is a changeset without tags
// or (2)
//   the line begins with '<changeset' and ends with '">'
//   tags follow, one per line
//   and the changeset ends with a line containing '</changeset>' as its sole nonwhitespace text
// one of these two forms applies to every changeset
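// a hypothetical illustration of the two assumed forms (made-up values, not taken
// from a real planet dump):
// (1) <changeset id="1" created_at="2025-01-01T00:00:00Z" uid="42" num_changes="2"/>
// (2) <changeset id="2" created_at="2025-01-01T00:00:00Z" uid="42" num_changes="1">
//         <tag k="created_by" v="StreetComplete 60.1"/>
//         <tag k="StreetComplete:quest_type" v="AddOpeningHours"/>
//     </changeset>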

function value_of_key($line, $tag) {
    $left_stripped = str_replace("<tag k=\"" . $tag . "\" v=\"", "", $line);
    return str_replace('"/>', '', $left_stripped);
}
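// for example (hypothetical input):
// value_of_key('<tag k="created_by" v="StreetComplete 60.1"/>', "created_by")
// returns 'StreetComplete 60.1'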

function quest_tag_to_identifier($line) {
    return value_of_key($line, "StreetComplete:quest_type");
}

function created_by_tag_to_identifier($line) {
    return value_of_key($line, "created_by");
}

// from https://www.php.net/manual/en/function.substr-compare.php
function str_begins($haystack, $needle) {
    return 0 === substr_compare($haystack, $needle, 0, strlen($needle));
}

function str_ends($haystack, $needle) {
    return 0 === substr_compare($haystack, $needle, -strlen($needle));
}

function contains_substr($mainStr, $str, $loc = false) {
    if ($loc === false) return (strpos($mainStr, $str) !== false);
    if (strlen($mainStr) < strlen($str)) return false;
    if (($loc + strlen($str)) > strlen($mainStr)) return false;
    return (strcmp(substr($mainStr, $loc, strlen($str)), $str) == 0);
}

function get_changes_number($changeset_header) {
    if (preg_match("/num_changes=\"([0-9]+)\"/", $changeset_header, $matches)) {
        return (int)$matches[1];
    } else {
        return 0;
    }
}

// note: this is applied to a quest type tag line, not to the changeset header
function get_quest_type($tag_line) {
    if (preg_match("/v=\"([^\"]+)\"/", $tag_line, $matches)) {
        return $matches[1];
    } else {
        return NULL;
    }
}

function get_changeset_id($changeset_header) {
    if (preg_match("/ id=\"([0-9]+)\"/", $changeset_header, $matches)) {
        return (int)$matches[1];
    } else {
        return -1;
    }
}

function get_uid($changeset_header) {
    if (preg_match("/ uid=\"([0-9]+)\"/", $changeset_header, $matches)) {
        return (int)$matches[1];
    } else {
        return -1;
    }
}

function get_changeset_creation_date($changeset_header) {
    if (preg_match("/ created_at=\"([^\"]+)\"/", $changeset_header, $matches)) {
        return $matches[1];
    } else {
        return -1;
    }
}
?>
112 changeset_parser_streetcomplete_edits_generate_csv_and_make_quest_summary.php Normal file

@@ -0,0 +1,112 @@
<?php
// obtained from https://github.com/matkoniecz/StreetComplete_usage_changeset_analysis

// special thanks to @Zverik for answering https://github.com/Zverik/editor-stats/issues/4
// without this I would not expect processing such data to be feasible (the changeset planet file can be read line by line)!

// for the assumptions being made about the file format, which allow processing it
// without parsing it as XML, see changeset_parser_extracting_data_from_xml_line.php

require_once('changeset_parser_extracting_data_from_xml_line.php');

function main($input_filepath, $output_filepath) {
    $file = new SplFileObject($input_filepath);

    $outputFile = fopen($output_filepath, "w") or die("Unable to open file!");
    fwrite($outputFile, "changeset_id" . "," . "editor" . "," . "changed_objects" . "," . "quest_type" . "," . "user_id" . "\n");

    $popularity = array();
    // based on https://stackoverflow.com/questions/13246597/how-to-read-a-large-file-line-by-line
    // loop until we reach the end of the file
    while (!$file->eof()) {
        $line = trim($file->fgets());
        if ($line == "</changeset>") {
            #echo $line;
            #echo "end of a changeset with tags\n\n";
            $changeset_header = NULL;
        } elseif (str_begins($line, "<changeset")) {
            if(str_ends($line, '">')) {
                #echo $line;
                $changeset_header = $line;
                #echo "new changeset, with tags\n\n";
            } else {
                #echo $line;
                #echo "new changeset, without tags\n\n";
            }
        } else {
            if(str_begins($line, '<tag k="created_by"')) {
                if(contains_substr($line, "StreetComplete") || contains_substr($line, "zażółć")) {
                    #echo $changeset_header;
                    #echo "\n";
                    #echo $line;
                    #echo "\n";
                    #echo "created by tag\n";
                }
            } elseif (str_begins($line, '<tag k="StreetComplete:quest_type"') || str_begins($line, '<tag k="zażółć:quest_type"')) {
                #echo $line;
                #echo "\n";
                #echo "quest type tag";
                #echo get_changes_number($changeset_header);
                #echo "\n";
                if(str_begins($line, '<tag k="StreetComplete:quest_type"')){
                    $editor = "StreetComplete";
                } elseif(str_begins($line, '<tag k="zażółć:quest_type"')){
                    $editor = "StreetComplete";
                } else {
                    $editor = "?";
                }
                $id = get_changeset_id($changeset_header);
                $count = get_changes_number($changeset_header);
                $type = get_quest_type($line);
                $uid = get_uid($changeset_header);
                fwrite($outputFile, $id . "," . $editor . "," . $count . "," . $type . "," . $uid . "\n");
                $popularity = register_popularity($popularity, $type, get_changes_number($changeset_header));
                #var_dump($popularity);
                #echo "\n\n";
            }
        }
    }

    arsort($popularity);
    foreach ($popularity as $quest_identifier => $total_edits) {
        echo "$quest_identifier : $total_edits\n";
    }

    echo("\n");
    echo("\n");
    echo("\n");
    echo "| QuestCode | Total modified elements |\n";
    echo "| ------------- |-------------|\n";
    foreach ($popularity as $quest_identifier => $total_edits) {
        echo "| $quest_identifier | $total_edits |\n";
    }
    echo("\n");
    echo("\n");
    echo("\n");
    echo "| QuestCode | Total modified elements |\n";
    echo "| ------------- |-------------|\n";
    foreach ($popularity as $quest_identifier => $total_edits) {
        if ($total_edits >= 4000) {
            echo "| $quest_identifier | ". (int)($total_edits/1000) . "k |\n";
        } else {
            echo "| $quest_identifier | $total_edits |\n";
        }
    }

    // unset the file to call __destruct(), closing the file handle
    $file = null;
    fclose($outputFile);
}

function register_popularity($dict, $index, $number) {
    if (isset($dict[$index])) {
        $dict[$index] += $number;
    } else {
        $dict[$index] = $number;
    }
    return $dict;
}

main($argv[1], $argv[2]);
?>
268 compare_atp_to_recent_osm.py Normal file

@@ -0,0 +1,268 @@
import csv
from collections import deque
import os
import requests
import json
import diskcache
import rich
from osm_easy_api.api import Api
from osm_easy_api.data_classes import Node, Way, Relation, Changeset, OsmChange, Action, Tags
from osm_easy_api.api.endpoints import Elements_Container
import sqlite3
import sqlite_test
import osm_bot_abstraction_layer.util_download_file
import serializing
import bz2

obtain_osm_data = __import__("1_obtain_osm_data")  # move torrent download code elsewhere?
config = __import__("0_config")

# persistent on-disk caches, so changesets and object histories are downloaded only once
CHANGESET_CACHE = diskcache.Cache(config.cache_folder() + "osm_changeset_cache", eviction_policy="none")
HISTORY_CACHE = diskcache.Cache(config.cache_folder() + "osm_history_cache", eviction_policy="none")


def create_filtered_csv(input_filename, output_filename):
    total_lines = 0
    saved_lines = 0

    with open(input_filename, mode='r', newline='', encoding='utf-8') as infile:
        with open(output_filename, mode='w', newline='', encoding='utf-8') as outfile:
            reader = csv.DictReader(infile)
            writer = csv.writer(outfile)

            # write header to output file
            writer.writerow(['changeset_id', 'editor', 'user_id'])

            for row in reader:
                total_lines += 1
                if row['quest_type'] == 'AddOpeningHours':
                    # write the selected columns to the output file
                    writer.writerow([
                        row['changeset_id'],
                        row['editor'],
                        row['user_id']
                    ])
                    saved_lines += 1

    print(f"Processed {total_lines} lines in total")
    print(f"Saved {saved_lines} lines to {output_filename}")


def serialize_element_list(element_list):
    returned = []
    for entry in element_list:
        returned.append(entry.to_dict())
    return json.dumps(returned, default=str, indent=3)


def object_class_from_object_name(object_name):
    if object_name == "Node":
        return Node
    elif object_name == "Way":
        return Way
    elif object_name == "Relation":
        return Relation
    else:
        raise Exception("unexpected type " + object_name)


def deserialize_element_list(serialized):
    returned = []
    for entry in json.loads(serialized):
        object_class = object_class_from_object_name(entry['type'])
        returned.append(object_class.from_dict(entry))
    return returned


def elements_edited_by_changeset(api, changeset_id):
    if changeset_id in CHANGESET_CACHE:
        return deserialize_element_list(CHANGESET_CACHE[changeset_id])
    element_list = download_elements_edited_by_changeset(api, changeset_id)
    returned = serialize_element_list(element_list)
    CHANGESET_CACHE[changeset_id] = returned
    return element_list


def download_elements_edited_by_changeset(api, changeset_id):
    print("downloading changeset", changeset_id)
    element_list = []
    for action in api.changeset.download(changeset_id):
        if action[0] != Action.MODIFY and action[0] != Action.DELETE and action[0] != Action.CREATE:
            print("unexpected action type", action)
            raise Exception("unexpected action type")
        element = action[1]
        element_list.append(element)
        #rich.print(action)
    return element_list


def history_info(api, object_type, object_id):
    identifier = object_type + "_" + str(object_id)
    if identifier in HISTORY_CACHE:
        return deserialize_element_list(HISTORY_CACHE[identifier])

    osm_type = object_class_from_object_name(object_type)
    container = Elements_Container(api)

    print("downloading history of", object_type, object_id)
    history = container.history(osm_type, object_id)
    serialized = serialize_element_list(history)
    HISTORY_CACHE[identifier] = serialized
    return deserialize_element_list(HISTORY_CACHE[identifier])


def obtain_changeset_listing():
    """
    # obtain https://github.com/matkoniecz/StreetComplete_usage_changeset_analysis/blob/master/streetcomplete_edits_generate_csv_and_make_quest_summary.php
    # obtain https://github.com/matkoniecz/StreetComplete_usage_changeset_analysis/blob/master/extracting_data_from_xml_line.php
    # check hashes
    # or just check in it?
    # with verification for actual source?

    # find code to download changesets via torrent in python
    # is in 1_obtain_osm_data.py

    if os.path.isdir(config.changeset_list_download_folder()) == False:
        os.makedirs(config.changeset_list_download_folder())

    torrent_file_url = 'https://planet.osm.org/planet/changesets-latest.osm.bz2.torrent'
    torrent_file_name = 'changesets-latest.osm.bz2.torrent'
    torrent_file_directory = config.changeset_list_download_folder()
    file_download_folder = config.changeset_list_download_folder()
    obtain_osm_data.download_file_via_torrent(torrent_file_url, torrent_file_directory, torrent_file_name, file_download_folder)

    # based on https://stackoverflow.com/a/16964073/4130619
    # https://docs.python.org/3/library/bz2.html has no such example

    # TODO done?
    # it unpacks to something like changesets-250324.osm.bz2
    # so ideally uncompressed would be also like this
    """
    # remove commenting out above
    # remove that line below
    file_download_folder = config.changeset_list_download_folder()
    command = 'php changeset_parser_streetcomplete_edits_generate_csv_and_make_quest_summary.php "' + decompressed_filepath + '" "' + csv_with_streetcomplete_changesets() + '"'
    print(command)
    # remove up to here

    file_with_bz2_changesets = None
    for file_name in os.listdir(file_download_folder):
        target = os.path.join(file_download_folder, file_name)
        if os.path.isfile(target):
            if target.endswith(".bz2"):
                if file_with_bz2_changesets is None or file_with_bz2_changesets < file_name:
                    # relevant if multiple files become unpacked over time -
                    # in such a case we want the latest one (filenames embed the date,
                    # so lexicographic comparison picks it)
                    file_with_bz2_changesets = file_name
    print("will unpack", file_with_bz2_changesets)
    filepath = os.path.join(file_download_folder, file_with_bz2_changesets)

    decompressed_filepath = os.path.join(file_download_folder, file_with_bz2_changesets.replace('.bz2', ''))
    print("unpacking to", decompressed_filepath)
    # stream-decompress in 100 KiB chunks to avoid loading the whole file into memory
    with open(decompressed_filepath, 'wb') as new_file, bz2.BZ2File(filepath, 'rb') as file:
        for data in iter(lambda: file.read(100 * 1024), b''):
            new_file.write(data)

    """
    import bz2
    filepath = '/media/mateusz/OSM_cache/ATP_matcher_cache/openstreetmap_all_changeset_data/changesets-250324.osm.bz2'
    newfilepath = '/media/mateusz/OSM_cache/ATP_matcher_cache/openstreetmap_all_changeset_data/changesets-250324.osm'
    with open(newfilepath, 'wb') as new_file, open(filepath, 'rb') as file:
        decompressor = BZ2Decompressor()
        for data in iter(lambda: file.read(100 * 1024), b''):
            new_file.write(decompressor.decompress(data))

    with open(newfilepath, 'wb') as new_file, bz2.BZ2File(filepath, 'rb') as file:
        for data in iter(lambda: file.read(100 * 1024), b''):
            new_file.write(data)
    """

    # yay, now we also got PHP as dependency
    # TODO https://github.com/matkoniecz/StreetComplete_usage_changeset_analysis/blob/master/streetcomplete_edits_generate_csv_and_make_quest_summary.php - modify it so output.csv is also specified via parameter
    # /home/mateusz/Documents/install_moje/OSM_software/StreetComplete_usage_changeset_analysis
    # decide on where output goes
    # then pull this code into my repo
    command = 'php changeset_parser_streetcomplete_edits_generate_csv_and_make_quest_summary.php "' + decompressed_filepath + '" "' + csv_with_streetcomplete_changesets() + '"'
    print(command)
    os.system(command)


def csv_with_streetcomplete_changesets():
    return os.path.join(config.changeset_list_download_folder(), 'streetcomplete_changesets.csv')


def main():
    input_file = csv_with_streetcomplete_changesets()
    if not os.path.isfile(input_file):
        obtain_changeset_listing()

    filtered_filename = os.path.join(config.changeset_list_download_folder(), 'streetcomplete_changesets_only_opening_hours.csv')
    filtering_success_filepath = '/media/mateusz/OSM_cache/changesets/filtering_marker.success'

    if not os.path.isfile(filtering_success_filepath):
        create_filtered_csv(input_file, filtered_filename)
        with open(filtering_success_filepath, "w") as myfile:
            myfile.write("data prepared")

    # use a deque with maxlen so that at most that many of the most recent lines
    # are kept (older entries are discarded automatically)
    recent_changesets = deque(maxlen=100_000)

    with open(filtered_filename, mode='r', newline='', encoding='utf-8') as outfile:
        reader = csv.reader(outfile)
        next(reader)  # skip header

        for line in reader:
            recent_changesets.append(line)

    my_changesets = []
    other_changesets = []
    my_user_id = "1722488"
    for line in recent_changesets:
        changeset_id = line[0]
        editor_id = line[1]
        user_id = line[2]
        if user_id == my_user_id:
            my_changesets.append(changeset_id)
        else:
            other_changesets.append(changeset_id)

    api = Api(url='https://openstreetmap.org')

    #rich.print(history_info(api, "Node", 1))

    # sqlite_test - that should be in serializing, probably TODO
    connection = sqlite3.connect(sqlite_test.database_filepath())
    cursor = connection.cursor()
    #sqlite_test.show_content_sample(cursor)
    print(sqlite_test.database_filepath())
    sqlite_test.load_data_if_database_is_empty(cursor)

    for changeset_id in my_changesets + other_changesets:
        for element in elements_edited_by_changeset(api, changeset_id):
            if "opening_hours" in element.tags:
                if element.tags.get("opening_hours:signed") != "no":
                    osm_url = "https://www.openstreetmap.org/" + element.__class__.__name__.lower() + "/" + str(element.id)
                    cursor.execute("SELECT * FROM match_data WHERE osm_link = :osm_link ORDER BY match_distance ASC LIMIT 1000", {'osm_link': osm_url})
                    returned = cursor.fetchall()
                    if len(returned) == 0:
                        pass
                    elif len(returned) != 1:
                        print("found", len(returned), "matches in database")
                        for entry in returned:
                            parsed = serializing.Match.data_from_database_constructor(entry)
                            rich.print(parsed)
                            rich.print(parsed.atp_tags)
                            rich.print(parsed.osm_link)
                    else:
                        entry = returned[0]
                        parsed = serializing.Match.data_from_database_constructor(entry)
                        osm_opening_hours = element.tags["opening_hours"]
                        atp_opening_hours = parsed.atp_tags.get(config.opening_hours_key())
                        if osm_opening_hours == atp_opening_hours or atp_opening_hours is None:
                            print()
                            print()
                            print(osm_url)
                            rich.print("OSM", osm_opening_hours)
                            rich.print("ATP", atp_opening_hours)
                        else:
                            print()
                            print()
                            print(changeset_id)
                            rich.print(element)
                            print(osm_url)
                            rich.print("OSM", osm_opening_hours)
                            rich.print("ATP", atp_opening_hours)


if __name__ == "__main__":
    main()
requirements.txt

@@ -11,3 +11,4 @@ python-dotenv
libtorrent
regex
simple_cache
osm_easy_api
serializing.py

@@ -2,6 +2,7 @@ import base64
import json
import csv
import shared
import rich

class Match:
    def __init__(self, atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches):

@@ -22,6 +23,73 @@ class Match:
    def link_to_point_in_osm(self):
        return shared.link_to_point_in_osm(self.osm_match_center['lat'], self.osm_match_center['lon'])

    def insert_into_sqlite_database(self, cursor):
        osm_match_center_lat = None
        osm_match_center_lon = None
        if self.osm_match_center is not None:
            osm_match_center_lat = self.osm_match_center['lat']
            osm_match_center_lon = self.osm_match_center['lon']
        cursor.execute("INSERT INTO match_data VALUES (:atp_center_lat, :atp_center_lon, :atp_tags, :osm_match_center_lat, :osm_match_center_lon, :osm_match_tags, :osm_link, :match_distance, :all_very_good_matches)",
            {
                "atp_center_lat": self.atp_center['lat'],
                "atp_center_lon": self.atp_center['lon'],
                "atp_tags": json.dumps(self.atp_tags),
                "osm_match_center_lat": osm_match_center_lat,
                "osm_match_center_lon": osm_match_center_lon,
                "osm_match_tags": json.dumps(self.osm_match_tags),
                "osm_link": self.osm_link,
                "match_distance": self.match_distance,
                "all_very_good_matches": json.dumps(self.all_very_good_matches),
            }
        )

    @staticmethod
    def data_from_database_constructor(data):
        atp_center_lat = data[0]
        atp_center_lon = data[1]
        atp_center = {'lat': atp_center_lat, 'lon': atp_center_lon}
        atp_tags = json.loads(data[2])
        osm_match_center_lat = data[3]
        osm_match_center_lon = data[4]
        osm_match_center = {'lat': osm_match_center_lat, 'lon': osm_match_center_lon}
        osm_match_tags = json.loads(data[5])
        osm_link = data[6]
        match_distance = data[7]
        all_very_good_matches = json.loads(data[8])
        return Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)

    @staticmethod
    def create_table_if_needed(cursor):
        if "match_data" in Match.existing_tables(cursor):
            print("match_data table exists already, delete file with database to recreate")
        else:
            # column order mirrors the constructor fields:
            """
            self.atp_center = atp_center
            self.atp_tags = atp_tags
            self.osm_match_center = osm_match_center
            self.osm_match_tags = osm_match_tags
            self.osm_link = osm_link
            self.match_distance = match_distance
            self.all_very_good_matches = all_very_good_matches
            """
            cursor.execute('''CREATE TABLE match_data
                (atp_center_lat float, atp_center_lon float, atp_tags text, osm_match_center_lat float, osm_match_center_lon float, osm_match_tags text, osm_link text, match_distance float, all_very_good_matches text)''')

            # magnificent speedup
            #cursor.execute("""CREATE INDEX idx_osm_data_area_identifier ON osm_data (area_identifier);""")
            #cursor.execute("""CREATE INDEX idx_osm_data_id_type ON osm_data (id, type);""")
            #cursor.execute("""CREATE INDEX idx_error_id ON osm_data (error_id);""")
            #cursor.execute("""CREATE INDEX idx_download_timestamp ON osm_data (download_timestamp);""")

    @staticmethod
    def existing_tables(cursor):
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        table_listing = cursor.fetchall()
        returned = []
        for entry in table_listing:
            returned.append(entry[0])
        return returned

def save_list_of_matches_to_csv(filepath, data):
    with open(filepath, 'w', newline='') as f:
        writer = csv.writer(f)
48 sqlite_test.py Normal file

@@ -0,0 +1,48 @@
import sqlite3
import serializing
import data_iterator
graticule_report = __import__("5_generate_graticule_reports")

def load_data_if_database_is_empty(cursor):
    serializing.Match.create_table_if_needed(cursor)
    # TODO: load from entire area, not only minuscule data from Kraków (change also database_filepath()) - maybe change both now to take from one function?
    # TODO: add indexes (search 'magnificent speedup')
    cursor.execute("SELECT * FROM match_data ORDER BY match_distance ASC LIMIT 1000")
    returned = cursor.fetchall()
    print(len(returned), "in database")
    if len(returned) == 0:
        print("database is empty, inserting entries")
        #dummy = serializing.Match(atp_center={'lat': 0, 'lon': 0}, atp_tags={'a': 'b'}, osm_match_center={'lat': 0, 'lon': 0}, osm_match_tags={'v': 'd'}, osm_link='https://osm/org', match_distance=10, all_very_good_matches="all_very_good_matches")
        #dummy.insert_into_sqlite_database(cursor)
        area = {'min_lat': 50, 'min_lon': 20, 'max_lat': 51, 'max_lon': 21}  # Kraków
        for entry in data_iterator.iterate_over_all_matches(area):
            entry.insert_into_sqlite_database(cursor)

def show_content_sample(cursor):
    cursor.execute(
        "SELECT * FROM match_data WHERE match_distance <= :max_allowed_distance ORDER BY match_distance DESC LIMIT 1000",
        {"max_allowed_distance": 100})
    returned = cursor.fetchall()
    print(len(returned), "in database")
    for entry in returned:
        print(entry)
    if len(returned) == 0:
        print("empty :(")

def main():
    connection = sqlite3.connect(database_filepath())
    cursor = connection.cursor()

    print(database_filepath())
    load_data_if_database_is_empty(cursor)
    show_content_sample(cursor)
    connection.commit()
    connection.close()

def database_filepath():
    area = graticule_report.global_graticule_coverage()
    area = {'min_lat': 50, 'min_lon': 20, 'max_lat': 51, 'max_lon': 21}  # Kraków (overrides the global coverage above for now, see TODO)
    return graticule_report.graticule_cache(area) + "test_database.db"

if __name__ == "__main__":
    main()