
WIP from dying laptop, OSM-ATP comparison for recently edited OHs

This commit is contained in:
Mateusz Konieczny 2025-04-07 11:24:42 +02:00
parent 854a8c3002
commit 481fc44b9c
8 changed files with 602 additions and 12 deletions

0_config.py
View file

@@ -529,6 +529,10 @@ def planet_download_folder():
return cache_folder() + "planet_data/"
def changeset_list_download_folder():
return cache_folder() + "openstreetmap_all_changeset_data/"
def output_folder():
return os.getenv("OSM_ATM_MATCHER_OUTPUT_FOLDER") + "/"
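The new output_folder() concatenates the result of os.getenv() directly, so it fails with a TypeError when OSM_ATM_MATCHER_OUTPUT_FOLDER is unset. A minimal, more defensive sketch (same name and environment variable as in the diff; the error message is illustrative):

import os

def output_folder():
    root = os.getenv("OSM_ATM_MATCHER_OUTPUT_FOLDER")
    if root is None:
        raise Exception("set the OSM_ATM_MATCHER_OUTPUT_FOLDER environment variable")
    return root + "/"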

1_obtain_osm_data.py
View file

@@ -6,21 +6,16 @@ import os
import osm_bot_abstraction_layer.util_download_file
config = __import__("0_config")
def main():
print(datetime.now().isoformat(timespec='minutes'))
def download_file_via_torrent(torrent_file_url, torrent_file_directory, torrent_file_name, file_download_folder):
ses = libtorrent.session({'listen_interfaces': '0.0.0.0:6881'})
torrent_file_directory = config.cache_folder()
filename = 'planet-latest.osm.pbf.torrent'
if os.path.isfile(torrent_file_directory + filename):
os.remove(torrent_file_directory + filename)
print(torrent_file_directory + filename)
url = 'https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf.torrent'
osm_bot_abstraction_layer.util_download_file.download_file_if_not_present_already(url, torrent_file_directory, filename)
if os.path.isfile(torrent_file_directory + torrent_file_name):
os.remove(torrent_file_directory + torrent_file_name)
print(torrent_file_directory + torrent_file_name)
osm_bot_abstraction_layer.util_download_file.download_file_if_not_present_already(torrent_file_url, torrent_file_directory, torrent_file_name)
info = libtorrent.torrent_info(torrent_file_directory + filename)
h = ses.add_torrent({'ti': info, 'save_path': config.planet_download_folder()})
info = libtorrent.torrent_info(torrent_file_directory + torrent_file_name)
h = ses.add_torrent({'ti': info, 'save_path': file_download_folder})
s = h.status()
print('starting', s.name)
@@ -41,6 +36,15 @@ def main():
time.sleep(1)
print(h.status().name, 'complete')
def main():
print(datetime.now().isoformat(timespec='minutes'))
download_file_via_torrent(
torrent_file_url='https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf.torrent',
torrent_file_directory=config.cache_folder(),
torrent_file_name='planet-latest.osm.pbf.torrent',
file_download_folder=config.planet_download_folder()
)
print(datetime.now().isoformat(timespec='minutes'))
if __name__ == "__main__":
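With the download logic parameterized, the same helper can fetch other planet dumps; the changeset-comparison script added later in this commit calls it like this (usage copied from that file, with the modules imported the same way):

obtain_osm_data = __import__("1_obtain_osm_data")
config = __import__("0_config")

obtain_osm_data.download_file_via_torrent(
    torrent_file_url='https://planet.osm.org/planet/changesets-latest.osm.bz2.torrent',
    torrent_file_directory=config.changeset_list_download_folder(),
    torrent_file_name='changesets-latest.osm.bz2.torrent',
    file_download_folder=config.changeset_list_download_folder()
)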

changeset_parser_extracting_data_from_xml_line.php Normal file
View file

@@ -0,0 +1,85 @@
<?php
// obtained from https://github.com/matkoniecz/StreetComplete_usage_changeset_analysis
// assumptions:
// changesets in the dump are formatted as follows:
// either (1)
// the line begins with "<changeset" and ends with "/>"
// and it is a changeset without tags
// or (2)
// the line begins with '<changeset' and ends with '">',
// tags follow, one per line,
// and the changeset ends with a line containing '</changeset>' as its sole non-whitespace text
// these assumptions allow processing the file line by line, without a full XML parser
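// for illustration, a tagged changeset entry is assumed to look roughly like this
// (sample values; the real dump carries more attributes, which are ignored here):
// <changeset id="123" created_at="2025-01-01T00:00:00Z" uid="456" num_changes="2">
//   <tag k="created_by" v="StreetComplete 60.0"/>
//   <tag k="StreetComplete:quest_type" v="AddOpeningHours"/>
// </changeset>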
function value_of_key($line, $tag) {
$left_stripped = str_replace("<tag k=\"" . $tag . "\" v=\"", "", $line);
return str_replace('"/>', '', $left_stripped);
}
function quest_tag_to_identifier($line) {
return value_of_key($line, "StreetComplete:quest_type");
}
function created_by_tag_to_identifier($line) {
return value_of_key($line, "created_by");
}
// from https://www.php.net/manual/en/function.substr-compare.php
function str_begins($haystack, $needle) {
return 0 === substr_compare($haystack, $needle, 0, strlen($needle));
}
function str_ends($haystack, $needle) {
return 0 === substr_compare($haystack, $needle, -strlen($needle));
}
function contains_substr($mainStr, $str, $loc = false) {
if ($loc === false) return (strpos($mainStr, $str) !== false);
if (strlen($mainStr) < strlen($str)) return false;
if (($loc + strlen($str)) > strlen($mainStr)) return false;
return (strcmp(substr($mainStr, $loc, strlen($str)), $str) == 0);
}
function get_changes_number($changeset_header) {
if (preg_match("/num_changes=\"([0-9]+)\"/", $changeset_header, $matches)) {
return (int)$matches[1];
} else {
return 0;
}
}
function get_quest_type($tag_line) {
if (preg_match("/v=\"([^\"]+)\"/", $tag_line, $matches)) {
return $matches[1];
} else {
return NULL;
}
}
function get_changeset_id($changeset_header) {
if (preg_match("/ id=\"([0-9]+)\"/", $changeset_header, $matches)) {
return (int)$matches[1];
} else {
return -1;
}
}
function get_uid($changeset_header) {
if (preg_match("/ uid=\"([0-9]+)\"/", $changeset_header, $matches)) {
return (int)$matches[1];
} else {
return -1;
}
}
function get_changeset_creation_date($changeset_header) {
if (preg_match("/ created_at=\"([^\"]+)\"/", $changeset_header, $matches)) {
return $matches[1];
} else {
return -1;
}
}
?>

changeset_parser_streetcomplete_edits_generate_csv_and_make_quest_summary.php Normal file
View file

@@ -0,0 +1,112 @@
<?php
// obtained from https://github.com/matkoniecz/StreetComplete_usage_changeset_analysis
// special thanks to @Zverik for answering https://github.com/Zverik/editor-stats/issues/4
// without that answer I would not have expected processing this data to be feasible
// (the changeset planet file can be read line by line)!
// for the assumptions made about the file format that allow processing it
// without parsing it as XML, see the changeset_parser_extracting_data_from_xml_line.php file
require_once('changeset_parser_extracting_data_from_xml_line.php');
function main($input_filepath, $output_filepath) {
$file = new SplFileObject($input_filepath);
$outputFile = fopen($output_filepath, "w") or die("Unable to open file!");
fwrite($outputFile, "changeset_id" . "," . "editor" . "," . "changed_objects" . "," . "quest_type" . "," . "user_id" . "\n");
$popularity = array();
// based on https://stackoverflow.com/questions/13246597/how-to-read-a-large-file-line-by-line
// Loop until we reach the end of the file.
while (!$file->eof()) {
$line = trim($file->fgets());
if ($line == "</changeset>") {
#echo $line;
#echo "end of a changeset with tags\n\n";
$changeset_header = NULL;
} elseif (str_begins($line, "<changeset")) {
if(str_ends($line, '">')) {
#echo $line;
$changeset_header = $line;
#echo "new changeset, with tags\n\n";
} else {
#echo $line;
#echo "new changeset, without tags\n\n";
}
} else {
if(str_begins($line, '<tag k="created_by"')) {
if(contains_substr($line, "StreetComplete") || contains_substr($line, "zażółć")) {
#echo $changeset_header;
#echo "\n";
#echo $line;
#echo "\n";
#echo "created by tag\n";
}
} elseif (str_begins($line, '<tag k="StreetComplete:quest_type"') || str_begins($line, '<tag k="zażółć:quest_type"')) {
#echo $line;
#echo "\n";
#echo "quest type tag";
#echo get_changes_number($changeset_header);
#echo "\n";
if(str_begins($line, '<tag k="StreetComplete:quest_type"')){
$editor = "StreetComplete";
} elseif(str_begins($line, '<tag k="zażółć:quest_type"')){
$editor = "StreetComplete";
} else {
$editor = "?";
}
$id = get_changeset_id($changeset_header);
$count = get_changes_number($changeset_header);
$type = get_quest_type($line);
$uid = get_uid($changeset_header);
fwrite($outputFile, $id . "," . $editor . "," . $count . "," . $type . "," . $uid . "\n");
$popularity = register_popularity($popularity, $type, get_changes_number($changeset_header));
#var_dump($popularity);
#echo "\n\n";
}
}
}
arsort($popularity);
foreach ($popularity as $quest_identifier => $total_edits) {
echo "$quest_identifier : $total_edits\n";
}
echo("\n");
echo("\n");
echo("\n");
echo "| QuestCode | Total modified elements |\n";
echo "| ------------- |-------------|\n";
foreach ($popularity as $quest_identifier => $total_edits) {
echo "| $quest_identifier | $total_edits |\n";
}
echo("\n");
echo("\n");
echo("\n");
echo "| QuestCode | Total modified elements |\n";
echo "| ------------- |-------------|\n";
foreach ($popularity as $quest_identifier => $total_edits) {
if ($total_edits >= 4000) {
echo "| $quest_identifier | ". (int)($total_edits/1000) . "k |\n";
} else {
echo "| $quest_identifier | $total_edits |\n";
}
}
// Unset the file to call __destruct(), closing the file handle.
$file = null;
fclose($outputFile);
}
function register_popularity($dict, $index, $number) {
if (isset($dict[$index])) {
$dict[$index] += $number;
} else {
$dict[$index] = $number;
}
return $dict;
}
main($argv[1], $argv[2]);
?>
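The script takes the decompressed changeset dump and the output CSV as positional arguments, matching the command assembled in the Python code later in this commit (the paths below are examples):

php changeset_parser_streetcomplete_edits_generate_csv_and_make_quest_summary.php changesets-250324.osm streetcomplete_changesets.csv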

View file

@@ -0,0 +1,268 @@
import csv
from collections import deque
import os
import requests
import json
import diskcache
import rich
from osm_easy_api.api import Api
from osm_easy_api.data_classes import Node, Way, Relation, Changeset, OsmChange, Action, Tags
from osm_easy_api.api.endpoints import Elements_Container
import sqlite3
import sqlite_test
import osm_bot_abstraction_layer.util_download_file
import serializing
import bz2
obtain_osm_data = __import__("1_obtain_osm_data") # move torrent download code elsewhere?
config = __import__("0_config")
CHANGESET_CACHE = diskcache.Cache(config.cache_folder() + "osm_changeset_cache", eviction_policy="none")
HISTORY_CACHE = diskcache.Cache(config.cache_folder() + "osm_history_cache", eviction_policy="none")
def create_filtered_csv(input_filename, output_filename):
total_lines = 0
saved_lines = 0
with open(input_filename, mode='r', newline='', encoding='utf-8') as infile:
with open(output_filename, mode='w', newline='', encoding='utf-8') as outfile:
reader = csv.DictReader(infile)
writer = csv.writer(outfile)
# Write header to output file
writer.writerow(['changeset_id', 'editor', 'user_id'])
for row in reader:
total_lines += 1
if row['quest_type'] == 'AddOpeningHours':
# Write the selected columns to the output file
writer.writerow([
row['changeset_id'],
row['editor'],
row['user_id']
])
saved_lines += 1
print(f"Processed {total_lines} lines in total")
print(f"Saved {saved_lines} lines to {output_filename}")
def serialize_element_list(elements):
returned = []
for entry in elements:
returned.append(entry.to_dict())
return json.dumps(returned, default=str, indent=3)
def object_class_from_object_name(object_name):
if object_name == "Node":
return Node
elif object_name == "Way":
return Way
elif object_name == "Relation":
return Relation
else:
raise Exception("unexpected type " + object_name)
def deserialize_element_list(serialized):
returned = []
for entry in json.loads(serialized):
object_class = object_class_from_object_name(entry['type'])
returned.append(object_class.from_dict(entry))
return returned
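# note: serialize_element_list()/deserialize_element_list() round-trip through
# to_dict()/from_dict() of the osm_easy_api data classes; json.dumps(default=str)
# stringifies any values that are not JSON-serializable (such as timestamps)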
def elements_edited_by_changeset(api, changeset_id):
if changeset_id in CHANGESET_CACHE:
return deserialize_element_list(CHANGESET_CACHE[changeset_id])
element_list = download_elements_edited_by_changeset(api, changeset_id)
returned = serialize_element_list(element_list)
CHANGESET_CACHE[changeset_id] = returned
return element_list
def download_elements_edited_by_changeset(api, changeset_id):
print("downloading changeset", changeset_id)
element_list = []
for action in api.changeset.download(changeset_id):
if action[0] != Action.MODIFY and action[0] != Action.DELETE and action[0] != Action.CREATE:
print("unexpected action type", action)
raise Exception("unexpected action type " + str(action))
element = action[1]
element_list.append(element)
#rich.print(action)
return element_list
def history_info(api, object_type, object_id):
identifier = object_type + "_" + str(object_id)
if identifier in HISTORY_CACHE:
return deserialize_element_list(HISTORY_CACHE[identifier])
osm_type = object_class_from_object_name(object_type)
container = Elements_Container(api)
print("downloading history of", object_type, object_id)
history = container.history(osm_type, object_id)
serialized = serialize_element_list(history)
HISTORY_CACHE[identifier] = serialized
return deserialize_element_list(HISTORY_CACHE[identifier])
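# note: returning a freshly deserialized copy (rather than the in-memory history list)
# keeps the return type identical whether or not the cache was hit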
def obtain_changeset_listing():
"""
# obtain https://github.com/matkoniecz/StreetComplete_usage_changeset_analysis/blob/master/streetcomplete_edits_generate_csv_and_make_quest_summary.php
# obtain https://github.com/matkoniecz/StreetComplete_usage_changeset_analysis/blob/master/extracting_data_from_xml_line.php
# check hashes
# or just check in it?
# with verification for actual source?
# find code to download changesets via torrent in python
# is in 1_obtain_osm_data.py
if os.path.isdir(config.changeset_list_download_folder()) == False:
os.makedirs(config.changeset_list_download_folder())
torrent_file_url = 'https://planet.osm.org/planet/changesets-latest.osm.bz2.torrent'
torrent_file_name = 'changesets-latest.osm.bz2.torrent'
torrent_file_directory = config.changeset_list_download_folder()
file_download_folder = config.changeset_list_download_folder()
obtain_osm_data.download_file_via_torrent(torrent_file_url, torrent_file_directory, torrent_file_name, file_download_folder)
# based on https://stackoverflow.com/a/16964073/4130619
# https://docs.python.org/3/library/bz2.html has no such example
# TODO done?
# it unpacks to something like changesets-250324.osm.bz2
# so ideally uncompressed would be also like this
"""
# TODO: once the commented-out download code above is re-enabled,
# this fallback assignment can be removed
file_download_folder = config.changeset_list_download_folder()
file_with_bz2_changesets = None
for file_name in os.listdir(file_download_folder):
target = os.path.join(file_download_folder, file_name)
if os.path.isfile(target):
if target.endswith(".bz2"):
if file_with_bz2_changesets == None or file_with_bz2_changesets < file_name:
# relevant if multiple files become unpacked over time
# in such case we want latest one
file_with_bz2_changesets = file_name
print("will unpack", file_with_bz2_changesets)
filepath = os.path.join(file_download_folder, file_with_bz2_changesets)
decompressed_filepath = os.path.join(file_download_folder, file_with_bz2_changesets.replace('.bz2', ''))
print("unpacking to", decompressed_filepath)
with open(decompressed_filepath, 'wb') as new_file, bz2.BZ2File(filepath, 'rb') as file:
for data in iter(lambda : file.read(100 * 1024), b''):
new_file.write(data)
"""
import bz2
filepath = '/media/mateusz/OSM_cache/ATP_matcher_cache/openstreetmap_all_changeset_data/changesets-250324.osm.bz2'
newfilepath = '/media/mateusz/OSM_cache/ATP_matcher_cache/openstreetmap_all_changeset_data/changesets-250324.osm'
with open(newfilepath, 'wb') as new_file, open(filepath, 'rb') as file:
decompressor = BZ2Decompressor()
for data in iter(lambda : file.read(100 * 1024), b''):
new_file.write(decompressor.decompress(data))
with open(newfilepath, 'wb') as new_file, bz2.BZ2File(filepath, 'rb') as file:
for data in iter(lambda : file.read(100 * 1024), b''):
new_file.write(data)"""
# yay, now we also got PHP as dependency
# TODO https://github.com/matkoniecz/StreetComplete_usage_changeset_analysis/blob/master/streetcomplete_edits_generate_csv_and_make_quest_summary.php - modify it so output.csv is also specified via parameter
# /home/mateusz/Documents/install_moje/OSM_software/StreetComplete_usage_changeset_analysis
# decide on where output goes
# then pull this code into my repo
command = 'php changeset_parser_streetcomplete_edits_generate_csv_and_make_quest_summary.php "' + decompressed_filepath + '" "' + csv_with_streetcomplete_changesets() + '"'
print(command)
os.system(command)
def csv_with_streetcomplete_changesets():
return os.path.join(config.changeset_list_download_folder(), 'streetcomplete_changesets.csv')
def main():
input_file = csv_with_streetcomplete_changesets()
if os.path.isfile(input_file) == False:
obtain_changeset_listing()
filtered_filename = os.path.join(config.changeset_list_download_folder(), 'streetcomplete_changesets_only_opening_hours.csv')
filtering_success_filepath = '/media/mateusz/OSM_cache/changesets/filtering_marker.success'
if os.path.isfile(filtering_success_filepath) == False:
create_filtered_csv(input_file, filtered_filename)
with open(filtering_success_filepath, "w") as myfile:
myfile.write("data prepared")
# Use a deque with maxlen so that at most that many of the most recent lines are kept
recent_changesets = deque(maxlen=100_000)
with open(filtered_filename, mode='r', newline='', encoding='utf-8') as infile:
reader = csv.reader(infile)
next(reader) # Skip header
for line in reader:
recent_changesets.append(line)
my_changesets = []
other_changesets = []
my_user_id = "1722488"
for line in recent_changesets:
changeset_id = line[0]
editor_id = line[1]
user_id = line[2]
if user_id == my_user_id:
my_changesets.append(changeset_id)
else:
other_changesets.append(changeset_id)
api = Api(url='https://openstreetmap.org')
#rich.print(history_info(api, "Node", 1))
# TODO: the sqlite_test database code should probably live in serializing
connection = sqlite3.connect(sqlite_test.database_filepath())
cursor = connection.cursor()
#sqlite_test.show_content_sample(cursor)
print(sqlite_test.database_filepath())
sqlite_test.load_data_if_database_is_empty(cursor)
for changeset_id in my_changesets + other_changesets:
for element in elements_edited_by_changeset(api, changeset_id):
if "opening_hours" in element.tags:
if element.tags.get("opening_hours:signed") != "no":
osm_url = "https://www.openstreetmap.org/" + element.__class__.__name__.lower() + "/" + str(element.id)
cursor.execute("SELECT * FROM match_data WHERE osm_link = :osm_link ORDER BY match_distance ASC LIMIT 1000", {'osm_link': osm_url})
returned = cursor.fetchall()
if len(returned) == 0:
pass
elif len(returned) != 1:
print("found", len(returned), "matches in database")
for entry in returned:
parsed = serializing.Match.data_from_database_constructor(entry)
rich.print(parsed)
rich.print(parsed.atp_tags)
rich.print(parsed.osm_link)
else:
entry = returned[0]
parsed = serializing.Match.data_from_database_constructor(entry)
osm_opening_hours = element.tags["opening_hours"]
atp_opening_hours = parsed.atp_tags.get(config.opening_hours_key())
if osm_opening_hours == atp_opening_hours or atp_opening_hours == None:
print()
print()
print(osm_url)
rich.print("OSM", osm_opening_hours)
rich.print("ATP", atp_opening_hours)
else:
print()
print()
print(changeset_id)
rich.print(element)
print(osm_url)
rich.print("OSM", osm_opening_hours)
rich.print("ATP", atp_opening_hours)
if __name__ == "__main__":
main()

View file

@@ -11,3 +11,4 @@ python-dotenv
libtorrent
regex
simple_cache
osm_easy_api

serializing.py
View file

@@ -2,6 +2,7 @@ import base64
import json
import csv
import shared
import rich
class Match:
def __init__(self, atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches):
@@ -22,6 +23,73 @@ class Match:
def link_to_point_in_osm(self):
return shared.link_to_point_in_osm(self.osm_match_center['lat'], self.osm_match_center['lon'])
def insert_into_sqlite_database(self, cursor):
osm_match_center_lat = None
osm_match_center_lon = None
if self.osm_match_center != None:
osm_match_center_lat = self.osm_match_center['lat']
osm_match_center_lon = self.osm_match_center['lon']
cursor.execute("INSERT INTO match_data VALUES (:atp_center_lat, :atp_center_lon, :atp_tags, :osm_match_center_lat, :osm_match_center_lon, :osm_match_tags, :osm_link, :match_distance, :all_very_good_matches)",
{
"atp_center_lat": self.atp_center['lat'],
"atp_center_lon": self.atp_center['lon'],
"atp_tags": json.dumps(self.atp_tags),
"osm_match_center_lat": osm_match_center_lat,
"osm_match_center_lon": osm_match_center_lon,
"osm_match_tags": json.dumps(self.osm_match_tags),
"osm_link": self.osm_link,
"match_distance": self.match_distance,
"all_very_good_matches": json.dumps(self.all_very_good_matches),
}
)
@staticmethod
def data_from_database_constructor(data):
atp_center_lat=data[0]
atp_center_lon=data[1]
atp_center = {'lat': atp_center_lat, 'lon': atp_center_lon}
atp_tags=json.loads(data[2])
osm_match_center_lat=data[3]
osm_match_center_lon=data[4]
osm_match_center = {'lat': osm_match_center_lat, 'lon': osm_match_center_lon}
osm_match_tags=json.loads(data[5])
osm_link=data[6]
match_distance=data[7]
all_very_good_matches=json.loads(data[8])
return Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
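# round-trip sketch (hypothetical usage): a row written by insert_into_sqlite_database()
# can be rebuilt with Match.data_from_database_constructor(cursor.fetchone())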
@staticmethod
def create_table_if_needed(cursor):
if "match_data" in Match.existing_tables(cursor):
print("osm_data table exists already, delete file with database to recreate")
else:
"""
self.atp_center = atp_center
self.atp_tags = atp_tags
self.osm_match_center = osm_match_center
self.osm_match_tags = osm_match_tags
self.osm_link = osm_link
self.match_distance = match_distance
self.all_very_good_matches = all_very_good_matches
"""
cursor.execute('''CREATE TABLE match_data
(atp_center_lat float, atp_center_lon float, atp_tags text, osm_match_center_lat float, osm_match_center_lon float, osm_match_tags text, osm_link text, match_distance float, all_very_good_matches text)''')
# magnificent speedup
#cursor.execute("""CREATE INDEX idx_osm_data_area_identifier ON osm_data (area_identifier);""")
#cursor.execute("""CREATE INDEX idx_osm_data_id_type ON osm_data (id, type);""")
#cursor.execute("""CREATE INDEX idx_error_id ON osm_data (error_id);""")
#cursor.execute("""CREATE INDEX idx_download_timestamp ON osm_data (download_timestamp);""")
@staticmethod
def existing_tables(cursor):
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
table_listing = cursor.fetchall()
returned = []
for entry in table_listing:
returned.append(entry[0])
return returned
def save_list_of_matches_to_csv(filepath, data):
with open(filepath, 'w', newline='') as f:
writer = csv.writer(f)

48
sqlite_test.py Normal file
View file

@@ -0,0 +1,48 @@
import sqlite3
import serializing
import data_iterator
graticule_report = __import__("5_generate_graticule_reports")
def load_data_if_database_is_empty(cursor):
serializing.Match.create_table_if_needed(cursor)
# TODO: load from entire area, not only minuscule data from Kraków (change also database_filepath()) - maybe change both now to take from one function?
# TODO: add indexes (search 'magnificent speedup')
cursor.execute("SELECT * FROM match_data ORDER BY match_distance ASC LIMIT 1000")
returned = cursor.fetchall()
print(len(returned), "in database")
if len(returned) == 0:
print("database is empty, inserting entries")
#dummy = serializing.Match(atp_center={'lat': 0, 'lon': 0}, atp_tags={'a': 'b'}, osm_match_center={'lat': 0, 'lon': 0}, osm_match_tags={'v': 'd'}, osm_link='https://osm/org', match_distance=10, all_very_good_matches="all_very_good_matches")
#dummy.insert_into_sqlite_database(cursor)
area = {'min_lat': 50, 'min_lon': 20, 'max_lat': 51, 'max_lon': 21} # Kraków
for entry in data_iterator.iterate_over_all_matches(area):
entry.insert_into_sqlite_database(cursor)
def show_content_sample(cursor):
cursor.execute(
"SELECT * FROM match_data WHERE match_distance <= :max_allowed_distance ORDER BY match_distance DESC LIMIT 1000",
{"max_allowed_distance": 100})
returned = cursor.fetchall()
print(len(returned), "in database")
for entry in returned:
print(entry)
if len(returned) == 0:
print("empty :(")
def main():
connection = sqlite3.connect(database_filepath())
cursor = connection.cursor()
print(database_filepath())
load_data_if_database_is_empty(cursor)
show_content_sample(cursor)
connection.commit()
connection.close()
def database_filepath():
area = graticule_report.global_graticule_coverage()
area = {'min_lat': 50, 'min_lon': 20, 'max_lat': 51, 'max_lon': 21} # Kraków
return graticule_report.graticule_cache(area) + "test_database.db"
if __name__ == "__main__":
main()