
autopep8 --in-place --max-line-length=420 --recursive .

This commit is contained in:
Mateusz Konieczny 2024-08-14 10:17:26 +02:00
parent 9e9f5af2b0
commit 6e5070861f
24 changed files with 284 additions and 163 deletions
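The changes below are mechanical PEP 8 cleanups produced by the autopep8 invocation above; the unusually high --max-line-length=420 leaves long lines unwrapped, so the edits are almost entirely whitespace fixes. A rough before/after sketch of the kinds of corrections visible in this diff (the pycodestyle codes are given for orientation only and are not part of the commit):

# before: block comment without a space after '#', one space before an inline comment,
# spaces around '=' in keyword arguments
#missing shoplike
notified_about_tag['amenity'].append('post_depot') # internal facility, not post_office
outfile.write(leafleter.generator.get_polygon(shape, color = "green", fill_color = "green"))

# after: E265/E261 comment spacing, E251 keyword '=' spacing; E302 additionally inserts
# the second blank line before top-level def statements throughout the diff
# missing shoplike
notified_about_tag['amenity'].append('post_depot')  # internal facility, not post_office
outfile.write(leafleter.generator.get_polygon(shape, color="green", fill_color="green"))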

File diff suppressed because one or more lines are too long


@ -5,6 +5,7 @@ import shops
import time
config = __import__("0_config")
def main():
directory_path = config.cache_folder()
@ -22,10 +23,11 @@ def main():
print(region, "- list_shops - started")
start = time.time()
for entry in shops.osm.list_shops(region, directory_path):
pass # needed to trigger processing code
pass # needed to trigger processing code
print((time.time() - start) / 60, "minutes")
print(region, "- list_shops - completed")
processed.append(region)
if __name__ == "__main__":
main()


@ -1,18 +1,20 @@
import rich
import osm_bot_abstraction_layer.util_download_file
import osm_bot_abstraction_layer.tag_knowledge as tag_knowledge
import json
import os
import requests
config = __import__("0_config")
import osm_bot_abstraction_layer.tag_knowledge as tag_knowledge
import osm_bot_abstraction_layer.util_download_file
import rich
config = __import__("0_config")
def processed_atp_codes():
for area_name, area_data in config.processing_plan().items():
if 'accepted' in area_data:
for atp_code in area_data['accepted']:
yield atp_code
def main():
response = requests.get("https://data.alltheplaces.xyz/runs/latest.json")
todos = json.loads(response.text)
@ -21,14 +23,15 @@ def main():
download_entire_atp_dataset(run_id)
look_through_entire_atp_dataset()
def do_not_remind_that_this_tagging_may_be_worth_supporting():
notified_about_tag = {
# should be fixed in ATP, if possible
# TODO: raise issues at https://github.com/alltheplaces/alltheplaces/issues
'tourism': ['yes', 'attraction'],
'healthcare': [
'laboratory', # https://github.com/alltheplaces/alltheplaces/issues/8637
'centre', # not reported yet TODO
'laboratory', # https://github.com/alltheplaces/alltheplaces/issues/8637
'centre', # not reported yet TODO
],
# TODO maybe start including them?
@ -48,11 +51,11 @@ def do_not_remind_that_this_tagging_may_be_worth_supporting():
if key not in notified_about_tag:
notified_about_tag[key] = []
#missing shoplike
# missing shoplike
notified_about_tag['office'].append('yes')
notified_about_tag['amenity'].append('canteen')
#kind also shoplike? I want to support them
# kind also shoplike? I want to support them
notified_about_tag['man_made'].append('charge_point')
notified_about_tag['amenity'].append('music_venue')
notified_about_tag['amenity'].append('prep_school')
@ -117,9 +120,10 @@ def do_not_remind_that_this_tagging_may_be_worth_supporting():
# seem hard to confirm by survey
notified_about_tag['craft'].append('brewery')
notified_about_tag['amenity'].append('post_depot') # internal facility, not post_office
notified_about_tag['amenity'].append('post_depot') # internal facility, not post_office
return notified_about_tag
def warn_about_broken_spider(atp_code, message):
print()
print()
@ -130,24 +134,27 @@ def warn_about_broken_spider(atp_code, message):
print(url_log)
print()
def maybe_warn_about_spider_with_empty_file(atp_code):
if config.is_empty_file_for_spider_worth_mentioning(atp_code):
warn_about_broken_spider(atp_code, "empty output")
def maybe_warn_about_spider_with_broken_file(atp_code):
if config.is_broken_file_for_spider_worth_mentioning(atp_code):
warn_about_broken_spider(atp_code, "broken output file")
def warn_about_new_tags_that_are_neither_shoplike_nor_ignored(entry, item_path):
notified_about_tag = do_not_remind_that_this_tagging_may_be_worth_supporting()
for key in tag_knowledge.typical_main_keys():
if key in entry['properties']:
if ";" in entry['properties'][key]:
continue # TODO - is it safe to consider it as being unfinished?
continue # TODO - is it safe to consider it as being unfinished?
if key == 'healthcare' and entry['properties'].get("amenity") == entry['properties'].get("healthcare"):
continue
if tag_knowledge.is_shoplike({key: entry['properties'][key]}) == True:
break # handles cases where healthcare is extra tag in addition to proper amenity
break # handles cases where healthcare is extra tag in addition to proper amenity
if tag_knowledge.is_shoplike({key: entry['properties'][key]}) == False:
if key not in notified_about_tag:
notified_about_tag[key] = []
@ -159,6 +166,7 @@ def warn_about_new_tags_that_are_neither_shoplike_nor_ignored(entry, item_path):
print()
notified_about_tag[key].append(entry['properties'][key])
def all_spider_codes_iterator():
directory_path_with_unpacked_spider_data = config.atp_unpacked_folder()
# TODO: Is there a match between spider codes and their filenames?
@ -180,6 +188,7 @@ def download_entire_atp_dataset(run_id):
osm_bot_abstraction_layer.util_download_file.download_file_if_not_present_already(download_url, config.atp_cache_folder(), filename)
os.system('unzip "' + config.atp_cache_folder() + filename + '" -d "' + config.atp_cache_folder() + '"')
def look_through_entire_atp_dataset():
candidates = {}
for _area_name, area_data in config.processing_plan().items():
@ -200,10 +209,10 @@ def look_through_entire_atp_dataset():
continue
warn_about_new_tags_that_are_neither_shoplike_nor_ignored(entry, item_path)
if atp_code in [
'hyatt', # https://github.com/alltheplaces/alltheplaces/issues/9399
'maserati', # has many actually empty entries
'skoda', # limitations of source data, unfixable by ATP
'general_logistics_systems_de', # missing data to provide POI data, see https://github.com/alltheplaces/alltheplaces/commit/89f5511bacf24f2d6d0a1c2a183130c9148f772a
'hyatt', # https://github.com/alltheplaces/alltheplaces/issues/9399
'maserati', # has many actually empty entries
'skoda', # limitations of source data, unfixable by ATP
'general_logistics_systems_de', # missing data to provide POI data, see https://github.com/alltheplaces/alltheplaces/commit/89f5511bacf24f2d6d0a1c2a183130c9148f772a
]:
break
if config.canonical_feature(entry['properties']) == "?":
@ -226,11 +235,12 @@ def look_through_entire_atp_dataset():
print()
print("candidate")
print(item_path)
if atp_code not in candidates[area_data['country_code']]: # applies when one spider has many main keys
if atp_code not in candidates[area_data['country_code']]: # applies when one spider has many main keys
candidates[area_data['country_code']].append(atp_code)
print(key, "=", entry['properties'][key])
print(candidates)
def download(code, run_id):
script_location = os.path.abspath(__file__)
directory_path = config.cache_folder()
@ -238,5 +248,6 @@ def download(code, run_id):
filename = code + ".atp.geojson"
osm_bot_abstraction_layer.util_download_file.download_file_if_not_present_already(download_url, directory_path, filename)
if __name__ == "__main__":
main()


@ -52,6 +52,7 @@ def is_matching_any_name_part_to_osm_tags(name_part_list, osm_tags):
return True
return False
def matching_name_part(part, namelike_value):
part = part.lower()
namelike_value = namelike_value.lower()
@ -63,6 +64,7 @@ def matching_name_part(part, namelike_value):
else:
return True
def filter_with_fuzzy_name_match(osm_data, name_part_list):
returned = []
for osm in osm_data:
@ -70,6 +72,7 @@ def filter_with_fuzzy_name_match(osm_data, name_part_list):
returned.append(osm)
return returned
def get_filter_names_from_atp_dataset(current_atp):
filter_names = []
for atp in current_atp:
@ -79,12 +82,13 @@ def get_filter_names_from_atp_dataset(current_atp):
name_sources.append(short_name)
for name in name_sources:
for part in name.split():
if part.lower() in ["kebab", "kebap", "apteka", "cukiernia", "pizzeria", "na"]: # common and shared - automate detection?
if part.lower() in ["kebab", "kebap", "apteka", "cukiernia", "pizzeria", "na"]: # common and shared - automate detection?
continue
if part not in filter_names:
filter_names.append(part)
return filter_names
def run_match(osm_data, atp_code):
output_file = "build_temporary_files/" + atp_code + '.csv'
atp_data = load_atp(atp_code)
@ -93,11 +97,12 @@ def run_match(osm_data, atp_code):
matches = get_matches(osm_data, atp_data)
serializing.save_list_of_matches_to_csv(output_file, matches)
def get_matches(osm_data, atp_data):
match_list = []
filter_names = get_filter_names_from_atp_dataset(atp_data)
#TODO: get also misspellings
#TODO: handle nearby objects with matching feature type or vacant ones
# TODO: get also misspellings
# TODO: handle nearby objects with matching feature type or vacant ones
filtered_osm = filter_with_fuzzy_name_match(osm_data, filter_names)
osm_index = spatial_index.SpatialIndex(filtered_osm)
print("filtering", len(osm_data), "to", len(filtered_osm), "candidates based on names is done, now checking", len(atp_data), "ATP candidates by distance")
@ -145,6 +150,7 @@ def get_matches(osm_data, atp_data):
raise
return match_list
def passed_filter(osm_data_tag_filter, tags):
for key in osm_data_tag_filter.keys():
if osm_data_tag_filter[key] == None:
@ -173,6 +179,7 @@ def load_atp(atp_code):
# no need to also report here, so let's fail silently
return []
def load_atp_from_json(data, atp_code):
returned = []
for entry in data['features']:


@ -18,6 +18,7 @@ import distance_distribution
config = __import__("0_config")
def get_free_space_in_mb(path):
total, used, free = shutil.disk_usage(path)
return free / 1024 / 1024
@ -54,6 +55,7 @@ def generate_report(cache_only):
copy_data_for_publication(all_atp_codes)
publish_data_on_internet()
def generate_bot_edit_list_page():
with open("output/bot_edit_plan_add_tags.html", 'w') as outfile:
outfile.write(html_bot_edit_prefix())
@ -75,6 +77,7 @@ def generate_bot_edit_list_page():
outfile.write('<li><a target="_blank" href="' + osm_link + '">' + osm_link + '</a>' + " <code>" + escape_html(key) + "=" + value + "</code></li>\n")
outfile.write(html_bot_edit_suffix())
def contact_method():
return """Please <a target="_blank" href="https://codeberg.org/matkoniecz/improving_openstreetmap_using_alltheplaces_dataset/issues">create an issue</a> or <a target="_blank" href="https://www.openstreetmap.org/message/new/Mateusz%20Konieczny">send me an OSM private message</a> if you see a potential for improvements. If potential improvements are in All The Places - better to create PR or issue there. If unsure, please write to me. If you see this data being misued and causing harm (for example, imported without consulting community or ignoring their feedback) - please write to me and I will help with cleanups, including reverts and reconsider how this data is publish."""
@ -109,6 +112,7 @@ def html_bot_edit_prefix():
<h2>Edit ideas listing</h2>
"""
def html_bot_edit_suffix():
return """</section>
</body>
@ -121,12 +125,13 @@ def clear_output_files(folder):
if os.path.isfile(file_path):
os.remove(file_path)
def produce_map_analysis_for_atp_code(atp_code, cache_only, url_checker_instance, report_generators):
csv_filepath = "build_temporary_files/" + atp_code + '.csv'
if os.path.isfile(csv_filepath) == False:
report_generators[atp_code] = {
'atp_file_is_broken': True
}
}
return report_generators
match_list = serializing.load_list_of_matches_from_csv(csv_filepath)
@ -142,12 +147,13 @@ def produce_map_analysis_for_atp_code(atp_code, cache_only, url_checker_instance
processed = qa.remove_bad_data(entry.atp_tags, atp_code)
if processed == None:
continue
entry.atp_tags = processed # TODO is it happening as data was passed to qa function
entry.atp_tags = processed # TODO is it happening as data was passed to qa function
rebuild_match_list.append(entry)
report_generators[atp_code] = produce_map_analysis_for_atp_data(atp_code, area_name="", match_list=rebuild_match_list, cache_only=cache_only, url_checker_instance=url_checker_instance)
return report_generators
class MismatchingNameReportCreator:
def __init__(self, atp_code, area_name):
self.atp_code = atp_code
@ -227,9 +233,9 @@ class MismatchingNameReportCreator:
def table_of_contents(self):
return [
{
'header': "Name mismatch",
'section_link': section_link('name mismatch between OSM and ATP', len(self.completely_mismatching_names), self.report_filename()),
'output_files': [self.report_filename()]
'header': "Name mismatch",
'section_link': section_link('name mismatch between OSM and ATP', len(self.completely_mismatching_names), self.report_filename()),
'output_files': [self.report_filename()]
},
]
@ -247,6 +253,7 @@ class MismatchingNameReportCreator:
#outfile.write(leafleter.generator.get_line(missing.atp_center['lat'], missing.atp_center['lon'], missing.osm_match_center['lat'], missing.osm_match_center['lon'], color = 'blue'))
outfile.write(leafleter.generator.get_html_page_suffix())
class ATPGivesTagsReportCreator:
def __init__(self, url_checker_instance, atp_code, area_name):
self.atp_code = atp_code
@ -281,7 +288,7 @@ class ATPGivesTagsReportCreator:
writer = csv.writer(outfile)
for key in self.importable_keys:
if self.count_of_total_tag_mismatches[key] == 0:
for entry in self.shops_with_tags_to_be_added: # already Nominatim-filtered
for entry in self.shops_with_tags_to_be_added: # already Nominatim-filtered
if key in entry['tags_to_be_added']:
value = entry['entry'].atp_tags[key]
atp_code = entry['entry'].atp_tags['@spider']
@ -344,7 +351,7 @@ class ATPGivesTagsReportCreator:
return False
returned = self.url_checker.is_website_eligible(atp, cache_only)
if returned == None:
return False # not cached, instructed to use only cache
return False # not cached, instructed to use only cache
if returned:
if atp.atp_tags.get(tested_key) != atp.osm_match_tags.get(tested_key):
return True
@ -356,7 +363,7 @@ class ATPGivesTagsReportCreator:
# pointing to the main brand page
pass
elif self.url_checker.is_difference_limited_to_slash_at_end(atp_value, osm_value):
pass # effectively the same anyway, no real mismatch
pass # effectively the same anyway, no real mismatch
else:
self.mismatching_website_tags.append(atp)
self.report_mismatch(atp, tested_key)
@ -402,7 +409,7 @@ class ATPGivesTagsReportCreator:
new_tags[key] = entry['entry'].atp_tags[key]
message += tag_list_to_html(new_tags)
outfile.write(leafleter.generator.get_marker(message, entry['entry'].atp_center['lat'], entry['entry'].atp_center['lon'], color='green'))
outfile.write(leafleter.generator.get_line(entry['entry'].atp_center['lat'], entry['entry'].atp_center['lon'], entry['entry'].osm_match_center['lat'], entry['entry'].osm_match_center['lon'], color = 'green'))
outfile.write(leafleter.generator.get_line(entry['entry'].atp_center['lat'], entry['entry'].atp_center['lon'], entry['entry'].osm_match_center['lat'], entry['entry'].osm_match_center['lon'], color='green'))
outfile.write(leafleter.generator.get_html_page_suffix())
def generate_mismatching_website_listing(self):
@ -422,9 +429,10 @@ class ATPGivesTagsReportCreator:
summary += 'tag list as suggested by ATP (should not be assumed to be directly usable in OSM):<br><br>'
summary += tag_list_to_html(bad.atp_tags)
outfile.write(leafleter.generator.get_marker(summary, bad.atp_center['lat'], bad.atp_center['lon'], color='red'))
outfile.write(leafleter.generator.get_line(bad.atp_center['lat'], bad.atp_center['lon'], bad.osm_match_center['lat'], bad.osm_match_center['lon'], color = 'red'))
outfile.write(leafleter.generator.get_line(bad.atp_center['lat'], bad.atp_center['lon'], bad.osm_match_center['lat'], bad.osm_match_center['lon'], color='red'))
outfile.write(leafleter.generator.get_html_page_suffix())
class MissingObjectsReportCreator:
def __init__(self, atp_code, area_name):
self.area_name = area_name
@ -488,6 +496,7 @@ class MissingObjectsReportCreator:
bad_tags_skipped.append(atp)
json.dump(serializing.generate_geojson_structure(bad_tags_skipped), f)
class NominatimMismatchReportCreator:
def __init__(self, atp_code, area_name):
self.area_name = area_name
@ -546,9 +555,10 @@ class NominatimMismatchReportCreator:
summary += tag_list_to_html(missing.atp_tags)
outfile.write(leafleter.generator.get_marker(summary, missing.atp_center['lat'], missing.atp_center['lon'], color='red'))
location_from_nominatim = nominatim.location_given_tags(missing.atp_tags, debug_identifier=self.atp_code)[0]
outfile.write(leafleter.generator.get_line(missing.atp_center['lat'], missing.atp_center['lon'], location_from_nominatim['lat'], location_from_nominatim['lon'], color = 'red'))
outfile.write(leafleter.generator.get_line(missing.atp_center['lat'], missing.atp_center['lon'], location_from_nominatim['lat'], location_from_nominatim['lon'], color='red'))
outfile.write(leafleter.generator.get_html_page_suffix())
"""
class MismatchingNameReportCreator:
def __init__(self, atp_code):
@ -577,6 +587,8 @@ add to test_display_website
"""
# TODO: passing atp_code should not be needed
# TODO: save files one level higher, here just produce analysis
def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_only, url_checker_instance):
missing_objects_report = MissingObjectsReportCreator(atp_code, area_name)
mismatching_name_report = MismatchingNameReportCreator(atp_code, area_name)
@ -613,7 +625,7 @@ def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_onl
conflict_between_atp_and_nominatim_report.register_case_using_known_nominatim_status(atp, status)
elif atp.match_distance == None or atp.match_distance > config.missing_shop_distance_in_kilometers_for_specific_case(atp.atp_tags):
nominatim_match = nominatim.is_location_matching_tags(atp.atp_tags, atp.atp_center, cache_only=cache_only, spider=atp_code)
if nominatim_match != False: # both matches, failed geolocation and geolocation not done at all go here
if nominatim_match != False: # both matches, failed geolocation and geolocation not done at all go here
missing_objects_report.check_case(atp)
conflict_between_atp_and_nominatim_report.register_case_using_known_nominatim_status(atp, nominatim_match)
else:
@ -635,7 +647,8 @@ def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_onl
'missing_objects_report': missing_objects_report,
'conflict_between_atp_and_nominatim_report': conflict_between_atp_and_nominatim_report,
'total_atp_entries': len(match_list),
}
}
def format_for_geojson_export(dataset):
for entry in dataset:
@ -644,6 +657,7 @@ def format_for_geojson_export(dataset):
del atp.atp_tags[key]
return dataset
def get_center(dataset):
max_lat = -90
max_lon = -180
@ -671,6 +685,7 @@ def sidebar_content(page_specific_info, atp_code):
sidebar += '<br><br>\n<a href="https://github.com/alltheplaces/alltheplaces/blob/master/locations/spiders/' + atp_code + '.py" target="_blank">atp source code</a>'
return sidebar
def tag_list_to_html(tags):
returned = ""
normal_tags = ""
@ -691,21 +706,26 @@ def tag_list_to_html(tags):
returned += "<br><br>tags present in ATP, very likely not usable directly in OSM<br>" + dropped_tags
return returned
def htmlify_key_value_pair(key, value):
return key + " = " + htmlify_value(key, value) + "<br>"
def htmlify_value(key, value):
value = escape_html(value)
if key == "website" or (key == "image" and value.find("http") == 0):
value = '<a href="' + value + '">' + value + "</a>"
return value
def escape_url(value):
return str(value).replace('"', '%22').replace("'", "%27")
def escape_html(value):
return html.escape(value).replace("\r\n", "<br>").replace("\n", "<br>")
def headers():
# TODO: pass it smarter in config (list of main report creators?)
# or at least make it static method
@ -718,6 +738,7 @@ def headers():
NominatimMismatchReportCreator('dummy', 'dummy area name').table_of_contents()[0]['header'],
]
def generate_website_index_listing_by_country(report_generators, released_codes_by_region, partial=False):
with open("output/index.html", 'w') as outfile:
outfile.write(html_prefix())
@ -732,6 +753,7 @@ def generate_website_index_listing_by_country(report_generators, released_codes_
outfile.write(table_with_spider_overview(atp_codes, report_generators, partial))
outfile.write(html_suffix())
def generate_website_index_for_named_area(report_generators, area_name, partial=False):
with open("output/" + area_name + "_index.html", 'w') as outfile:
outfile.write(html_prefix())
@ -739,6 +761,7 @@ def generate_website_index_for_named_area(report_generators, area_name, partial=
outfile.write(table_with_spider_overview(report_generators.keys(), report_generators, partial))
outfile.write(html_suffix())
def table_with_spider_overview(atp_codes, report_generators, partial):
returned = ""
returned += '<table class="statistics-summary"><thead><tr><th>' + '</th><th>'.join(headers()) + '</th></tr></thead>\n'
@ -756,8 +779,9 @@ def table_with_spider_overview(atp_codes, report_generators, partial):
returned += "no entries shown in this area\n"
return returned
def table_row(atp_code, statistics):
if statistics['missing_objects_report'] == None: #TODO test is it working
if statistics['missing_objects_report'] == None: # TODO test is it working
return '<tr><th></th><td colspan="5">Data missing</td></tr>'
missing_section = statistics['missing_objects_report'].table_of_contents()[0]['section_link']
@ -772,11 +796,13 @@ def table_row(atp_code, statistics):
return '<tr><th>' + atp_code + '</th><td>' + missing_section + '</td><td>' + mismatching_names_section + '</td><td>' + tags_section + '</td><td>' + website_mismatch_section + '</td><td>' + mismatch_section + not_attempted + '</td></tr>'
def section_link(description, count, page):
if count == 0:
return '<span class=less-visible title="' + description + '">' + str(count) + '</span>'
return '<a href="' + page + '" title="' + description + '">' + str(count) + '</a>'
def contact_method():
return """Please <a href="https://codeberg.org/matkoniecz/list_how_openstreetmap_can_be_improved_with_alltheplaces_data/issues">create an issue</a> or <a href="https://www.openstreetmap.org/message/new/Mateusz%20Konieczny">send me an OSM private message</a> if you see a potential for improvements. If potential improvements are in All The Places - better to create PR or issue <a href="https://github.com/alltheplaces/alltheplaces">there</a>. If unsure, please write to me."""
@ -819,11 +845,13 @@ def html_prefix():
</p>
"""
def html_suffix():
return """<hr><br>Published on <a href="https://matkoniecz.codeberg.page/improving_openstreetmap_using_alltheplaces_dataset/">https://matkoniecz.codeberg.page/improving_openstreetmap_using_alltheplaces_dataset/</a> - generated on """ + f'{datetime.datetime.now():%Y-%m-%d %H:%M:%S%z}' + """ (note that ATP and OSM data used here may be older) </section>
</body>
</html>"""
def iterate_over_output_files(atp_code):
reports = [
MissingObjectsReportCreator(atp_code, 'dummy area name'),
@ -836,6 +864,7 @@ def iterate_over_output_files(atp_code):
for file in entry['output_files']:
yield file
def copy_data_for_publication(all_atp_codes):
for atp_code in all_atp_codes:
if get_free_space_in_mb('../public_website_with_output') < 400:
@ -850,8 +879,10 @@ def copy_data_for_publication(all_atp_codes):
os.system("cp output/index.html ../public_website_with_output/index.html")
# published on https://matkoniecz.codeberg.page/improving_openstreetmap_using_alltheplaces_dataset/
def publish_data_on_internet():
os.system('cd ../public_website_with_output && git add . && git commit -m "automatic update" && git push')
if __name__ == "__main__":
main()


@ -10,20 +10,25 @@ import shared
obtain_atp_data = __import__("2_obtain_atp_data")
config = __import__("0_config")
def is_in_this_area(area, atp):
if atp.atp_center['lat'] > area['min_lat'] and atp.atp_center['lat'] < area['max_lat']:
if atp.atp_center['lon'] > area['min_lon'] and atp.atp_center['lon'] < area['max_lon']:
return True
return False
def areas():
return {
'kraków': {'min_lat': 50, 'min_lon': 19.5, 'max_lat': 50.3, 'max_lon': 20.5},
# http://bboxfinder.com/#52.383301,16.885986,52.436182,17.044859
'poznań': {'min_lat': 52.383301, 'min_lon': 16.885986, 'max_lat': 52.436182, 'max_lon': 17.044859},
}
}
general_statistics = {}
def main():
for atp_code, _item_path in obtain_atp_data.all_spider_codes_iterator():
print(atp_code)
@ -55,6 +60,7 @@ def save_files(data, name):
with open(name + '_missing.kml', 'w') as f:
f.write(serializing.generate_kml_text(data))
def clear_output_files(folder):
for filename in os.listdir(folder):
file_path = os.path.join(folder, filename)
@ -71,4 +77,5 @@ def generate_missing_shop_listing(atp_code, apparently_missing_shops):
osm_location_link = shared.link_to_point_in_osm(missing.atp_center['lat'], missing.atp_center['lon'])
summary = 'here ATP shows object being present, which seems not mapped in OpenStreetMap (<a href="' + osm_location_link + '">location</a>):<br><br>'
main()


@ -23,11 +23,13 @@ def graticule_id(lat, lon, lat_span, lon_span, margin_in_kilometers):
# filter data for each
# filter data for each in constant time (just check is given location within graticule range)
def main():
check_is_any_graticule_having_margin_greater_than_entire_graticule()
generate_test_graticule_coverage_map()
test_area_run()
def generate_test_graticule_coverage_map():
graticule_anchor_coverage = {'min_lat': 49, 'min_lon': 14, 'max_lat': 54, 'max_lon': 24}
with open("test_coverage_graticule_display.html", 'w') as outfile:
@ -37,9 +39,10 @@ def generate_test_graticule_coverage_map():
for lat_anchor in range(graticule_anchor_coverage['min_lat'], graticule_anchor_coverage['max_lat'] + 1):
for lon_anchor in range(graticule_anchor_coverage['min_lon'], graticule_anchor_coverage['max_lon'] + 1):
shape = [[lat_anchor + 1, lon_anchor + 1], [lat_anchor + 1, lon_anchor], [lat_anchor, lon_anchor], [lat_anchor, lon_anchor + 1], [lat_anchor + 1, lon_anchor + 1]]
outfile.write(leafleter.generator.get_polygon(shape, color = "green", fill_color = "green", link = "https://pl.wikipedia.org/wiki/Pozna%C5%84"))
outfile.write(leafleter.generator.get_polygon(shape, color="green", fill_color="green", link="https://pl.wikipedia.org/wiki/Pozna%C5%84"))
outfile.write(leafleter.generator.get_html_page_suffix())
def test_area_run():
# http://bboxfinder.com/#52.383301,16.885986,52.436182,17.044859
poznań = {'min_lat': 52.383301, 'min_lon': 16.885986, 'max_lat': 52.436182, 'max_lon': 17.044859, 'name': 'Poznań'}
@ -57,7 +60,7 @@ def test_area_run():
outfile.write(leafleter.generator.get_html_page_prefix("website title", (area['max_lat'] + area['min_lat'])/2, (area['max_lon'] + area['min_lon'])/2))
#outfile.write(leafleter.generator.get_marker("text", 50.06, 19.93))
shape = [[area['max_lat'], area['max_lon']], [area['max_lat'], area['min_lon']], [area['min_lat'], area['min_lon']], [area['min_lat'], area['max_lon']], [area['max_lat'], area['max_lon']]]
outfile.write(leafleter.generator.get_polygon(shape, color = "green", fill_color = "green", link = "https://pl.wikipedia.org/wiki/Pozna%C5%84"))
outfile.write(leafleter.generator.get_polygon(shape, color="green", fill_color="green", link="https://pl.wikipedia.org/wiki/Pozna%C5%84"))
outfile.write(leafleter.generator.get_html_page_suffix())
atp_data_by_spider = {}
@ -72,13 +75,12 @@ def test_area_run():
if len(gathered) > 0:
atp_data_by_spider[atp_code] = gathered
general_area = "europe/poland"
osm_data = []
for entry in matcher.load_geofabrik(general_area, config.cache_folder()):
if entry['center']['lat'] > area['min_lat'] and entry['center']['lat'] < area['max_lat']:
if entry['center']['lon'] > area['min_lon'] and entry['center']['lon'] < area['max_lon']:
osm_data.append(entry)
if entry['center']['lat'] > area['min_lat'] and entry['center']['lat'] < area['max_lat']:
if entry['center']['lon'] > area['min_lon'] and entry['center']['lon'] < area['max_lon']:
osm_data.append(entry)
print(len(atp_data_by_spider))
print(len(osm_data))
@ -100,6 +102,7 @@ def test_area_run():
print(output_file)
def check_is_any_graticule_having_margin_greater_than_entire_graticule():
for lat in range(-89, 89):
for lon in range(-180, 180):
@ -110,17 +113,17 @@ def check_is_any_graticule_having_margin_greater_than_entire_graticule():
distance_for_lat_degree_alt = shared.calculate_distance(
{'lat': tested_location['lat'] + 1, 'lon': tested_location['lon'] + 1},
{'lat': tested_location['lat'] + 2, 'lon': tested_location['lon'] + 1}
)
)
distance_for_lat_degree_alt_alt = shared.calculate_distance(
{'lat': tested_location['lat'] + 1, 'lon': tested_location['lon']},
{'lat': tested_location['lat'] + 2, 'lon': tested_location['lon']}
)
)
print("expected zero, maybe espilon changes", distance_for_lat_degree_alt - distance_for_lat_degree)
print("expected zero, maybe espilon changes", distance_for_lat_degree_alt_alt - distance_for_lat_degree)
distance_for_lon_degree_alt = shared.calculate_distance(
{'lat': tested_location['lat'] + 1, 'lon': tested_location['lon'] + 1},
{'lat': tested_location['lat'] + 1, 'lon': tested_location['lon'] + 1}
)
)
print("expected meaningful changes", distance_for_lon_degree_alt - distance_for_lon_degree)
margin_in_kilometers = config.maximum_missing_shop_distance_in_kilometers()
@ -133,4 +136,5 @@ def check_is_any_graticule_having_margin_greater_than_entire_graticule():
raise
break
main()


@ -33,7 +33,7 @@ def main():
except FileNotFoundError as e:
print(e)
pass
#TODO skip freeform/valid ones
# TODO skip freeform/valid ones
for key, values in used_tags.items():
if tag_knowledge.is_freeform_key(key):
print(key, "=", "*")


@ -1,6 +1,7 @@
import matplotlib.pyplot as plt
import os
class MatchDistanceDestributionReportCreator:
def __init__(self, identifier, area_name):
self.identifier = identifier
@ -29,7 +30,7 @@ class MatchDistanceDestributionReportCreator:
plt.rcParams["figure.figsize"] = [10, 10]
# https://matplotlib.org/stable/gallery/style_sheets/style_sheets_reference.html
# see 02 file for more investigation
plt.style.use('fivethirtyeight') # affects all charts, 'seaborn-v0_8-whitegrid' is also nice
plt.style.use('fivethirtyeight') # affects all charts, 'seaborn-v0_8-whitegrid' is also nice
plt.grid(True)
plt.clf()
plt.xlim(0, 1200)
@ -39,7 +40,7 @@ class MatchDistanceDestributionReportCreator:
plt.rcParams["figure.figsize"] = [10, 10]
# https://matplotlib.org/stable/gallery/style_sheets/style_sheets_reference.html
# see 02 file for more investigation
plt.style.use('fivethirtyeight') # affects all charts, 'seaborn-v0_8-whitegrid' is also nice
plt.style.use('fivethirtyeight') # affects all charts, 'seaborn-v0_8-whitegrid' is also nice
plt.grid(True)
plt.clf()
plt.xlim(0, 300)


@ -4,6 +4,7 @@ import datetime
import time
config = __import__("0_config")
def scan_eligible(grab_bag, scanner):
while True:
any_scanned = False
@ -18,6 +19,7 @@ def scan_eligible(grab_bag, scanner):
if any_scanned == False:
return
def main():
wait_between_the_same_domain_minutes = 5
grab_bag = {}
@ -38,5 +40,6 @@ def main():
scan_eligible(grab_bag, scanner)
time.sleep(10)
if __name__ == "__main__":
main()


@ -6,24 +6,27 @@ import re
import shutil
config = __import__("0_config")
def cache_path():
return 'nominatim_cache'
# Initialize disk cache
nominatim_cache = diskcache.Cache(cache_path())
def drop_extra_detail_blocking_nominatim(value):
# patch nominatim bug where inclusion of apartment code breaks search
# https://github.com/osm-search/Nominatim/issues/145#issuecomment-2143549199
# see https://pythex.org/ for testing
value = re.sub(r'/\d+([a-zA-Z])?', '', value) # turns 178/12 into 178
value = re.sub(r'(,|, |)lok\..*', '', value, flags=re.IGNORECASE) # "lokal" is Polish for "unit"
value = re.sub(r'(,|, |)LOK .*', '', value) # "lokal" is Polish for "unit"
value = re.sub(r'(,|, |)lokal .*', '', value) # "lokal" is Polish for "unit"
value = re.sub(r'(,|, |)lok .*', '', value) # "lokal" is Polish for "unit"
value = re.sub(r'(,|, |)lok.*', '', value) # "lokal" is Polish for "unit"
value = re.sub(r'(,|, |)LU.*', '', value) # "lokal użytkowy" is Polish legalese for "unit"
value = re.sub(r'(,|, |)Lu.*', '', value) # "lokal użytkowy" is Polish legalese for "unit"
value = re.sub(r'/\d+([a-zA-Z])?', '', value) # turns 178/12 into 178
value = re.sub(r'(,|, |)lok\..*', '', value, flags=re.IGNORECASE) # "lokal" is Polish for "unit"
value = re.sub(r'(,|, |)LOK .*', '', value) # "lokal" is Polish for "unit"
value = re.sub(r'(,|, |)lokal .*', '', value) # "lokal" is Polish for "unit"
value = re.sub(r'(,|, |)lok .*', '', value) # "lokal" is Polish for "unit"
value = re.sub(r'(,|, |)lok.*', '', value) # "lokal" is Polish for "unit"
value = re.sub(r'(,|, |)LU.*', '', value) # "lokal użytkowy" is Polish legalese for "unit"
value = re.sub(r'(,|, |)Lu.*', '', value) # "lokal użytkowy" is Polish legalese for "unit"
value = re.sub(r'(,|, |)suite .*', '', value, flags=re.IGNORECASE)
@ -34,6 +37,7 @@ def drop_extra_detail_blocking_nominatim(value):
value = re.sub(r'(,|, |)unit .*', '', value, flags=re.IGNORECASE)
return value
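# Illustration of the trimming above (hypothetical input, not part of the committed code):
# drop_extra_detail_blocking_nominatim("Krakowska 178/12, lok. 5") -> "Krakowska 178"
# the "/12" flat number and the ", lok. 5" unit suffix are stripped, leaving an
# address form that Nominatim has a chance to geocode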
def nominatim_queries(tags, debug=False):
address_tag_groups = [
['addr:country', 'addr:city', 'addr:street', 'addr:housenumber'],
@ -67,7 +71,7 @@ def nominatim_queries(tags, debug=False):
if key in ["addr:street_address", 'addr:street', 'addr:full']:
# see https://github.com/osm-search/Nominatim/issues/87
value = re.sub(r'ul\. ?', '', value, flags=re.IGNORECASE)
value = re.sub(r'( |$)ul ', ' ', value, flags=re.IGNORECASE) # "ul Żabia"
value = re.sub(r'( |$)ul ', ' ', value, flags=re.IGNORECASE) # "ul Żabia"
if key in ["addr:street_address", 'addr:full']:
value = drop_extra_detail_blocking_nominatim(value)
query += value
@ -76,6 +80,7 @@ def nominatim_queries(tags, debug=False):
print(group)
yield query
def location_given_tags_cache_only(tags):
"""
True: matches
@ -87,21 +92,22 @@ def location_given_tags_cache_only(tags):
if query not in nominatim_cache:
with open(config.nominatim_requests_missing_from_cache(), 'a') as outfile:
outfile.write(query+"\n")
return -1 # maybe transformed query would give better result?
# should not check further ones
return -1 # maybe transformed query would give better result?
# should not check further ones
else:
response = nominatim_cache[query]
if len(response) >= 1:
return response
return None
def location_given_tags(tags, debug_identifier):
for query in nominatim_queries(tags):
response = query_nominatim(query)
if len(response) >= 1:
return response
atp_code = debug_identifier # TODO handle this
atp_code = debug_identifier # TODO handle this
if config.is_failed_geocoding_worth_mentioning(atp_code):
print()
print()
@ -119,6 +125,7 @@ def location_given_tags(tags, debug_identifier):
print()
return None
def is_location_matching_tags(tags, center, spider, cache_only=False):
"""
True: matches
@ -135,6 +142,7 @@ def is_location_matching_tags(tags, center, spider, cache_only=False):
return response
return are_locations_matching(tags, response[0], center)
def are_locations_matching(tags, location, center):
distance = shared.calculate_distance(center, location)
if distance > config.missing_shop_distance_in_kilometers_for_specific_case(tags):
@ -142,10 +150,12 @@ def are_locations_matching(tags, location, center):
else:
return True
def get_free_space_in_mb(path):
total, used, free = shutil.disk_usage(path)
return free / 1024 / 1024
def query_nominatim(query):
# Check if the response is in the cache
if query in nominatim_cache:
@ -221,6 +231,7 @@ def query_nominatim(query):
else:
response.raise_for_status()
# Example usage
# gptchat generated
if __name__ == '__main__':


@ -1,10 +1,12 @@
import nominatim
config = __import__("0_config")
def main():
with open(config.nominatim_requests_missing_from_cache()) as fp:
for query in fp:
nominatim.query_nominatim(query.strip())
if __name__ == "__main__":
main()

qa.py

@ -1,8 +1,9 @@
config = __import__("0_config")
import shops
import rich
import phonenumbers
import datetime
import phonenumbers
import rich
import shops
config = __import__("0_config")
def remove_bad_data(data, atp_code):
"""
@ -94,6 +95,7 @@ def remove_bad_data(data, atp_code):
del data[key]
return data
def is_empty_value(key, value, atp_code):
if value.lower() in ["undefined", "b/n", "---", "none", "n/a"]:
if config.is_null_specified_as_text_worth_mentioning(atp_code):
@ -114,11 +116,12 @@ def is_empty_value(key, value, atp_code):
return True
return False
def handle_ref_tag(data, atp_code):
if atp_code in ['paczkomat_inpost_pl', 'allegro_one_box_pl']:
return data # actual ref
return data # actual ref
elif atp_code in ['credit_agricole_pl']:
del data["ref"] # synthethic ref created by ATP
del data["ref"] # synthethic ref created by ATP
elif "ref" in data:
# https://github.com/alltheplaces/alltheplaces/blob/master/DATA_FORMAT.md describes `ref` and I am a bit confused
# > A unique identifier for this feature inside this spider. The code that generates the output will remove duplicates based on the value of this key.
@ -137,6 +140,7 @@ def handle_ref_tag(data, atp_code):
del data["ref"]
return data
def remove_bad_phone_data(data, atp_code):
if 'phone' in data:
if data['phone'].replace(" ", "").startswith("+443"):
@ -157,6 +161,7 @@ def remove_bad_phone_data(data, atp_code):
del data['phone']
return data
def is_valid_phone_tag(phone_tag):
if ";" not in phone_tag:
return is_valid_phone_number(phone_tag)
@ -166,13 +171,14 @@ def is_valid_phone_tag(phone_tag):
return False
return True
def is_valid_phone_number(phone):
if phone in [
'+4800000000', # https://github.com/alltheplaces/alltheplaces/issues/8633
]:
return False
try:
parsed = phonenumbers.parse(phone, None)
return phonenumbers.is_valid_number(parsed)
except phonenumbers.phonenumberutil.NumberParseException:
return False
if phone in [
'+4800000000', # https://github.com/alltheplaces/alltheplaces/issues/8633
]:
return False
try:
parsed = phonenumbers.parse(phone, None)
return phonenumbers.is_valid_number(parsed)
except phonenumbers.phonenumberutil.NumberParseException:
return False
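# Usage sketch (hypothetical values, not part of the committed code):
# is_valid_phone_number('+4800000000') -> False, explicitly listed placeholder number
# is_valid_phone_number('not a number') -> False, phonenumbers.parse() raises NumberParseException
# is_valid_phone_tag('+48 ...;+48 ...') checks every ';'-separated number and accepts the tag only if all are valid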

run.py

@ -5,6 +5,7 @@ obtain_atp_data = __import__("2_obtain_atp_data")
matcher = __import__("3_matcher")
show_data = __import__("4_show_data")
def main():
# TODO: test dependencies on fresh OS
# see readme for instructions how to install dependencies
@ -27,6 +28,7 @@ def main():
# maps listing various missing data - shops, tags, and of various wrong data (shop in OSM not in ATP and so on)
show_data.main()
if __name__ == "__main__":
main()


@ -2,6 +2,7 @@ import base64
import json
import csv
class Match:
def __init__(self, atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches):
self.atp_center = atp_center
@ -11,16 +12,18 @@ class Match:
self.osm_link = osm_link
self.match_distance = match_distance
self.all_very_good_matches = all_very_good_matches
def __str__(self):
return "Match(" + str(self.atp_center) + ',' + str(self.atp_tags) + ',' + str(self.osm_match_center) + ',' + str(self.osm_match_tags) + ',' + str(self.osm_link) + ',' + str(self.match_distance) + ',' + str(self.all_very_good_matches) + ")"
def save_list_of_matches_to_csv(filepath, data):
with open(filepath, 'w', newline='') as f:
writer = csv.writer(f)
writer.writerow(['atp_lat', 'atp_lon', 'atp_tags_dict_in_base64', 'osm_lat', 'osm_lon', 'osm_tags_dict_in_base64', 'osm_link', 'match_distance', 'all_very_good_matches'])
for entry in data:
if entry.match_distance == None:
writer.writerow([entry.atp_center['lat'], entry.atp_center['lon'], encode_to_base64_via_json(entry.atp_tags),"","","","","", ""])
writer.writerow([entry.atp_center['lat'], entry.atp_center['lon'], encode_to_base64_via_json(entry.atp_tags), "", "", "", "", "", ""])
else:
writer.writerow([
entry.atp_center['lat'],
@ -32,7 +35,8 @@ def save_list_of_matches_to_csv(filepath, data):
entry.osm_link,
entry.match_distance,
encode_to_base64_via_json(entry.all_very_good_matches)
])
])
def load_list_of_matches_from_csv(filepath):
try:
@ -52,7 +56,7 @@ def load_list_of_matches_from_csv(filepath):
osm_match_center = {'lat': float(row[3]), 'lon': float(row[4])}
osm_match_tags = decode_from_base64_via_json(row[5])
for key, value in osm_match_tags.items():
osm_match_tags[key] = str(value) # TODO - review saving code, this should not be needed
osm_match_tags[key] = str(value) # TODO - review saving code, this should not be needed
osm_link = row[6]
match_distance = float(row[7])
all_very_good_matches = decode_from_base64_via_json(row[8])
@ -62,7 +66,9 @@ def load_list_of_matches_from_csv(filepath):
print(filepath)
raise
#gptchat generated
# gptchat generated
def encode_to_base64_via_json(input_dict):
# Convert the dictionary to a JSON string
json_str = json.dumps(input_dict)
@ -74,7 +80,9 @@ def encode_to_base64_via_json(input_dict):
base64_str = base64_bytes.decode('utf-8')
return base64_str
#gptchat generated
# gptchat generated
def decode_from_base64_via_json(base64_str):
# Decode the Base64 string to bytes
base64_bytes = base64_str.encode('utf-8')
@ -86,18 +94,20 @@ def decode_from_base64_via_json(base64_str):
output_dict = json.loads(json_str)
return output_dict
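# Round-trip sketch (hypothetical value, not part of the committed code):
# decode_from_base64_via_json(encode_to_base64_via_json({'shop': 'bakery'}))
# returns {'shop': 'bakery'} again - the dict goes through JSON and then base64,
# so it can be stored safely in a single CSV cell of the match list files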
def generate_geojson_structure(dataset):
geojson_data = {"type": "FeatureCollection","features": []}
geojson_data = {"type": "FeatureCollection", "features": []}
for atp in dataset:
geojson_data['features'].append({"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [atp.atp_center['lon'], atp.atp_center['lat']]
},
"properties": atp.atp_tags
})
"geometry": {
"type": "Point",
"coordinates": [atp.atp_center['lon'], atp.atp_center['lat']]
},
"properties": atp.atp_tags
})
return geojson_data
def generate_kml_text(dataset):
geojson_data = generate_geojson_structure(dataset)
returned = """<?xml version="1.0" encoding="UTF-8"?>


@ -1,8 +1,10 @@
import geopy.distance
def link_to_point_in_osm(lat, lon):
return 'https://www.openstreetmap.org/?mlat=' + str(lat) + "&mlon=" + str(lon) + "#map=19/" + str(lat) + '/' + str(lon)
def calculate_distance(point_a, point_b):
# https://github.com/geopy/geopy?tab=readme-ov-file#measuring-distance
coords_1 = (point_a['lat'], point_a['lon'])


@ -14,4 +14,3 @@ class SpatialIndex:
# sort by longitude
# select quickly by longitude, leaving unlimited for latitude


@ -1,9 +1,10 @@
import distance_distribution
import url_checker
import leafleter
import serializing
import unittest
show_data = __import__("4_show_data")
import serializing
import leafleter
import url_checker
import distance_distribution
class IsCodeCompletelyCrashingSmoketests(unittest.TestCase):
def test_rough_code_validity(self):
@ -49,6 +50,7 @@ class IsCodeCompletelyCrashingSmoketests(unittest.TestCase):
for file in show_data.iterate_over_output_files('dummy_atp_code'):
pass
class TagListFormattingTests(unittest.TestCase):
def test_escaping_newlines(self):
self.assertEqual(show_data.escape_html("ajaj\naaaa"), "ajaj<br>aaaa")
@ -59,6 +61,7 @@ class TagListFormattingTests(unittest.TestCase):
def test_tag_list_generation_newline_in_tags_escape(self):
self.assertEqual("aaaa<br>bbb" in show_data.tag_list_to_html({"description": "aaaa\nbbb"}), True)
class PhoneSuggestingTests(unittest.TestCase):
def test_accept_normal_phone(self):
add_tags_from_atp = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy identifier for tests', 'dummy area name')
@ -88,6 +91,7 @@ class PhoneSuggestingTests(unittest.TestCase):
creator = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy_atp_code', 'dummy area name')
self.assertEqual(creator.is_phone_eligible(match), False)
class WebsiteSuggestingTests(unittest.TestCase):
def test_accept_normal_website(self):
add_tags_from_atp = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy identifier for tests', 'dummy area name')


@ -3,6 +3,7 @@ import link_scan_worker
import run
import unittest
class SmokeTest(unittest.TestCase):
def test_math(self):
self.assertEqual(2+2, 4)


@ -1,7 +1,8 @@
import serializing
import unittest
matcher = __import__("3_matcher")
config = __import__("0_config")
import serializing
class RealityTests(unittest.TestCase):
def test_match_on_exact_match(self):
@ -59,7 +60,6 @@ class RealityTests(unittest.TestCase):
matches = matcher.get_matches(osm_data, atp_data)
self.assertEqual(matches[0].match_distance, None)
def test_accept_matches_for_ice_cream_synonyms(self):
atp_data = [self.package_tags_into_mock({'brand': "Titan", 'amenity': 'ice_cream'})]
osm_data = [self.package_tags_into_mock({'brand': "Titan", 'shop': 'ice_cream'})]
@ -119,4 +119,4 @@ class RealityTests(unittest.TestCase):
matches = matcher.get_matches(osm_data, atp_data)
self.assertEqual(matches[0].match_distance, 0)
#TODO: how to handle shop=yes shop=vacant
# TODO: how to handle shop=yes shop=vacant


@ -1,6 +1,7 @@
import unittest
import qa
class RealityTests(unittest.TestCase):
def test_mathworks(self):
self.assertEqual(2 + 1, 3)


@ -1,6 +1,7 @@
import unittest
import spatial_index
class Tests(unittest.TestCase):
def test_basic_match_for_single_entry(self):
data = [
@ -96,7 +97,7 @@ class Tests(unittest.TestCase):
if entry["tags"] not in matches:
matches[entry["tags"]] = 0
matches[entry["tags"]] += 1
self.assertEqual(matches, {4: 1, 5:1, 6:1, 7:1})
self.assertEqual(matches, {4: 1, 5: 1, 6: 1, 7: 1})
def test_basic_match_for_all_entries_except_first(self):
data = [
@ -120,8 +121,7 @@ class Tests(unittest.TestCase):
if entry["tags"] not in matches:
matches[entry["tags"]] = 0
matches[entry["tags"]] += 1
self.assertEqual(matches, {4: 1, 5:1, 6:1, 7:1})
self.assertEqual(matches, {4: 1, 5: 1, 6: 1, 7: 1})
def test_basic_match_for_all_entries_except_last(self):
data = [
@ -144,4 +144,4 @@ class Tests(unittest.TestCase):
if entry["tags"] not in matches:
matches[entry["tags"]] = 0
matches[entry["tags"]] += 1
self.assertEqual(matches, {4: 1, 5:1, 6:1})
self.assertEqual(matches, {4: 1, 5: 1, 6: 1})


@ -6,6 +6,7 @@ import shutil
import time
config = __import__("0_config")
class URLChecker():
def __init__(self):
"""
@ -15,7 +16,7 @@ class URLChecker():
that later should have been disposed but were not
"""
self.url_check_cache = diskcache.Cache(self.cache_path())
urllib3.disable_warnings() # silences complaints about unverified requests via HTTPS
urllib3.disable_warnings() # silences complaints about unverified requests via HTTPS
# this is done to ignore complaints about "verify=False" in requests.get
# this is not so terrible as I only check is website up
# see https://stackoverflow.com/questions/78855740/starfield-ca-not-recoggnised-by-requests-package
@ -89,7 +90,7 @@ class URLChecker():
# https://salony.orange.pl/pl/orange-jastrz%C4%99bie-zdr%C3%B3j-galeria-zdr%C3%B3j-26882
pass
elif self.is_difference_limited_to_slash_at_end(atp_value, atp_after_redirect):
pass # just adding trailing / is not worth raising an alarm... I think?
pass # just adding trailing / is not worth raising an alarm... I think?
else:
self.consider_logging_that_atp_link_redirects(tested_key, atp_value, atp)
return False
@ -110,13 +111,13 @@ class URLChecker():
if link_a[-1] == "/":
link_a = link_a[:-1]
if link_b[-1] == "/":
link_b =link_b[:-1]
link_b = link_b[:-1]
return link_a == link_b
def consider_logging_that_atp_link_was_rejected(self, tested_key, atp_value, atp):
if atp.atp_tags['@spider'] not in [
'aldi_sud_de', # https://github.com/alltheplaces/alltheplaces/issues/9415
'true_value_us', # see above
'aldi_sud_de', # https://github.com/alltheplaces/alltheplaces/issues/9415
'true_value_us', # see above
]:
pass
#do not log problems as long as above issues are not fixed
@ -125,8 +126,8 @@ class URLChecker():
def consider_logging_that_atp_link_redirects(self, tested_key, atp_value, atp):
if atp.atp_tags["@spider"] not in [
'agata_meble_pl', # https://github.com/alltheplaces/alltheplaces/issues/9409
'bevmo_us', # https://github.com/alltheplaces/alltheplaces/issues/9493
'agata_meble_pl', # https://github.com/alltheplaces/alltheplaces/issues/9409
'bevmo_us', # https://github.com/alltheplaces/alltheplaces/issues/9493
]:
pass
#do not log problems as long as above issues are not fixed
@ -290,7 +291,7 @@ class URLChecker():
'sobeys.ca',
'zambrero.com',
'zambrero.com.au'
]:
]:
# handles also broken such as
# website = ps://www.biedronka.pl
for protocol in ["", "http://", "https://", "ps://"]:
@ -302,7 +303,7 @@ class URLChecker():
'https://www.circlek.pl/wyszukaj-stacje',
'http://www.statoil.pl',
'Biedronka.PL',
'https://www.aldi-sued.de/de/homepage.html', # seems to be added by some ATP?
'https://www.aldi-sued.de/de/homepage.html', # seems to be added by some ATP?
'https://allegro.pl/kampania/one/znajdz-nas',
'https://allegro.pl/kampania/one',
'https://www.castorama.pl',
@ -386,11 +387,10 @@ class URLChecker():
if self.get_free_space_in_mb(self.cache_path()) < 400:
raise Exception("running out of free space on drive")
print(link, reason)
try:
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
}
# NOTE: SSL verification checks are disabled
# to keep https://aviastacjapaliw.pl/stacje/avia-protasy/ working


@ -1,6 +1,7 @@
import unittest
import url_checker
class LinkCheckingTests(unittest.TestCase):
def test_link_rejector_rejecting_known_bad(self):
test = url_checker.URLChecker()