diff --git a/21_list_import_status.py b/21_list_import_status.py
index 2d676a5..ab6f702 100644
--- a/21_list_import_status.py
+++ b/21_list_import_status.py
@@ -16,6 +16,17 @@ obtain_atp_data = __import__("2_obtain_atp_data")
 config = __import__("0_config")
 import show_data
 import wikidata
+import nominatim
+import url_checker
+
+# TODO: entries failing on Nominatim here will not be listed by show_data.py
+# maybe show_data.py should check all cases for Nominatim mismatches, though with low priority?
+# or share this data in some other way?
+# or consider show_data.py as deprecated and on life support?
+
+
+# TODO: maybe a module-level instance is actually OK? Or should I pass it through an elaborate argument chain?
+url_checker_instance = url_checker.URLChecker()
 
 def skipped_osm_cases():
     return [
@@ -175,6 +186,9 @@ def process_single_dataset(checked_data_sources, atp_code, area):
                 continue
             data_extractor_function = extracted['extract_function']
             import_judgment = data_extractor_function(entry)
+            # a somewhat odd location, but this is checked only here as it requires an external call,
+            # so it is preferable to do it sparingly
+            import_judgment = adjust_judgment_for_address_location_mismatch_checked_by_nominatim(entry, import_judgment)
             extracted['data'].append(entry_to_presentation_object(extracted['key'], entry, import_judgment))
 
     for extracted in checked_data_sources:
@@ -193,6 +207,21 @@ def process_single_dataset(checked_data_sources, atp_code, area):
             outfile.write(output)
             print(f"wrote file to {location}")
 
+def adjust_judgment_for_address_location_mismatch_checked_by_nominatim(entry, judgment):
+    if judgment['status'] in ['it_is_not_matching', 'dubious_match', 'no_import_for_this_key']:
+        # no import, no need to check Nominatim
+        return judgment
+    elif judgment['status'] in ['importable']:
+        # TODO: allow running without consulting the cache?
+        # TODO: run by default without checking the cache? And reject entries that are not cached?
+        if nominatim.is_location_matching_tags(entry.atp_tags, entry.atp_center, cache_only=False):
+            # passed the test
+            return judgment
+        else:
+            return {'status': 'it_is_not_matching', 'mismatching_key_list': ['location_mismatches_address_data']}
+    else:
+        raise Exception("Unexpected status " + str(judgment))
+
 def header_of_presentation_objects(checked_key):
     return ['ATP link', 'OSM link', 'ATP ' + checked_key, 'OSM ' + checked_key, 'ATP tags', 'OSM tags', 'Mismatch on']
 
@@ -242,6 +271,9 @@ def thorough_match_mismatch_check(entry):
         return None
     if entry.match_distance > config.missing_shop_distance_in_kilometers_for_specific_case(entry.atp_tags):
         return None
+    if entry.all_very_good_matches != None and len(entry.all_very_good_matches) > 1:
+        # it may be possible to recover, see note in produce_map_analysis_for_atp_data
+        return None
     if is_still_passing_standard_match_check(entry) == False:
         return None
     # TODO add tests
@@ -319,6 +351,25 @@ def extract_website_import_info(entry):
             return {'status': 'it_is_not_matching', 'mismatching_key_list': ['website']}
     else:
         return {'status': 'no_import_for_this_key'}
+
+    tested_key = "website"
+    cache_only = True  # definitely needed, to avoid hammering the same POI website many times
+    returned = url_checker_instance.is_website_eligible(entry, cache_only, tested_key)
+    if returned == None:
+        # TODO: separate status for a cache miss?
+        return {'status': 'it_is_not_matching', 'mismatching_key_list': ['POI_website_status_not_checked_yet']}
+    elif returned == False:
+        atp_value = entry.atp_tags.get(tested_key)
+        osm_value = entry.osm_match_tags.get(tested_key)
+        if url_checker_instance.drop_this_link(osm_value):
+            # not really a mismatch, both are garbage, like
+            # pointing to the main brand page
+            return {'status': 'no_import_for_this_key'}
+        elif url_checker_instance.is_difference_limited_to_slash_at_end(atp_value, osm_value):
+            # effectively the same, no real mismatch
+            return {'status': 'no_import_for_this_key'}
+        else:
+            return {'status': 'it_is_not_matching', 'mismatching_key_list': ['website']}
     return {'status': 'importable'}
 
 def show_conflicting_entry(entry):
diff --git a/5_generate_graticule_reports.py b/5_generate_graticule_reports.py
index 9dafe46..1b82432 100644
--- a/5_generate_graticule_reports.py
+++ b/5_generate_graticule_reports.py
@@ -355,13 +355,12 @@ def generate_report_for_given_graticule_or_return_cache_if_present(area_name, ar
 
 def generate_report_for_given_graticule(area_name, area, lat_anchor, lon_anchor):
     generated_filepaths = []
-    url_checker_instance = url_checker.URLChecker()
     report_generators = {}
     for atp_code in obtain_atp_data.all_spider_codes_iterator():
         potential_match_results = match_output_for_spider_and_graticule(area, atp_code, lat_anchor, lon_anchor)
         if os.path.isfile(potential_match_results):
             matched = serializing.load_list_of_matches_from_csv(potential_match_results)
-            report_generators[atp_code] = show_data.produce_map_analysis_for_atp_data(atp_code, area_name=area_name, match_list=matched, cache_only=True, url_checker_instance=url_checker_instance)
+            report_generators[atp_code] = show_data.produce_map_analysis_for_atp_data(atp_code, area_name=area_name, match_list=matched, cache_only=True)
             for path in show_data.iterate_over_output_files(atp_code, area_name):
                 generated_filepaths.append(path)
     show_data.generate_website_index_for_named_area(report_generators, area_name)
diff --git a/show_data.py b/show_data.py
index 4e4d3ea..f330ccc 100644
--- a/show_data.py
+++ b/show_data.py
@@ -133,191 +133,6 @@ class MismatchingNameReportCreator:
         return output
 
 
-class ATPGivesTagsReportCreator:
-    def __init__(self, url_checker_instance, atp_code, area_name):
-        self.atp_code = atp_code
-        self.area_name = area_name
-        self.shops_with_tags_to_be_added = []
-        self.mismatching_website_tags = []
-        self.count_of_total_tag_mismatches = {}
-        self.importable_keys = ['website']
-        # 'opening_hours', - niedziele handlowe...
-        # 'phone' - pending removal (upstream, in ATP) of the same supposed call center phone applied to many points
-        # lets wait whether it will happen...
-        for key in self.importable_keys:
-            self.count_of_total_tag_mismatches[key] = 0
-        self.url_checker = url_checker_instance
-
-    def check_case(self, atp_match_entry, cache_only):
-        """
-        note: last_nominatim_match_status is used by code outside this class
-        this is hacky but no better solution was found
-        details documented where last_nominatim_match_status is present
-        """
-        self.last_nominatim_match_status = None
-        tags_to_be_added = self.key_with_values_that_can_be_added_to_osm_from_atp(atp_match_entry, cache_only)
-        if len(tags_to_be_added) > 0:
-            self.last_nominatim_match_status = nominatim.is_location_matching_tags(atp_match_entry.atp_tags, atp_match_entry.atp_center, cache_only=cache_only)
-            if self.last_nominatim_match_status != False:
-                self.shops_with_tags_to_be_added.append({'tags_to_be_added': tags_to_be_added, 'entry': atp_match_entry})
-                return True
-
-    def present_reports(self):
-        with open(config.output_folder() + "proposed_bot_edit.csv", 'a') as outfile:
-            writer = csv.writer(outfile)
-            for key in self.importable_keys:
-                if self.count_of_total_tag_mismatches[key] == 0:
-                    for entry in self.shops_with_tags_to_be_added:  # already Nominatim-filtered
-                        if key in entry['tags_to_be_added']:
-                            value = entry['entry'].atp_tags[key]
-                            atp_code = entry['entry'].atp_tags['@spider']
-                            writer.writerow([entry['entry'].osm_link, key, value, atp_code])
-                else:
-                    print(key, "ineligible for import", self.count_of_total_tag_mismatches[key], "mismatches")
-
-        self.generate_potential_new_tags_listing()
-        # TODO: more complex - how to export it? if any export is worth here...
-        # with open(config.output_folder() + "add_tags_" + atp_code + ".geojson", 'w') as f:
-        #     json.dump(serializing.generate_geojson_structure(self.shops_with_tags_to_be_added), f)
-        self.generate_mismatching_website_listing()
-
-    def add_tags_filename(self):
-        area_name_part = ""
-        if self.area_name not in [None, ""]:
-            area_name_part = "_" + self.area_name
-        return 'add_tags_' + self.atp_code + area_name_part + ".html"
-
-    def mismatching_website_report_filename(self):
-        area_name_part = ""
-        if self.area_name not in [None, ""]:
-            area_name_part = "_" + self.area_name
-        return 'website_mismatch_complicated_to_fix_' + self.atp_code + area_name_part + '.html'
-
-    def table_of_contents(self):
-        return [
-            {
-                'header': 'Tags to add',
-                'section_link': section_link('this tags can be imported', len(self.shops_with_tags_to_be_added), self.add_tags_filename()),
-                'is_extra_complicated': False,
-                'output_files': [self.add_tags_filename()],
-            },
-            {
-                'header': 'Mismatching website tags',
-                'section_link': section_link('website tag mismatch', len(self.mismatching_website_tags), self.mismatching_website_report_filename()),
-                'is_extra_complicated': True,
-                'output_files': [self.mismatching_website_report_filename()],
-            },
-        ]
-
-    def key_with_values_that_can_be_added_to_osm_from_atp(self, atp, cache_only):
-        tags_to_be_added = []
-        for tested_key in self.importable_keys:
-            if tested_key not in atp.atp_tags:
-                continue
-            if tested_key == "phone":
-                if self.is_phone_eligible(atp):
-                    tags_to_be_added.append(tested_key)
-            if tested_key == "website":
-                if self.is_website_eligible(atp, cache_only):
-                    tags_to_be_added.append(tested_key)
-            else:
-                raise NotImplementedError
-        return tags_to_be_added
-
-    def is_website_eligible(self, atp, cache_only):
-        tested_key = "website"
-        if tested_key not in atp.atp_tags:
-            return False
-        returned = self.url_checker.is_website_eligible(atp, cache_only)
-        if returned == None:
-            return False  # not cached, instructed to use only cache
-        if returned:
-            if atp.atp_tags.get(tested_key) != atp.osm_match_tags.get(tested_key):
-                return True
-        else:
-            atp_value = atp.atp_tags.get(tested_key)
-            osm_value = atp.osm_match_tags.get(tested_key)
-            if self.url_checker.drop_this_link(osm_value):
-                # not really mismatch, both are garbage like
-                # pointing to the main brand page
-                pass
-            elif self.url_checker.is_difference_limited_to_slash_at_end(atp_value, osm_value):
-                pass  # effectively the same anyway, no real mismatch
-            else:
-                self.mismatching_website_tags.append(atp)
-                self.report_mismatch(atp, tested_key)
-
-    def is_phone_eligible(self, atp):
-        tested_key = "phone"
-        canonical_atp_value = atp.atp_tags[tested_key].replace(" ", "")
-        canonical_osm_value = atp.osm_match_tags.get(tested_key)
-        if canonical_osm_value == None:
-            canonical_osm_value = atp.osm_match_tags.get("contact:" + tested_key)
-        if canonical_osm_value != None:
-            canonical_osm_value = canonical_osm_value.replace(" ", "")
-        if canonical_osm_value not in (None, canonical_atp_value):
-            self.report_mismatch(atp, tested_key)
-            return False
-        return canonical_osm_value == None
-
-    def report_mismatch(self, atp, tested_key):
-        print("atp\nosm - tag mismatch between them for", tested_key, "in", atp.osm_link)
-        if 'website' in atp.atp_tags and tested_key != "website":
-            print("ATP link:", atp.atp_tags['website'])
-        print(atp.atp_tags[tested_key])
-        if tested_key not in atp.osm_match_tags:
-            print(tested_key, "not in OSM tags, apparently ATP value was rejected outright for", self.atp_code)
-        else:
-            print(atp.osm_match_tags[tested_key])
-        print()
-        self.count_of_total_tag_mismatches[tested_key] += 1
-
-    def generate_potential_new_tags_listing(self):
-        center = get_center([x['entry'] for x in self.shops_with_tags_to_be_added])
-        atp_spider_link_target = config.get_github_link_to_spider(self.atp_code)
-        linked_text = self.atp_code.replace("_", "_​") + " spider"
-        link = '<a href="' + atp_spider_link_target + '">' + linked_text + "</a>"
-        with open(config.output_folder() + self.add_tags_filename(), 'w') as outfile:
-            sidebar = prose.sidebar_content("This page lists where " + link + " data has shops that seem to have useful info not present in OSM data. Markers shown here are positioned according to ATP data, lines lead to location of matched OSM shop. " + str(len(self.shops_with_tags_to_be_added)) + " entries are shown.", self.atp_code)
" + str(len(self.shops_with_tags_to_be_added)) + " entries are shown.", self.atp_code) - outfile.write(leafleter.generator.get_html_page_prefix("ATP <-> OSM " + self.atp_code + " - new tags?", center['lat'], center['lon'], zlevel_centered=7, width_percent=90, sidebar_content=sidebar)) - for entry in self.shops_with_tags_to_be_added: - message = '<a href=\\"' + entry['entry'].osm_link + '\\">osm</a><br>' - if 'website' in entry['entry'].osm_match_tags: - message += '<a href=\\"' + escape_url(entry['entry'].osm_match_tags['website']) + '\\">object website from OSM</a><br>' - if 'website' in entry['entry'].atp_tags: - message += '<a href=\\"' + escape_url(entry['entry'].atp_tags['website']) + '\\">object website from ATP</a><br>' - new_tags = {} - for key in entry['tags_to_be_added']: - new_tags[key] = entry['entry'].atp_tags[key] - message += tag_list_to_html(new_tags) - outfile.write(leafleter.generator.get_marker(message, entry['entry'].atp_center['lat'], entry['entry'].atp_center['lon'], color='green')) - outfile.write(leafleter.generator.get_line(entry['entry'].atp_center['lat'], entry['entry'].atp_center['lon'], entry['entry'].osm_match_center['lat'], entry['entry'].osm_match_center['lon'], color='green')) - outfile.write(leafleter.generator.get_html_page_suffix()) - - def generate_mismatching_website_listing(self): - atp_spider_link_target = config.get_github_link_to_spider(self.atp_code) - linked_text = self.atp_code.replace("_", "_​") + " spider" - link = '<a href="' + atp_spider_link_target + '">' + linked_text + "</a>" - center = get_center(self.mismatching_website_tags) - with open(config.output_folder() + self.mismatching_website_report_filename(), 'w') as outfile: - sidebar = prose.sidebar_content("This page lists where " + link + " data has shops that were matched but <code>website</code> website tags are not matching.<br><br>WARNING WARNING this may be tricky to fix!<br>ATP version is not always better, some shops are matched incorrectly! 
-            outfile.write(leafleter.generator.get_html_page_prefix("ATP <-> OSM " + self.atp_code + " - mismatching website tags", center['lat'], center['lon'], zlevel_centered=7, width_percent=90, sidebar_content=sidebar))
-            for bad in self.mismatching_website_tags:
-                osm_location_link = shared.link_to_point_in_osm(bad.atp_center['lat'], bad.atp_center['lon'])
-                summary = 'here ATP shows object being present, which has match but their website tags conflict (<a href="' + osm_location_link + '" target="_blank">OSM location</a>)'
-                summary += "<br><hr><br>"
-                summary += 'website from ATP:' + htmlify_value("website", str(bad.atp_tags.get('website')))
-
-                summary += "<br>"
-                summary += 'website from OSM:' + htmlify_value("website", str(bad.osm_match_tags.get('website')))
-                summary += "<br><hr><br>"
-                summary += 'tag list as suggested by ATP (should not be assumed to be directly usable in OSM):<br><br>'
-                summary += tag_list_to_html(bad.atp_tags)
-                outfile.write(leafleter.generator.get_marker(summary, bad.atp_center['lat'], bad.atp_center['lon'], color='red'))
-                outfile.write(leafleter.generator.get_line(bad.atp_center['lat'], bad.atp_center['lon'], bad.osm_match_center['lat'], bad.osm_match_center['lon'], color='red'))
-            outfile.write(leafleter.generator.get_html_page_suffix())
-
-
 class MissingObjectsReportCreator:
     def __init__(self, atp_code, area_name, specifier=''):
         self.area_name = area_name
@@ -559,11 +374,10 @@ add to test_display_website
 
 
 # TODO: save files one level higher, here just produce analysis
-def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_only, url_checker_instance):
+def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_only):
     missing_objects_report = MissingObjectsReportCreator(atp_code, area_name)
     missing_or_not_objects_report_gray_area = MissingObjectsReportCreator(atp_code, area_name, specifier=" - maybe")
     mismatching_name_report = MismatchingNameReportCreator(atp_code, area_name)
-    add_tags_from_atp_report = ATPGivesTagsReportCreator(url_checker_instance, atp_code, area_name)
     conflict_between_atp_and_nominatim_report = NominatimMismatchReportCreator(atp_code, area_name)
     match_report = MatchMeteringReportCreator(len(match_list), atp_code, area_name)
     if config.graph_distance_distribution_when_generating_reports():
@@ -573,32 +387,14 @@ def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_onl
         if config.graph_distance_distribution_when_generating_reports():
             distance_distribution_report.check_case(atp)
         if atp.all_very_good_matches != None and len(atp.all_very_good_matches) > 1:
-            # on all_very_good_matches > 1 it should be still possible to recover
+            # TODO on all_very_good_matches > 1 it should still be possible to recover
             # but it will require extra logic
             # see say
             # https://www.openstreetmap.org/way/280274214 https://www.openstreetmap.org/node/4635678378
             continue
         if atp.match_distance != None and atp.match_distance < config.good_match_distance_in_kilometers():
-            # this if-else structure is build here to improve matching efficiency and not
-            # a lot can be done to eliminate hand-crafted structure here
-            # and report count is not high enough to justify spending time
-            # on some declarative syntax
             if mismatching_name_report.check_case(atp):
                 continue
-            add_tags_from_atp_report.check_case(atp, cache_only=cache_only)
-
-            # quite hacky, but I have no better ideas than take this internal thing
-            # that should be an implementation detail
-            # (1) High failure rate of Nominatin and limited ability to make queries
-            # means that it cannot be run for all cases
-            # (2) So it needs to be restricted only where tags would be moved from ATP to OSM
-            # (3) Code for Nominatim mismatch report and adding ATP tags is supposed to be separated
-            # So code for reports need to be separated and atthe same time I need info how
-            # matching went...
-            status = add_tags_from_atp_report.last_nominatim_match_status
-            conflict_between_atp_and_nominatim_report.register_case_using_known_nominatim_status(atp, status, cache_only=cache_only)
-            if status != False:
-                match_report.check_case(atp)
         elif atp.match_distance == None or atp.match_distance > config.missing_shop_distance_in_kilometers_for_specific_case(atp.atp_tags):
             nominatim_match = nominatim.is_location_matching_tags(atp.atp_tags, atp.atp_center, cache_only=cache_only)
             if nominatim_match != False:  # both matches, failed geolocation and geolocation not done at all go here
@@ -615,7 +411,6 @@ def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_onl
     if shared.get_free_space_in_mb(config.output_folder()) < 400:
         raise Exception("running out of free space on drive")
     mismatching_name_report.present_reports()
-    add_tags_from_atp_report.present_reports()
     missing_objects_report.present_reports()
     missing_or_not_objects_report_gray_area.present_reports()
     conflict_between_atp_and_nominatim_report.present_reports()
@@ -628,7 +423,6 @@ def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_onl
         missing_objects_report,
         missing_or_not_objects_report_gray_area,
         mismatching_name_report,
-        add_tags_from_atp_report,
         conflict_between_atp_and_nominatim_report,
         match_report,
     ],
@@ -712,8 +506,6 @@ def headers():
         MissingObjectsReportCreator('dummy', 'dummy area name').table_of_contents()[0]['header'],
         MissingObjectsReportCreator('dummy', 'dummy area name', specifier=" - maybe").table_of_contents()[0]['header'],
         MismatchingNameReportCreator('dummy', 'dummy area name').table_of_contents()[0]['header'],
-        ATPGivesTagsReportCreator(None, 'dummy', 'dummy area name').table_of_contents()[0]['header'],
-        ATPGivesTagsReportCreator(None, 'dummy', 'dummy area name').table_of_contents()[1]['header'],
         NominatimMismatchReportCreator('dummy', 'dummy area name').table_of_contents()[0]['header'],
         MatchMeteringReportCreator(-1, 'dummy', 'dummy area name').table_of_contents()[0]['header'],
     ]
@@ -791,7 +583,6 @@ def iterate_over_output_files(atp_code, area_name):
         MissingObjectsReportCreator(atp_code, area_name),
         MissingObjectsReportCreator(atp_code, area_name, specifier=" - maybe"),
         MismatchingNameReportCreator(atp_code, area_name),
-        ATPGivesTagsReportCreator(None, atp_code, area_name),
         NominatimMismatchReportCreator(atp_code, area_name),
         MatchMeteringReportCreator(-1, atp_code, area_name),
     ]
diff --git a/test_display_website.py b/test_display_website.py
index ab0bf0c..35c1fcf 100644
--- a/test_display_website.py
+++ b/test_display_website.py
@@ -7,19 +7,6 @@ import show_data
 
 
 class IsCodeCompletelyCrashingSmoketests(unittest.TestCase):
-    def test_rough_code_validity(self):
-        add_tags_from_atp = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy atp code', 'dummy area name')
-        center = {'lat': 50, 'lon': 20}
-        atp_center = center
-        atp_tags = {'brand': 'Foobar', 'website': 'https://example.com/foobar/12', '@spider': 'dummy_code'}
-        osm_match_center = center
-        osm_match_tags = {'brand': 'Foobar'}
-        osm_link = 'https://www.openstreetmap.org/node/1'
-        match_distance = 0
-        all_very_good_matches = []
-        match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
-        add_tags_from_atp.check_case(match, cache_only=False)
-
     def test_creation_of_missing_object_report_creator_table_contents_smoke_test(self):
         report = show_data.MissingObjectsReportCreator('dummy_atp_code', 'dummy_area_name')
         report.table_of_contents()
@@ -34,10 +21,6 @@ class IsCodeCompletelyCrashingSmoketests(unittest.TestCase):
         report = show_data.MismatchingNameReportCreator('dummy_atp_code', 'dummy_area_name')
         report.table_of_contents()
 
-    def test_creation_of_tag_giver_report_creator(self):
-        report = show_data.ATPGivesTagsReportCreator(None, 'dummy_atp_code', 'dummy_area_name')
-        report.table_of_contents()
-
     def test_creation_of_nominatim_report_creator(self):
         report = show_data.NominatimMismatchReportCreator('dummy_atp_code', 'dummy_area_name')
         report.html_report_content()
@@ -61,61 +44,3 @@ class TagListFormattingTests(unittest.TestCase):
 
     def test_tag_list_generation_newline_in_tags_escape(self):
         self.assertEqual("aaaa<br>bbb" in show_data.tag_list_to_html({"description": "aaaa\nbbb"}), True)
-
-
-class PhoneSuggestingTests(unittest.TestCase):
-    def test_accept_normal_phone(self):
-        add_tags_from_atp = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy identifier for tests', 'dummy area name')
-        center = {'lat': 50, 'lon': 20}
-        atp_center = center
-        atp_tags = {'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
-        osm_match_center = center
-        osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse'}
-        osm_link = 'https://www.openstreetmap.org/node/1'
-        match_distance = 0
-        all_very_good_matches = []
-        match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
-        self.assertEqual(add_tags_from_atp.is_phone_eligible(match), True)
-
-    def test_handle_contact_prefix(self):
-        add_tags_from_atp = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy identifier for tests', 'dummy area name')
-        center = {'lat': 50, 'lon': 20}
-        atp_center = center
-        atp_tags = {'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
-        osm_match_center = center
-        osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse', 'contact:phone': '+48 12 286-32-83'}
-        osm_link = 'https://www.openstreetmap.org/node/1'
-        match_distance = 0
-        all_very_good_matches = []
-        match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
-        self.assertEqual(add_tags_from_atp.is_phone_eligible(match), False)
-
-
-class WebsiteSuggestingTests(unittest.TestCase):
-    def test_accept_normal_website(self):
-        add_tags_from_atp = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy identifier for tests', 'dummy area name')
-        center = {'lat': 50, 'lon': 20}
-        atp_center = center
-        atp_tags = {'@spider': 'bogus_atp_code', 'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
-        osm_match_center = center
-        osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse'}
-        osm_link = 'https://www.openstreetmap.org/node/1'
-        match_distance = 0
-        all_very_good_matches = []
-        match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
-        add_tags_from_atp.check_case(match, cache_only=False)
-        self.assertEqual(len(add_tags_from_atp.shops_with_tags_to_be_added), 1)
-
-    def test_reject_already_present_website(self):
-        add_tags_from_atp = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy identifier for tests', 'dummy area name')
-        center = {'lat': 50, 'lon': 20}
-        atp_center = center
-        atp_tags = {'@spider': 'bogus_atp_code', 'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
-        osm_match_center = center
-        osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse', 'website': 'https://krakow-sr.sr.gov.pl/'}
-        osm_link = 'https://www.openstreetmap.org/node/1'
-        match_distance = 0
-        all_very_good_matches = []
-        match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
-        add_tags_from_atp.check_case(match, cache_only=False)
-        self.assertEqual(len(add_tags_from_atp.shops_with_tags_to_be_added), 0)
diff --git a/test_import_list_maker.py b/test_import_list_maker.py
new file mode 100644
index 0000000..539495f
--- /dev/null
+++ b/test_import_list_maker.py
@@ -0,0 +1,29 @@
+import_script = __import__("21_list_import_status")
+
+import unittest
+import serializing
+
+class WebsiteSuggestingTests(unittest.TestCase):
+    def test_accept_normal_website(self):
+        center = {'lat': 50, 'lon': 20}
+        atp_center = center
+        atp_tags = {'@spider': 'bogus_atp_code', 'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
+        osm_match_center = center
+        osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse'}
+        osm_link = 'https://www.openstreetmap.org/node/1'
+        match_distance = 0
+        all_very_good_matches = []
+        match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
+        self.assertEqual(import_script.extract_website_import_info(match), {'status': 'importable'})
+
+    def test_reject_already_present_website(self):
+        center = {'lat': 50, 'lon': 20}
+        atp_center = center
+        atp_tags = {'@spider': 'bogus_atp_code', 'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
+        osm_match_center = center
+        osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse', 'website': 'https://krakow-sr.sr.gov.pl/'}
+        osm_link = 'https://www.openstreetmap.org/node/1'
+        match_distance = 0
+        all_very_good_matches = []
+        match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
+        self.assertEqual(import_script.extract_website_import_info(match), {'status': 'no_import_for_this_key'})
diff --git a/url_checker.py b/url_checker.py
index 5110d58..5719045 100644
--- a/url_checker.py
+++ b/url_checker.py
@@ -55,13 +55,14 @@ class URLChecker():
             print("Maybe it can be safely assumed that following value can be thrown away?")
             return False
 
-    def is_website_eligible(self, atp, cache_only):
+    def is_website_eligible(self, atp, cache_only, tested_key):
         """
-        return True if existing osm website tag should be replace by ATP claimed tag
+        return True if existing tag should be replaced by ATP claimed tag
         return False if it should not be
         return None if it could not be established
         """
-        tested_key = "website"
+        if tested_key != "website":
+            raise Exception("this was not considered, at least for now")
Exception("this was not considered, at least for now") atp_value = atp.atp_tags[tested_key] osm_value = atp.osm_match_tags.get(tested_key) if atp_value == osm_value: