
move import listers into own file, remove from graticule processing

This commit is contained in:
Mateusz Konieczny 2025-01-30 18:39:38 +01:00
parent e323aee1ce
commit 8c7d9ea326
6 changed files with 87 additions and 291 deletions

View file

@@ -16,6 +16,17 @@ obtain_atp_data = __import__("2_obtain_atp_data")
config = __import__("0_config")
import show_data
import wikidata
import nominatim
import url_checker
# TODO: entries failing on Nominatim here will not be listed by show_data.py
# maybe show_data.py should try checking all cases for Nominatim mismatches, though with low priority?
# or share this data otherwise?
# or consider show_data.py as deprecated and on life support?
# TODO: maybe it is actually OK? Or should I pass it through an elaborate argument chain?
url_checker_instance = url_checker.URLChecker()
def skipped_osm_cases():
return [
@@ -175,6 +186,9 @@ def process_single_dataset(checked_data_sources, atp_code, area):
continue
data_extractor_function = extracted['extract_function']
import_judgment = data_extractor_function(entry)
# a somewhat odd location, but this is checked only here as it requires an external call
# so it is preferable to do it sparingly
import_judgment = adjust_judgment_for_address_location_mismatch_checked_by_nominatim(entry, import_judgment)
extracted['data'].append(entry_to_presentation_object(extracted['key'], entry, import_judgment))
for extracted in checked_data_sources:
@@ -193,6 +207,21 @@ def process_single_dataset(checked_data_sources, atp_code, area):
outfile.write(output)
print(f"wrote file to {location}")
def adjust_judgment_for_address_location_mismatch_checked_by_nominatim(entry, judgment):
if judgment['status'] in ['it_is_not_matching', 'dubious_match', 'no_import_for_this_key']:
# no import, no need to check Nominatim
return judgment
elif judgment['status'] in ['importable']:
# TODO: allow to run without consulting cache?
# TODO: run by default without checking cache? And reject not cached entries?
if nominatim.is_location_matching_tags(entry.atp_tags, entry.atp_center, cache_only=False):
# passed test
return judgment
else:
return {'status': 'it_is_not_matching', 'mismatching_key_list': 'location_mismatches_address_data'}
else:
raise Exception("Unexpected status " + str(judgment))
def header_of_presentation_objects(checked_key):
return ['ATP link', 'OSM link', 'ATP ' + checked_key, 'OSM ' + checked_key, 'ATP tags', 'OSM tags', 'Mismatch on']
@@ -242,6 +271,9 @@ def thorough_match_mismatch_check(entry):
return None
if entry.match_distance > config.missing_shop_distance_in_kilometers_for_specific_case(entry.atp_tags):
return None
if entry.all_very_good_matches != None and len(entry.all_very_good_matches) > 1:
# it may be possible to recover, see note in produce_map_analysis_for_atp_data
return None
if is_still_passing_standard_match_check(entry) == False:
return None
# TODO add tests
@@ -319,6 +351,25 @@ def extract_website_import_info(entry):
return {'status': 'it_is_not_matching', 'mismatching_key_list': ['website']}
else:
return {'status': 'no_import_for_this_key'}
tested_key = "website"
cache_only = True # definitely needed, to avoid hammering the same POI website many times
returned = url_checker_instance.is_website_eligible(entry, cache_only, tested_key)
if returned == None:
# separate status for a cache miss? TODO
return {'status': 'it_is_not_matching', 'mismatching_key_list': ['POI_website_status_not_checked_yet']}
elif returned == False:
atp_value = entry.atp_tags.get(tested_key)
osm_value = entry.osm_match_tags.get(tested_key)
if url_checker_instance.drop_this_link(osm_value):
# not really a mismatch, both are garbage, e.g.
# pointing to the main brand page
return {'status': 'no_import_for_this_key'}
elif url_checker_instance.is_difference_limited_to_slash_at_end(atp_value, osm_value):
# effectively the same, no real mismatch
return {'status': 'no_import_for_this_key'}
else:
return {'status': 'it_is_not_matching', 'mismatching_key_list': ['website']}
return {'status': 'importable'}
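# For illustration only: a plausible sketch of the trailing-slash comparison
# used above. The real url_checker.is_difference_limited_to_slash_at_end may be
# implemented differently; this stand-alone version just shows the intent.
def is_difference_limited_to_slash_at_end_sketch(atp_value, osm_value):
    if atp_value is None or osm_value is None:
        return False
    # "https://example.com/shop" and "https://example.com/shop/" count as equal
    return atp_value.rstrip("/") == osm_value.rstrip("/")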
def show_conflicting_entry(entry):

View file

@@ -355,13 +355,12 @@ def generate_report_for_given_graticule_or_return_cache_if_present(area_name, ar
def generate_report_for_given_graticule(area_name, area, lat_anchor, lon_anchor):
generated_filepaths = []
url_checker_instance = url_checker.URLChecker()
report_generators = {}
for atp_code in obtain_atp_data.all_spider_codes_iterator():
potential_match_results = match_output_for_spider_and_graticule(area, atp_code, lat_anchor, lon_anchor)
if os.path.isfile(potential_match_results):
matched = serializing.load_list_of_matches_from_csv(potential_match_results)
report_generators[atp_code] = show_data.produce_map_analysis_for_atp_data(atp_code, area_name=area_name, match_list=matched, cache_only=True, url_checker_instance=url_checker_instance)
report_generators[atp_code] = show_data.produce_map_analysis_for_atp_data(atp_code, area_name=area_name, match_list=matched, cache_only=True)
for path in show_data.iterate_over_output_files(atp_code, area_name):
generated_filepaths.append(path)
show_data.generate_website_index_for_named_area(report_generators, area_name)

View file

@@ -133,191 +133,6 @@ class MismatchingNameReportCreator:
return output
class ATPGivesTagsReportCreator:
def __init__(self, url_checker_instance, atp_code, area_name):
self.atp_code = atp_code
self.area_name = area_name
self.shops_with_tags_to_be_added = []
self.mismatching_website_tags = []
self.count_of_total_tag_mismatches = {}
self.importable_keys = ['website']
# 'opening_hours', - trading Sundays in Poland (niedziele handlowe)...
# 'phone' - pending removal (upstream, in ATP) of the same supposed call center phone applied to many points
# let's wait and see whether it happens...
for key in self.importable_keys:
self.count_of_total_tag_mismatches[key] = 0
self.url_checker = url_checker_instance
def check_case(self, atp_match_entry, cache_only):
"""
note: last_nominatim_match_status is used by code outside this class
this is hacky, but no better solution was found
details are documented where last_nominatim_match_status is present
"""
self.last_nominatim_match_status = None
tags_to_be_added = self.key_with_values_that_can_be_added_to_osm_from_atp(atp_match_entry, cache_only)
if len(tags_to_be_added) > 0:
self.last_nominatim_match_status = nominatim.is_location_matching_tags(atp_match_entry.atp_tags, atp_match_entry.atp_center, cache_only=cache_only)
if self.last_nominatim_match_status != False:
self.shops_with_tags_to_be_added.append({'tags_to_be_added': tags_to_be_added, 'entry': atp_match_entry})
return True
def present_reports(self):
with open(config.output_folder() + "proposed_bot_edit.csv", 'a') as outfile:
writer = csv.writer(outfile)
for key in self.importable_keys:
if self.count_of_total_tag_mismatches[key] == 0:
for entry in self.shops_with_tags_to_be_added: # already Nominatim-filtered
if key in entry['tags_to_be_added']:
value = entry['entry'].atp_tags[key]
atp_code = entry['entry'].atp_tags['@spider']
writer.writerow([entry['entry'].osm_link, key, value, atp_code])
else:
print(key, "ineligible for import", self.count_of_total_tag_mismatches[key], "mismatches")
self.generate_potential_new_tags_listing()
# TODO: more complex - how to export it? if any export is worthwhile here...
# with open(config.output_folder() + "add_tags_" + atp_code + ".geojson", 'w') as f:
# json.dump(serializing.generate_geojson_structure(self.shops_with_tags_to_be_added), f)
self.generate_mismatching_website_listing()
def add_tags_filename(self):
area_name_part = ""
if self.area_name not in [None, ""]:
area_name_part = "_" + self.area_name
return 'add_tags_' + self.atp_code + area_name_part + ".html"
def mismatching_website_report_filename(self):
area_name_part = ""
if self.area_name not in [None, ""]:
area_name_part = "_" + self.area_name
return 'website_mismatch_complicated_to_fix_' + self.atp_code + area_name_part + '.html'
def table_of_contents(self):
return [
{
'header': 'Tags to add',
'section_link': section_link('these tags can be imported', len(self.shops_with_tags_to_be_added), self.add_tags_filename()),
'is_extra_complicated': False,
'output_files': [self.add_tags_filename()],
},
{
'header': 'Mismatching website tags',
'section_link': section_link('website tag mismatch', len(self.mismatching_website_tags), self.mismatching_website_report_filename()),
'is_extra_complicated': True,
'output_files': [self.mismatching_website_report_filename()],
},
]
def key_with_values_that_can_be_added_to_osm_from_atp(self, atp, cache_only):
tags_to_be_added = []
for tested_key in self.importable_keys:
if tested_key not in atp.atp_tags:
continue
if tested_key == "phone":
if self.is_phone_eligible(atp):
tags_to_be_added.append(tested_key)
if tested_key == "website":
if self.is_website_eligible(atp, cache_only):
tags_to_be_added.append(tested_key)
else:
raise NotImplementedError
return tags_to_be_added
def is_website_eligible(self, atp, cache_only):
tested_key = "website"
if tested_key not in atp.atp_tags:
return False
returned = self.url_checker.is_website_eligible(atp, cache_only)
if returned == None:
return False # not cached, instructed to use only cache
if returned:
if atp.atp_tags.get(tested_key) != atp.osm_match_tags.get(tested_key):
return True
else:
atp_value = atp.atp_tags.get(tested_key)
osm_value = atp.osm_match_tags.get(tested_key)
if self.url_checker.drop_this_link(osm_value):
# not really a mismatch, both are garbage, e.g.
# pointing to the main brand page
pass
elif self.url_checker.is_difference_limited_to_slash_at_end(atp_value, osm_value):
pass # effectively the same anyway, no real mismatch
else:
self.mismatching_website_tags.append(atp)
self.report_mismatch(atp, tested_key)
def is_phone_eligible(self, atp):
tested_key = "phone"
canonical_atp_value = atp.atp_tags[tested_key].replace(" ", "")
canonical_osm_value = atp.osm_match_tags.get(tested_key)
if canonical_osm_value == None:
canonical_osm_value = atp.osm_match_tags.get("contact:" + tested_key)
if canonical_osm_value != None:
canonical_osm_value = canonical_osm_value.replace(" ", "")
if canonical_osm_value not in (None, canonical_atp_value):
self.report_mismatch(atp, tested_key)
return False
return canonical_osm_value == None
def report_mismatch(self, atp, tested_key):
print("atp\nosm - tag mismatch between them for", tested_key, "in", atp.osm_link)
if 'website' in atp.atp_tags and tested_key != "website":
print("ATP link:", atp.atp_tags['website'])
print(atp.atp_tags[tested_key])
if tested_key not in atp.osm_match_tags:
print(tested_key, "not in OSM tags, apparently ATP value was rejected outright for", self.atp_code)
else:
print(atp.osm_match_tags[tested_key])
print()
self.count_of_total_tag_mismatches[tested_key] += 1
def generate_potential_new_tags_listing(self):
center = get_center([x['entry'] for x in self.shops_with_tags_to_be_added])
atp_spider_link_target = config.get_github_link_to_spider(self.atp_code)
linked_text = self.atp_code.replace("_", "_&ZeroWidthSpace;") + " spider"
link = '<a href="' + atp_spider_link_target + '">' + linked_text + "</a>"
with open(config.output_folder() + self.add_tags_filename(), 'w') as outfile:
sidebar = prose.sidebar_content("This page lists where " + link + " data has shops that seem to have useful info not present in OSM data. Markers shown here are positioned according to ATP data, lines lead to the location of the matched OSM shop. " + str(len(self.shops_with_tags_to_be_added)) + " entries are shown.", self.atp_code)
outfile.write(leafleter.generator.get_html_page_prefix("ATP <-> OSM " + self.atp_code + " - new tags?", center['lat'], center['lon'], zlevel_centered=7, width_percent=90, sidebar_content=sidebar))
for entry in self.shops_with_tags_to_be_added:
message = '<a href=\\"' + entry['entry'].osm_link + '\\">osm</a><br>'
if 'website' in entry['entry'].osm_match_tags:
message += '<a href=\\"' + escape_url(entry['entry'].osm_match_tags['website']) + '\\">object website from OSM</a><br>'
if 'website' in entry['entry'].atp_tags:
message += '<a href=\\"' + escape_url(entry['entry'].atp_tags['website']) + '\\">object website from ATP</a><br>'
new_tags = {}
for key in entry['tags_to_be_added']:
new_tags[key] = entry['entry'].atp_tags[key]
message += tag_list_to_html(new_tags)
outfile.write(leafleter.generator.get_marker(message, entry['entry'].atp_center['lat'], entry['entry'].atp_center['lon'], color='green'))
outfile.write(leafleter.generator.get_line(entry['entry'].atp_center['lat'], entry['entry'].atp_center['lon'], entry['entry'].osm_match_center['lat'], entry['entry'].osm_match_center['lon'], color='green'))
outfile.write(leafleter.generator.get_html_page_suffix())
def generate_mismatching_website_listing(self):
atp_spider_link_target = config.get_github_link_to_spider(self.atp_code)
linked_text = self.atp_code.replace("_", "_&ZeroWidthSpace;") + " spider"
link = '<a href="' + atp_spider_link_target + '">' + linked_text + "</a>"
center = get_center(self.mismatching_website_tags)
with open(config.output_folder() + self.mismatching_website_report_filename(), 'w') as outfile:
sidebar = prose.sidebar_content("This page lists where " + link + " data has shops that were matched but whose <code>website</code> tags are not matching.<br><br>WARNING WARNING this may be tricky to fix!<br>The ATP version is not always better, some shops are matched incorrectly! In some cases OSM has correct or better data!<br>Markers shown here are positioned according to ATP data.<br>" + str(len(self.mismatching_website_tags)) + " entries are shown.", self.atp_code)
outfile.write(leafleter.generator.get_html_page_prefix("ATP <-> OSM " + self.atp_code + " - mismatching website tags", center['lat'], center['lon'], zlevel_centered=7, width_percent=90, sidebar_content=sidebar))
for bad in self.mismatching_website_tags:
osm_location_link = shared.link_to_point_in_osm(bad.atp_center['lat'], bad.atp_center['lon'])
summary = 'here ATP shows an object being present, which has a match, but their website tags conflict (<a href="' + osm_location_link + '" target="_blank">OSM location</a>)'
summary += "<br><hr><br>"
summary += 'website from ATP:' + htmlify_value("website", str(bad.atp_tags.get('website')))
summary += "<br>"
summary += 'website from OSM:' + htmlify_value("website", str(bad.osm_match_tags.get('website')))
summary += "<br><hr><br>"
summary += 'tag list as suggested by ATP (should not be assumed to be directly usable in OSM):<br><br>'
summary += tag_list_to_html(bad.atp_tags)
outfile.write(leafleter.generator.get_marker(summary, bad.atp_center['lat'], bad.atp_center['lon'], color='red'))
outfile.write(leafleter.generator.get_line(bad.atp_center['lat'], bad.atp_center['lon'], bad.osm_match_center['lat'], bad.osm_match_center['lon'], color='red'))
outfile.write(leafleter.generator.get_html_page_suffix())
class MissingObjectsReportCreator:
def __init__(self, atp_code, area_name, specifier=''):
self.area_name = area_name
@@ -559,11 +374,10 @@ add to test_display_website
# TODO: save files one level higher, here just produce analysis
def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_only, url_checker_instance):
def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_only):
missing_objects_report = MissingObjectsReportCreator(atp_code, area_name)
missing_or_not_objects_report_gray_area = MissingObjectsReportCreator(atp_code, area_name, specifier=" - maybe")
mismatching_name_report = MismatchingNameReportCreator(atp_code, area_name)
add_tags_from_atp_report = ATPGivesTagsReportCreator(url_checker_instance, atp_code, area_name)
conflict_between_atp_and_nominatim_report = NominatimMismatchReportCreator(atp_code, area_name)
match_report = MatchMeteringReportCreator(len(match_list), atp_code, area_name)
if config.graph_distance_distribution_when_generating_reports():
@@ -573,32 +387,14 @@ def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_onl
if config.graph_distance_distribution_when_generating_reports():
distance_distribution_report.check_case(atp)
if atp.all_very_good_matches != None and len(atp.all_very_good_matches) > 1:
# when all_very_good_matches > 1 it should still be possible to recover
# TODO: when all_very_good_matches > 1 it should still be possible to recover
# but it will require extra logic
# see, for example,
# https://www.openstreetmap.org/way/280274214 https://www.openstreetmap.org/node/4635678378
continue
if atp.match_distance != None and atp.match_distance < config.good_match_distance_in_kilometers():
# this if-else structure is built here to improve matching efficiency
# and not a lot can be done to eliminate the hand-crafted structure here
# and the report count is not high enough to justify spending time
# on some declarative syntax
if mismatching_name_report.check_case(atp):
continue
add_tags_from_atp_report.check_case(atp, cache_only=cache_only)
# quite hacky, but I have no better idea than to take this internal thing
# that should be an implementation detail
# (1) the high failure rate of Nominatim and the limited ability to make queries
# mean that it cannot be run for all cases
# (2) so it needs to be restricted to only where tags would be moved from ATP to OSM
# (3) code for the Nominatim mismatch report and for adding ATP tags is supposed to be separated
# so code for the reports needs to be separated and at the same time I need info on how
# matching went...
status = add_tags_from_atp_report.last_nominatim_match_status
conflict_between_atp_and_nominatim_report.register_case_using_known_nominatim_status(atp, status, cache_only=cache_only)
if status != False:
match_report.check_case(atp)
elif atp.match_distance == None or atp.match_distance > config.missing_shop_distance_in_kilometers_for_specific_case(atp.atp_tags):
nominatim_match = nominatim.is_location_matching_tags(atp.atp_tags, atp.atp_center, cache_only=cache_only)
if nominatim_match != False: # matches, failed geolocation and geolocation not done at all go here
@@ -615,7 +411,6 @@ def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_onl
if shared.get_free_space_in_mb(config.output_folder()) < 400:
raise Exception("running out of free space on drive")
mismatching_name_report.present_reports()
add_tags_from_atp_report.present_reports()
missing_objects_report.present_reports()
missing_or_not_objects_report_gray_area.present_reports()
conflict_between_atp_and_nominatim_report.present_reports()
@@ -628,7 +423,6 @@ def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_onl
missing_objects_report,
missing_or_not_objects_report_gray_area,
mismatching_name_report,
add_tags_from_atp_report,
conflict_between_atp_and_nominatim_report,
match_report,
],
@@ -712,8 +506,6 @@ def headers():
MissingObjectsReportCreator('dummy', 'dummy area name').table_of_contents()[0]['header'],
MissingObjectsReportCreator('dummy', 'dummy area name', specifier=" - maybe").table_of_contents()[0]['header'],
MismatchingNameReportCreator('dummy', 'dummy area name').table_of_contents()[0]['header'],
ATPGivesTagsReportCreator(None, 'dummy', 'dummy area name').table_of_contents()[0]['header'],
ATPGivesTagsReportCreator(None, 'dummy', 'dummy area name').table_of_contents()[1]['header'],
NominatimMismatchReportCreator('dummy', 'dummy area name').table_of_contents()[0]['header'],
MatchMeteringReportCreator(-1, 'dummy', 'dummy area name').table_of_contents()[0]['header'],
]
@@ -791,7 +583,6 @@ def iterate_over_output_files(atp_code, area_name):
MissingObjectsReportCreator(atp_code, area_name),
MissingObjectsReportCreator(atp_code, area_name, specifier=" - maybe"),
MismatchingNameReportCreator(atp_code, area_name),
ATPGivesTagsReportCreator(None, atp_code, area_name),
NominatimMismatchReportCreator(atp_code, area_name),
MatchMeteringReportCreator(-1, atp_code, area_name),
]

View file

@@ -7,19 +7,6 @@ import show_data
class IsCodeCompletelyCrashingSmoketests(unittest.TestCase):
def test_rough_code_validity(self):
add_tags_from_atp = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy atp code', 'dummy area name')
center = {'lat': 50, 'lon': 20}
atp_center = center
atp_tags = {'brand': 'Foobar', 'website': 'https://example.com/foobar/12', '@spider': 'dummy_code'}
osm_match_center = center
osm_match_tags = {'brand': 'Foobar'}
osm_link = 'https://www.openstreetmap.org/node/1'
match_distance = 0
all_very_good_matches = []
match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
add_tags_from_atp.check_case(match, cache_only=False)
def test_creation_of_missing_object_report_creator_table_contents_smoke_test(self):
report = show_data.MissingObjectsReportCreator('dummy_atp_code', 'dummy_area_name')
report.table_of_contents()
@@ -34,10 +21,6 @@ class IsCodeCompletelyCrashingSmoketests(unittest.TestCase):
report = show_data.MismatchingNameReportCreator('dummy_atp_code', 'dummy_area_name')
report.table_of_contents()
def test_creation_of_tag_giver_report_creator(self):
report = show_data.ATPGivesTagsReportCreator(None, 'dummy_atp_code', 'dummy_area_name')
report.table_of_contents()
def test_creation_of_nominatim_report_creator(self):
report = show_data.NominatimMismatchReportCreator('dummy_atp_code', 'dummy_area_name')
report.html_report_content()
@@ -61,61 +44,3 @@ class TagListFormattingTests(unittest.TestCase):
def test_tag_list_generation_newline_in_tags_escape(self):
self.assertEqual("aaaa<br>bbb" in show_data.tag_list_to_html({"description": "aaaa\nbbb"}), True)
class PhoneSuggestingTests(unittest.TestCase):
def test_accept_normal_phone(self):
add_tags_from_atp = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy identifier for tests', 'dummy area name')
center = {'lat': 50, 'lon': 20}
atp_center = center
atp_tags = {'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
osm_match_center = center
osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse'}
osm_link = 'https://www.openstreetmap.org/node/1'
match_distance = 0
all_very_good_matches = []
match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
self.assertEqual(add_tags_from_atp.is_phone_eligible(match), True)
def test_handle_contact_prefix(self):
add_tags_from_atp = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy identifier for tests', 'dummy area name')
center = {'lat': 50, 'lon': 20}
atp_center = center
atp_tags = {'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
osm_match_center = center
osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse', 'contact:phone': '+48 12 286-32-83'}
osm_link = 'https://www.openstreetmap.org/node/1'
match_distance = 0
all_very_good_matches = []
match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
self.assertEqual(add_tags_from_atp.is_phone_eligible(match), False)
class WebsiteSuggestingTests(unittest.TestCase):
def test_accept_normal_website(self):
add_tags_from_atp = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy identifier for tests', 'dummy area name')
center = {'lat': 50, 'lon': 20}
atp_center = center
atp_tags = {'@spider': 'bogus_atp_code', 'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
osm_match_center = center
osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse'}
osm_link = 'https://www.openstreetmap.org/node/1'
match_distance = 0
all_very_good_matches = []
match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
add_tags_from_atp.check_case(match, cache_only=False)
self.assertEqual(len(add_tags_from_atp.shops_with_tags_to_be_added), 1)
def test_reject_already_present_website(self):
add_tags_from_atp = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy identifier for tests', 'dummy area name')
center = {'lat': 50, 'lon': 20}
atp_center = center
atp_tags = {'@spider': 'bogus_atp_code', 'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
osm_match_center = center
osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse', 'website': 'https://krakow-sr.sr.gov.pl/'}
osm_link = 'https://www.openstreetmap.org/node/1'
match_distance = 0
all_very_good_matches = []
match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
add_tags_from_atp.check_case(match, cache_only=False)
self.assertEqual(len(add_tags_from_atp.shops_with_tags_to_be_added), 0)

test_import_list_maker.py Normal file
View file

@@ -0,0 +1,29 @@
import_script = __import__("21_list_import_status")
import unittest
import serializing
class WebsiteSuggestingTests(unittest.TestCase):
def test_accept_normal_website(self):
center = {'lat': 50, 'lon': 20}
atp_center = center
atp_tags = {'@spider': 'bogus_atp_code', 'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
osm_match_center = center
osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse'}
osm_link = 'https://www.openstreetmap.org/node/1'
match_distance = 0
all_very_good_matches = []
match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
self.assertEqual(import_script.extract_website_import_info(match), {'status': 'importable'})
def test_reject_already_present_website(self):
center = {'lat': 50, 'lon': 20}
atp_center = center
atp_tags = {'@spider': 'bogus_atp_code', 'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
osm_match_center = center
osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse', 'website': 'https://krakow-sr.sr.gov.pl/'}
osm_link = 'https://www.openstreetmap.org/node/1'
match_distance = 0
all_very_good_matches = []
match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
self.assertEqual(import_script.extract_website_import_info(match), {'status': 'no_import_for_this_key'})
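# a standard entry point is assumed here so the file can also be run directly
# (python test_import_list_maker.py); the project may instead rely purely on
# unittest test discovery:
if __name__ == '__main__':
    unittest.main()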

View file

@@ -55,13 +55,14 @@ class URLChecker():
print("Maybe it can be safely assumed that following value can be thrown away?")
return False
def is_website_eligible(self, atp, cache_only):
def is_website_eligible(self, atp, cache_only, tested_key):
"""
return True if existing osm website tag should be replaced by ATP claimed tag
return True if existing tag should be replaced by the ATP claimed tag
return False if it should not be
return None if it could not be established
"""
tested_key = "website"
if tested_key != "website":
raise Exception("this was not considered, at least for now")
atp_value = atp.atp_tags[tested_key]
osm_value = atp.osm_match_tags.get(tested_key)
if atp_value == osm_value: