mirror of https://codeberg.org/matkoniecz/list_how_openstreetmap_can_be_improved_with_alltheplaces_data.git
synced 2025-04-11 10:09:29 +02:00

move import listers into own file, remove from graticule processing

This commit is contained in:
parent e323aee1ce
commit 8c7d9ea326

6 changed files with 87 additions and 291 deletions
@@ -16,6 +16,17 @@ obtain_atp_data = __import__("2_obtain_atp_data")
config = __import__("0_config")
+import show_data
+import wikidata
+import nominatim
+import url_checker
+
+# TODO: entries failing on Nominatim here will not be listed by show_data.py
+# maybe show_data.py should try checking all cases for Nominatim mismatches, though with low priority?
+# or share this data otherwise?
+# or consider show_data.py as deprecated and on life support?
+# TODO: maybe it is actually OK? Or should I pass it in an elaborate argument chain?
+url_checker_instance = url_checker.URLChecker()


def skipped_osm_cases():
    return [
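An aside on the TODO above: a module-level URLChecker is the pragmatic answer, and a lazily created accessor is the usual middle ground between a bare module global and threading the instance through every call. A minimal sketch, assuming the repository's url_checker module; this is not code from this commit:

# sketch (not part of this commit): lazy module-level singleton as an
# alternative to passing url_checker_instance down an elaborate argument chain
import url_checker

_url_checker_instance = None

def get_url_checker():
    global _url_checker_instance
    if _url_checker_instance is None:
        _url_checker_instance = url_checker.URLChecker()
    return _url_checker_instance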
@@ -175,6 +186,9 @@ def process_single_dataset(checked_data_sources, atp_code, area):
                continue
            data_extractor_function = extracted['extract_function']
            import_judgment = data_extractor_function(entry)
+            # a bit weird location, but it is checked only here as it requires an external call
+            # so it is preferable to do it sparingly
+            import_judgment = adjust_judgment_for_address_location_mismatch_checked_by_nominatim(entry, import_judgment)
            extracted['data'].append(entry_to_presentation_object(extracted['key'], entry, import_judgment))

    for extracted in checked_data_sources:
@@ -193,6 +207,21 @@ def process_single_dataset(checked_data_sources, atp_code, area):
        outfile.write(output)
    print(f"wrote file to {location}")

+
+def adjust_judgment_for_address_location_mismatch_checked_by_nominatim(entry, judgment):
+    if judgment['status'] in ['it_is_not_matching', 'dubious_match', 'no_import_for_this_key']:
+        # no import, no need to check Nominatim
+        return judgment
+    elif judgment['status'] in ['importable']:
+        # TODO: allow running without consulting the cache?
+        # TODO: run by default without checking the cache? And reject entries that are not cached?
+        if nominatim.is_location_matching_tags(entry.atp_tags, entry.atp_center, cache_only=False):
+            # passed test
+            return judgment
+        else:
+            return {'status': 'it_is_not_matching', 'mismatching_key_list': 'location_mismatches_address_data'}
+    else:
+        raise Exception("Unexpected status " + str(judgment))


def header_of_presentation_objects(checked_key):
    return ['ATP link', 'OSM link', 'ATP ' + checked_key, 'OSM ' + checked_key, 'ATP tags', 'OSM tags', 'Mismatch on']
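For orientation, the judgment dicts above follow a small ad hoc protocol: 'status' is one of 'importable', 'it_is_not_matching', 'dubious_match' or 'no_import_for_this_key', and mismatches carry 'mismatching_key_list'. The Nominatim branch stores a bare string under that key while the other producers in this commit use a list, so a defensive consumer would normalize both; a sketch, not code from this commit:

def mismatching_keys(judgment):
    # sketch (not part of this commit): tolerate both the list form and the
    # bare-string form produced by the Nominatim branch above
    keys = judgment.get('mismatching_key_list', [])
    if isinstance(keys, str):
        keys = [keys]
    return keys

print(mismatching_keys({'status': 'it_is_not_matching', 'mismatching_key_list': 'location_mismatches_address_data'}))  # ['location_mismatches_address_data']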
@@ -242,6 +271,9 @@ def thorough_match_mismatch_check(entry):
        return None
    if entry.match_distance > config.missing_shop_distance_in_kilometers_for_specific_case(entry.atp_tags):
        return None
    if entry.all_very_good_matches != None and len(entry.all_very_good_matches) > 1:
        # it may be possible to recover, see note in produce_map_analysis_for_atp_data
        return None
    if is_still_passing_standard_match_check(entry) == False:
        return None
    # TODO add tests
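The "# TODO add tests" above could start with something like the hypothetical case below. The serializing.Match signature is taken from the tests later in this commit, and the function is assumed to live in 21_list_import_status.py (the module the new test file imports); guards earlier in the function, outside this hunk, may require adjusting the input:

# sketch (not part of this commit) of a test for the ambiguous-match guard above
import unittest
import serializing

import_script = __import__("21_list_import_status")  # assumed module name

class ThoroughMatchMismatchCheckTests(unittest.TestCase):
    def test_ambiguous_match_is_skipped(self):
        center = {'lat': 50, 'lon': 20}
        match = serializing.Match(center, {'brand': 'Foobar', '@spider': 'dummy_code'}, center, {'brand': 'Foobar'}, 'https://www.openstreetmap.org/node/1', 0, ['match a', 'match b'])
        # more than one very good match: the guard above returns None
        self.assertEqual(import_script.thorough_match_mismatch_check(match), None)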
@@ -319,6 +351,25 @@ def extract_website_import_info(entry):
            return {'status': 'it_is_not_matching', 'mismatching_key_list': ['website']}
        else:
            return {'status': 'no_import_for_this_key'}

+    tested_key = "website"
+    cache_only = True  # definitely needed, to avoid hammering the same POI website many times
+    returned = url_checker_instance.is_website_eligible(entry, cache_only, tested_key)
+    if returned == None:
+        # separate status for cache miss? TODO
+        return {'status': 'it_is_not_matching', 'mismatching_key_list': ['POI_website_status_not_checked_yet']}
+    elif returned == False:
+        atp_value = entry.atp_tags.get(tested_key)
+        osm_value = entry.osm_match_tags.get(tested_key)
+        if url_checker_instance.drop_this_link(osm_value):
+            # not really a mismatch, both are garbage-like,
+            # pointing to the main brand page
+            return {'status': 'no_import_for_this_key'}
+        elif url_checker_instance.is_difference_limited_to_slash_at_end(atp_value, osm_value):
+            # effectively the same, no real mismatch
+            return {'status': 'no_import_for_this_key'}
+        else:
+            return {'status': 'it_is_not_matching', 'mismatching_key_list': ['website']}
+    return {'status': 'importable'}


def show_conflicting_entry(entry):
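is_difference_limited_to_slash_at_end is what lets https://example.com/shop and https://example.com/shop/ count as the same value above. A plausible re-implementation for illustration; the real one lives in url_checker.py and may well differ:

def is_difference_limited_to_slash_at_end(atp_value, osm_value):
    # sketch (not part of this commit): values are equal once trailing slashes are ignored
    if atp_value is None or osm_value is None:
        return False
    return atp_value.rstrip("/") == osm_value.rstrip("/")

print(is_difference_limited_to_slash_at_end("https://example.com/shop", "https://example.com/shop/"))  # True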
@@ -355,13 +355,12 @@ def generate_report_for_given_graticule_or_return_cache_if_present(area_name, ar

def generate_report_for_given_graticule(area_name, area, lat_anchor, lon_anchor):
    generated_filepaths = []
-    url_checker_instance = url_checker.URLChecker()
    report_generators = {}
    for atp_code in obtain_atp_data.all_spider_codes_iterator():
        potential_match_results = match_output_for_spider_and_graticule(area, atp_code, lat_anchor, lon_anchor)
        if os.path.isfile(potential_match_results):
            matched = serializing.load_list_of_matches_from_csv(potential_match_results)
-            report_generators[atp_code] = show_data.produce_map_analysis_for_atp_data(atp_code, area_name=area_name, match_list=matched, cache_only=True, url_checker_instance=url_checker_instance)
+            report_generators[atp_code] = show_data.produce_map_analysis_for_atp_data(atp_code, area_name=area_name, match_list=matched, cache_only=True)
            for path in show_data.iterate_over_output_files(atp_code, area_name):
                generated_filepaths.append(path)
    show_data.generate_website_index_for_named_area(report_generators, area_name)

show_data.py (213 changes)

@@ -133,191 +133,6 @@ class MismatchingNameReportCreator:
        return output


-class ATPGivesTagsReportCreator:
-    def __init__(self, url_checker_instance, atp_code, area_name):
-        self.atp_code = atp_code
-        self.area_name = area_name
-        self.shops_with_tags_to_be_added = []
-        self.mismatching_website_tags = []
-        self.count_of_total_tag_mismatches = {}
-        self.importable_keys = ['website']
-        # 'opening_hours' - trading Sundays in Poland (niedziele handlowe)...
-        # 'phone' - pending removal (upstream, in ATP) of the same supposed call center phone applied to many points,
-        # let's wait and see whether it happens...
-        for key in self.importable_keys:
-            self.count_of_total_tag_mismatches[key] = 0
-        self.url_checker = url_checker_instance
-    def check_case(self, atp_match_entry, cache_only):
-        """
-        note: last_nominatim_match_status is used by code outside this class
-        this is hacky but no better solution was found
-        details are documented where last_nominatim_match_status is present
-        """
-        self.last_nominatim_match_status = None
-        tags_to_be_added = self.key_with_values_that_can_be_added_to_osm_from_atp(atp_match_entry, cache_only)
-        if len(tags_to_be_added) > 0:
-            self.last_nominatim_match_status = nominatim.is_location_matching_tags(atp_match_entry.atp_tags, atp_match_entry.atp_center, cache_only=cache_only)
-            if self.last_nominatim_match_status != False:
-                self.shops_with_tags_to_be_added.append({'tags_to_be_added': tags_to_be_added, 'entry': atp_match_entry})
-                return True
-    def present_reports(self):
-        with open(config.output_folder() + "proposed_bot_edit.csv", 'a') as outfile:
-            writer = csv.writer(outfile)
-            for key in self.importable_keys:
-                if self.count_of_total_tag_mismatches[key] == 0:
-                    for entry in self.shops_with_tags_to_be_added:  # already Nominatim-filtered
-                        if key in entry['tags_to_be_added']:
-                            value = entry['entry'].atp_tags[key]
-                            atp_code = entry['entry'].atp_tags['@spider']
-                            writer.writerow([entry['entry'].osm_link, key, value, atp_code])
-                else:
-                    print(key, "ineligible for import", self.count_of_total_tag_mismatches[key], "mismatches")
-
-        self.generate_potential_new_tags_listing()
-        # TODO: more complex - how to export it? whether any export is worthwhile here...
-        # with open(config.output_folder() + "add_tags_" + atp_code + ".geojson", 'w') as f:
-        #     json.dump(serializing.generate_geojson_structure(self.shops_with_tags_to_be_added), f)
-        self.generate_mismatching_website_listing()
-    def add_tags_filename(self):
-        area_name_part = ""
-        if self.area_name not in [None, ""]:
-            area_name_part = "_" + self.area_name
-        return 'add_tags_' + self.atp_code + area_name_part + ".html"
-
-    def mismatching_website_report_filename(self):
-        area_name_part = ""
-        if self.area_name not in [None, ""]:
-            area_name_part = "_" + self.area_name
-        return 'website_mismatch_complicated_to_fix_' + self.atp_code + area_name_part + '.html'
-    def table_of_contents(self):
-        return [
-            {
-                'header': 'Tags to add',
-                'section_link': section_link('this tags can be imported', len(self.shops_with_tags_to_be_added), self.add_tags_filename()),
-                'is_extra_complicated': False,
-                'output_files': [self.add_tags_filename()],
-            },
-            {
-                'header': 'Mismatching website tags',
-                'section_link': section_link('website tag mismatch', len(self.mismatching_website_tags), self.mismatching_website_report_filename()),
-                'is_extra_complicated': True,
-                'output_files': [self.mismatching_website_report_filename()],
-            },
-        ]
-    def key_with_values_that_can_be_added_to_osm_from_atp(self, atp, cache_only):
-        tags_to_be_added = []
-        for tested_key in self.importable_keys:
-            if tested_key not in atp.atp_tags:
-                continue
-            if tested_key == "phone":
-                if self.is_phone_eligible(atp):
-                    tags_to_be_added.append(tested_key)
-            if tested_key == "website":
-                if self.is_website_eligible(atp, cache_only):
-                    tags_to_be_added.append(tested_key)
-            else:
-                raise NotImplementedError
-        return tags_to_be_added
-    def is_website_eligible(self, atp, cache_only):
-        tested_key = "website"
-        if tested_key not in atp.atp_tags:
-            return False
-        returned = self.url_checker.is_website_eligible(atp, cache_only)
-        if returned == None:
-            return False  # not cached, instructed to use only cache
-        if returned:
-            if atp.atp_tags.get(tested_key) != atp.osm_match_tags.get(tested_key):
-                return True
-        else:
-            atp_value = atp.atp_tags.get(tested_key)
-            osm_value = atp.osm_match_tags.get(tested_key)
-            if self.url_checker.drop_this_link(osm_value):
-                # not really a mismatch, both are garbage-like,
-                # pointing to the main brand page
-                pass
-            elif self.url_checker.is_difference_limited_to_slash_at_end(atp_value, osm_value):
-                pass  # effectively the same anyway, no real mismatch
-            else:
-                self.mismatching_website_tags.append(atp)
-                self.report_mismatch(atp, tested_key)
-    def is_phone_eligible(self, atp):
-        tested_key = "phone"
-        canonical_atp_value = atp.atp_tags[tested_key].replace(" ", "")
-        canonical_osm_value = atp.osm_match_tags.get(tested_key)
-        if canonical_osm_value == None:
-            canonical_osm_value = atp.osm_match_tags.get("contact:" + tested_key)
-        if canonical_osm_value != None:
-            canonical_osm_value = canonical_osm_value.replace(" ", "")
-        if canonical_osm_value not in (None, canonical_atp_value):
-            self.report_mismatch(atp, tested_key)
-            return False
-        return canonical_osm_value == None
-    def report_mismatch(self, atp, tested_key):
-        print("atp\nosm - tag mismatch between them for", tested_key, "in", atp.osm_link)
-        if 'website' in atp.atp_tags and tested_key != "website":
-            print("ATP link:", atp.atp_tags['website'])
-        print(atp.atp_tags[tested_key])
-        if tested_key not in atp.osm_match_tags:
-            print(tested_key, "not in OSM tags, apparently ATP value was rejected outright for", self.atp_code)
-        else:
-            print(atp.osm_match_tags[tested_key])
-        print()
-        self.count_of_total_tag_mismatches[tested_key] += 1
-    def generate_potential_new_tags_listing(self):
-        center = get_center([x['entry'] for x in self.shops_with_tags_to_be_added])
-        atp_spider_link_target = config.get_github_link_to_spider(self.atp_code)
-        linked_text = self.atp_code.replace("_", "_​") + " spider"  # "_" becomes "_" plus a zero-width space, to allow line breaks
-        link = '<a href="' + atp_spider_link_target + '">' + linked_text + "</a>"
-        with open(config.output_folder() + self.add_tags_filename(), 'w') as outfile:
-            sidebar = prose.sidebar_content("This page lists where " + link + " data has shops that seem to have useful info not present in OSM data. Markers shown here are positioned according to ATP data, lines lead to location of matched OSM shop. " + str(len(self.shops_with_tags_to_be_added)) + " entries are shown.", self.atp_code)
-            outfile.write(leafleter.generator.get_html_page_prefix("ATP <-> OSM " + self.atp_code + " - new tags?", center['lat'], center['lon'], zlevel_centered=7, width_percent=90, sidebar_content=sidebar))
-            for entry in self.shops_with_tags_to_be_added:
-                message = '<a href=\\"' + entry['entry'].osm_link + '\\">osm</a><br>'
-                if 'website' in entry['entry'].osm_match_tags:
-                    message += '<a href=\\"' + escape_url(entry['entry'].osm_match_tags['website']) + '\\">object website from OSM</a><br>'
-                if 'website' in entry['entry'].atp_tags:
-                    message += '<a href=\\"' + escape_url(entry['entry'].atp_tags['website']) + '\\">object website from ATP</a><br>'
-                new_tags = {}
-                for key in entry['tags_to_be_added']:
-                    new_tags[key] = entry['entry'].atp_tags[key]
-                message += tag_list_to_html(new_tags)
-                outfile.write(leafleter.generator.get_marker(message, entry['entry'].atp_center['lat'], entry['entry'].atp_center['lon'], color='green'))
-                outfile.write(leafleter.generator.get_line(entry['entry'].atp_center['lat'], entry['entry'].atp_center['lon'], entry['entry'].osm_match_center['lat'], entry['entry'].osm_match_center['lon'], color='green'))
-            outfile.write(leafleter.generator.get_html_page_suffix())
-    def generate_mismatching_website_listing(self):
-        atp_spider_link_target = config.get_github_link_to_spider(self.atp_code)
-        linked_text = self.atp_code.replace("_", "_​") + " spider"
-        link = '<a href="' + atp_spider_link_target + '">' + linked_text + "</a>"
-        center = get_center(self.mismatching_website_tags)
-        with open(config.output_folder() + self.mismatching_website_report_filename(), 'w') as outfile:
-            sidebar = prose.sidebar_content("This page lists where " + link + " data has shops that were matched but <code>website</code> website tags are not matching.<br><br>WARNING WARNING this may be tricky to fix!<br>ATP version is not always better, some shops are matched incorrectly! In some cases OSM has correct or better data!<br>Markers shown here are positioned according to ATP data.<br>" + str(len(self.mismatching_website_tags)) + " entries are shown.", self.atp_code)
-            outfile.write(leafleter.generator.get_html_page_prefix("ATP <-> OSM " + self.atp_code + " - mismatching website tags", center['lat'], center['lon'], zlevel_centered=7, width_percent=90, sidebar_content=sidebar))
-            for bad in self.mismatching_website_tags:
-                osm_location_link = shared.link_to_point_in_osm(bad.atp_center['lat'], bad.atp_center['lon'])
-                summary = 'here ATP shows object being present, which has match but their website tags conflict (<a href="' + osm_location_link + '" target="_blank">OSM location</a>)'
-                summary += "<br><hr><br>"
-                summary += 'website from ATP:' + htmlify_value("website", str(bad.atp_tags.get('website')))
-                summary += "<br>"
-                summary += 'website from OSM:' + htmlify_value("website", str(bad.osm_match_tags.get('website')))
-                summary += "<br><hr><br>"
-                summary += 'tag list as suggested by ATP (should not be assumed to be directly usable in OSM):<br><br>'
-                summary += tag_list_to_html(bad.atp_tags)
-                outfile.write(leafleter.generator.get_marker(summary, bad.atp_center['lat'], bad.atp_center['lon'], color='red'))
-                outfile.write(leafleter.generator.get_line(bad.atp_center['lat'], bad.atp_center['lon'], bad.osm_match_center['lat'], bad.osm_match_center['lon'], color='red'))
-            outfile.write(leafleter.generator.get_html_page_suffix())


class MissingObjectsReportCreator:
    def __init__(self, atp_code, area_name, specifier=''):
        self.area_name = area_name
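The phone handling deleted above canonicalizes both sides by stripping spaces and falls back to the contact:phone key on the OSM side. The comparison in isolation, as a sketch rather than code from this commit:

def phones_effectively_equal(atp_phone, osm_tags):
    # sketch (not part of this commit), mirroring is_phone_eligible above
    canonical_atp = atp_phone.replace(" ", "")
    osm_phone = osm_tags.get("phone", osm_tags.get("contact:phone"))
    if osm_phone is None:
        return None  # nothing in OSM to conflict with
    return osm_phone.replace(" ", "") == canonical_atp

print(phones_effectively_equal("+48 12 286-32-83", {"contact:phone": "+48 12 286-32-83"}))  # True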
@@ -559,11 +374,10 @@ add to test_display_website
# TODO: save files one level higher, here just produce analysis


-def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_only, url_checker_instance):
+def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_only):
    missing_objects_report = MissingObjectsReportCreator(atp_code, area_name)
    missing_or_not_objects_report_gray_area = MissingObjectsReportCreator(atp_code, area_name, specifier=" - maybe")
    mismatching_name_report = MismatchingNameReportCreator(atp_code, area_name)
-    add_tags_from_atp_report = ATPGivesTagsReportCreator(url_checker_instance, atp_code, area_name)
    conflict_between_atp_and_nominatim_report = NominatimMismatchReportCreator(atp_code, area_name)
    match_report = MatchMeteringReportCreator(len(match_list), atp_code, area_name)
    if config.graph_distance_distribution_when_generating_reports():
@@ -573,32 +387,14 @@ def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_onl
        if config.graph_distance_distribution_when_generating_reports():
            distance_distribution_report.check_case(atp)
        if atp.all_very_good_matches != None and len(atp.all_very_good_matches) > 1:
-            # on all_very_good_matches > 1 it should still be possible to recover
+            # TODO on all_very_good_matches > 1 it should still be possible to recover
            # but it will require extra logic
            # see say
            # https://www.openstreetmap.org/way/280274214 https://www.openstreetmap.org/node/4635678378
            continue
        if atp.match_distance != None and atp.match_distance < config.good_match_distance_in_kilometers():
-            # this if-else structure is built here to improve matching efficiency and not
-            # a lot can be done to eliminate the hand-crafted structure here
-            # and report count is not high enough to justify spending time
-            # on some declarative syntax
            if mismatching_name_report.check_case(atp):
                continue
-            add_tags_from_atp_report.check_case(atp, cache_only=cache_only)
-
-            # quite hacky, but I have no better ideas than to take this internal thing
-            # that should be an implementation detail
-            # (1) High failure rate of Nominatim and limited ability to make queries
-            # means that it cannot be run for all cases
-            # (2) So it needs to be restricted to only where tags would be moved from ATP to OSM
-            # (3) Code for Nominatim mismatch report and adding ATP tags is supposed to be separated
-            # So code for reports needs to be separated and at the same time I need info on how
-            # matching went...
-            status = add_tags_from_atp_report.last_nominatim_match_status
-            conflict_between_atp_and_nominatim_report.register_case_using_known_nominatim_status(atp, status, cache_only=cache_only)
-            if status != False:
-                match_report.check_case(atp)
        elif atp.match_distance == None or atp.match_distance > config.missing_shop_distance_in_kilometers_for_specific_case(atp.atp_tags):
            nominatim_match = nominatim.is_location_matching_tags(atp.atp_tags, atp.atp_center, cache_only=cache_only)
            if nominatim_match != False:  # both matches, failed geolocation and geolocation not done at all go here
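The (1)-(3) comment removed above captures the constraint that shapes this code: Nominatim checks fail often and queries are limited, so they are reserved for entries that would actually receive an import, and nominatim.is_location_matching_tags is tri-state (True, False, or None when the answer is not cached). A condensed sketch of the pattern, not code from this commit:

import nominatim  # repository-internal module

def location_verdict_if_worthwhile(entry, tags_to_be_added, cache_only):
    # sketch (not part of this commit): spend a Nominatim lookup only
    # when there is actually something to import
    if len(tags_to_be_added) == 0:
        return None
    return nominatim.is_location_matching_tags(entry.atp_tags, entry.atp_center, cache_only=cache_only)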
@@ -615,7 +411,6 @@ def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_onl
    if shared.get_free_space_in_mb(config.output_folder()) < 400:
        raise Exception("running out of free space on drive")
    mismatching_name_report.present_reports()
-    add_tags_from_atp_report.present_reports()
    missing_objects_report.present_reports()
    missing_or_not_objects_report_gray_area.present_reports()
    conflict_between_atp_and_nominatim_report.present_reports()
@@ -628,7 +423,6 @@ def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_onl
        missing_objects_report,
        missing_or_not_objects_report_gray_area,
        mismatching_name_report,
-        add_tags_from_atp_report,
        conflict_between_atp_and_nominatim_report,
        match_report,
    ],
@@ -712,8 +506,6 @@ def headers():
        MissingObjectsReportCreator('dummy', 'dummy area name').table_of_contents()[0]['header'],
        MissingObjectsReportCreator('dummy', 'dummy area name', specifier=" - maybe").table_of_contents()[0]['header'],
        MismatchingNameReportCreator('dummy', 'dummy area name').table_of_contents()[0]['header'],
-        ATPGivesTagsReportCreator(None, 'dummy', 'dummy area name').table_of_contents()[0]['header'],
-        ATPGivesTagsReportCreator(None, 'dummy', 'dummy area name').table_of_contents()[1]['header'],
        NominatimMismatchReportCreator('dummy', 'dummy area name').table_of_contents()[0]['header'],
        MatchMeteringReportCreator(-1, 'dummy', 'dummy area name').table_of_contents()[0]['header'],
    ]
@@ -791,7 +583,6 @@ def iterate_over_output_files(atp_code, area_name):
        MissingObjectsReportCreator(atp_code, area_name),
        MissingObjectsReportCreator(atp_code, area_name, specifier=" - maybe"),
        MismatchingNameReportCreator(atp_code, area_name),
-        ATPGivesTagsReportCreator(None, atp_code, area_name),
        NominatimMismatchReportCreator(atp_code, area_name),
        MatchMeteringReportCreator(-1, atp_code, area_name),
    ]
@@ -7,19 +7,6 @@ import show_data


class IsCodeCompletelyCrashingSmoketests(unittest.TestCase):
-    def test_rough_code_validity(self):
-        add_tags_from_atp = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy atp code', 'dummy area name')
-        center = {'lat': 50, 'lon': 20}
-        atp_center = center
-        atp_tags = {'brand': 'Foobar', 'website': 'https://example.com/foobar/12', '@spider': 'dummy_code'}
-        osm_match_center = center
-        osm_match_tags = {'brand': 'Foobar'}
-        osm_link = 'https://www.openstreetmap.org/node/1'
-        match_distance = 0
-        all_very_good_matches = []
-        match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
-        add_tags_from_atp.check_case(match, cache_only=False)

    def test_creation_of_missing_object_report_creator_table_contents_smoke_test(self):
        report = show_data.MissingObjectsReportCreator('dummy_atp_code', 'dummy_area_name')
        report.table_of_contents()
@@ -34,10 +21,6 @@ class IsCodeCompletelyCrashingSmoketests(unittest.TestCase):
        report = show_data.MismatchingNameReportCreator('dummy_atp_code', 'dummy_area_name')
        report.table_of_contents()

-    def test_creation_of_tag_giver_report_creator(self):
-        report = show_data.ATPGivesTagsReportCreator(None, 'dummy_atp_code', 'dummy_area_name')
-        report.table_of_contents()

    def test_creation_of_nominatim_report_creator(self):
        report = show_data.NominatimMismatchReportCreator('dummy_atp_code', 'dummy_area_name')
        report.html_report_content()
@@ -61,61 +44,3 @@ class TagListFormattingTests(unittest.TestCase):

    def test_tag_list_generation_newline_in_tags_escape(self):
        self.assertEqual("aaaa<br>bbb" in show_data.tag_list_to_html({"description": "aaaa\nbbb"}), True)


-class PhoneSuggestingTests(unittest.TestCase):
-    def test_accept_normal_phone(self):
-        add_tags_from_atp = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy identifier for tests', 'dummy area name')
-        center = {'lat': 50, 'lon': 20}
-        atp_center = center
-        atp_tags = {'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
-        osm_match_center = center
-        osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse'}
-        osm_link = 'https://www.openstreetmap.org/node/1'
-        match_distance = 0
-        all_very_good_matches = []
-        match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
-        self.assertEqual(add_tags_from_atp.is_phone_eligible(match), True)
-
-    def test_handle_contact_prefix(self):
-        add_tags_from_atp = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy identifier for tests', 'dummy area name')
-        center = {'lat': 50, 'lon': 20}
-        atp_center = center
-        atp_tags = {'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
-        osm_match_center = center
-        osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse', 'contact:phone': '+48 12 286-32-83'}
-        osm_link = 'https://www.openstreetmap.org/node/1'
-        match_distance = 0
-        all_very_good_matches = []
-        match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
-        self.assertEqual(add_tags_from_atp.is_phone_eligible(match), False)
-
-
-class WebsiteSuggestingTests(unittest.TestCase):
-    def test_accept_normal_website(self):
-        add_tags_from_atp = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy identifier for tests', 'dummy area name')
-        center = {'lat': 50, 'lon': 20}
-        atp_center = center
-        atp_tags = {'@spider': 'bogus_atp_code', 'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
-        osm_match_center = center
-        osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse'}
-        osm_link = 'https://www.openstreetmap.org/node/1'
-        match_distance = 0
-        all_very_good_matches = []
-        match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
-        add_tags_from_atp.check_case(match, cache_only=False)
-        self.assertEqual(len(add_tags_from_atp.shops_with_tags_to_be_added), 1)
-
-    def test_reject_already_present_website(self):
-        add_tags_from_atp = show_data.ATPGivesTagsReportCreator(url_checker.URLChecker(), 'dummy identifier for tests', 'dummy area name')
-        center = {'lat': 50, 'lon': 20}
-        atp_center = center
-        atp_tags = {'@spider': 'bogus_atp_code', 'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
-        osm_match_center = center
-        osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse', 'website': 'https://krakow-sr.sr.gov.pl/'}
-        osm_link = 'https://www.openstreetmap.org/node/1'
-        match_distance = 0
-        all_very_good_matches = []
-        match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
-        add_tags_from_atp.check_case(match, cache_only=False)
-        self.assertEqual(len(add_tags_from_atp.shops_with_tags_to_be_added), 0)

test_import_list_maker.py (new file, 29 lines)

@@ -0,0 +1,29 @@
+import_script = __import__("21_list_import_status")
+
+import unittest
+import serializing
+
+
+class WebsiteSuggestingTests(unittest.TestCase):
+    def test_accept_normal_website(self):
+        center = {'lat': 50, 'lon': 20}
+        atp_center = center
+        atp_tags = {'@spider': 'bogus_atp_code', 'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
+        osm_match_center = center
+        osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse'}
+        osm_link = 'https://www.openstreetmap.org/node/1'
+        match_distance = 0
+        all_very_good_matches = []
+        match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
+        self.assertEqual(import_script.extract_website_import_info(match), {'status': 'importable'})
+
+    def test_reject_already_present_website(self):
+        center = {'lat': 50, 'lon': 20}
+        atp_center = center
+        atp_tags = {'@spider': 'bogus_atp_code', 'brand': 'Foobar', 'website': 'https://krakow-sr.sr.gov.pl/', 'phone': '+48 12 286-32-83', 'amenity': 'courthouse'}
+        osm_match_center = center
+        osm_match_tags = {'brand': 'Foobar', 'amenity': 'courthouse', 'website': 'https://krakow-sr.sr.gov.pl/'}
+        osm_link = 'https://www.openstreetmap.org/node/1'
+        match_distance = 0
+        all_very_good_matches = []
+        match = serializing.Match(atp_center, atp_tags, osm_match_center, osm_match_tags, osm_link, match_distance, all_very_good_matches)
+        self.assertEqual(import_script.extract_website_import_info(match), {'status': 'no_import_for_this_key'})
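The new file carries over the two website tests deleted from the show_data tests, but drives extract_website_import_info directly instead of going through ATPGivesTagsReportCreator. It should run standalone with the stock runner, e.g. python3 -m unittest test_import_list_maker, assuming it is executed from the repository root next to 21_list_import_status.py.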
@@ -55,13 +55,14 @@ class URLChecker():
        print("Maybe it can be safely assumed that following value can be thrown away?")
        return False

-    def is_website_eligible(self, atp, cache_only):
+    def is_website_eligible(self, atp, cache_only, tested_key):
        """
-        return True if existing osm website tag should be replaced by ATP claimed tag
+        return True if existing tag should be replaced by ATP claimed tag
        return False if it should not be
        return None if it could not be established
        """
-        tested_key = "website"
+        if tested_key != "website":
+            raise Exception("this was not considered, at least for now")
        atp_value = atp.atp_tags[tested_key]
        osm_value = atp.osm_match_tags.get(tested_key)
        if atp_value == osm_value:
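The tightened signature keeps the documented tri-state contract: True means replace, False means keep, None means not established (typically a cache miss under cache_only=True). Callers are expected to branch on all three, as extract_website_import_info does earlier in this commit; condensed as a sketch, not code from this commit:

def describe_eligibility(returned):
    # sketch (not part of this commit) of the True/False/None contract above
    if returned is None:
        return "unknown - not cached, and a live check was not allowed"
    if returned is False:
        return "keep the existing OSM value"
    return "eligible for replacement with the ATP value"

print(describe_eligibility(None))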