mirror of
https://codeberg.org/matkoniecz/list_how_openstreetmap_can_be_improved_with_alltheplaces_data.git
synced 2025-04-11 10:09:29 +02:00
Let autopep8 make some pointless changes.
OK, one or two were helpful. I rejected the harmful ones and let the pointless and OK ones go through.
This commit is contained in:
parent
91590fb3c9
commit
cc91eb1da2
24 changed files with 565 additions and 441 deletions
0_config.py
0_obtain_related_repositories_to_provide_better_feedback_on_atp_data_quality.py
11_experimental_taginfo_tag_lister.py
13_generate_atp_issue_tracker_report.py
14_generate_atp_issue_reports_about_poorly_matched_entries.py
15_generate_atp_issue_reports_about_bad_names.py
16_run_remove_bad_data_across_atp_to_trigger_log_output.py
17_list_mismatching_brand_wikidata.py
18_check_poi_type_conflicts_for_suspicious.py
19_detect_unhandled_cascading_values_for_canonical_poi_types.py
2_obtain_atp_data.py
3_delete_old_build_and_output.py
5_generate_graticule_reports.py
distance_distribution.py
latest_date_of_file_commit.py
matcher.py
nominatim.py
qa.py
qa_autofix_atp.py
shared.py
show_data.py
test_config.py
test_matching_logic.py
test_processing.py
557
0_config.py
557
0_config.py
|
@ -11,22 +11,27 @@ import rich
|
|||
# see https://github.com/theskumar/python-dotenv
|
||||
dotenv.load_dotenv()
|
||||
|
||||
|
||||
def allow_extremely_low_priority_atp_logging():
    """Return whether extremely low priority ATP issues should be logged.

    True only when known_broken_addr_full_spiders() is empty and
    allow_very_low_priority_atp_logging() also returns a truthy value.
    """
    # not an ideal metric, but avoids asking me to be active in ATP if ATP is bottlenecked
    return len(known_broken_addr_full_spiders()) == 0 and allow_very_low_priority_atp_logging()
|
||||
|
||||
|
||||
def allow_very_low_priority_atp_logging():
    """Return whether very low priority ATP issues should be logged.

    True only when known_broken_addr_full_spiders() lists fewer than 5
    entries and allow_low_priority_atp_logging() also returns a truthy value.
    """
    # not an ideal metric, but avoids asking me to be active in ATP if ATP is bottlenecked
    return len(known_broken_addr_full_spiders()) < 5 and allow_low_priority_atp_logging()
|
||||
|
||||
|
||||
def allow_low_priority_atp_logging():
    """Return whether low priority ATP issues should be logged.

    True only when known_broken_addr_full_spiders() lists fewer than 20
    entries and allow_normal_priority_atp_logging() also returns a truthy value.
    """
    # not an ideal metric, but avoids asking me to be active in ATP if ATP is bottlenecked
    return len(known_broken_addr_full_spiders()) < 20 and allow_normal_priority_atp_logging()
|
||||
|
||||
|
||||
def allow_normal_priority_atp_logging():
    """Return whether normal priority ATP issues should be logged.

    True when known_broken_addr_full_spiders() lists fewer than 30 entries.
    """
    # not an ideal metric, but avoids asking me to be active in ATP if ATP is bottlenecked
    return len(known_broken_addr_full_spiders()) < 30
|
||||
|
||||
|
||||
def publish_geojson_when_generating_reports():
    """Configuration switch; currently hard-coded to False."""
    return False
|
||||
|
||||
|
@ -53,7 +58,7 @@ def is_nonlocal_phone_worth_mentioning(atp_code):
|
|||
|
||||
def is_bogus_key_worth_mentioning(key, atp_code):
|
||||
if atp_code == 'atp_code':
|
||||
return False # from tests, synthetic data, lets not spam in tests
|
||||
return False # from tests, synthetic data, lets not spam in tests
|
||||
if key in ["branch", "branch:en"]:
|
||||
# this data is just ignored - lets not spend effort on improving it
|
||||
return False
|
||||
|
@ -85,101 +90,101 @@ def is_bogus_key_worth_mentioning(key, atp_code):
|
|||
return False
|
||||
return atp_code not in [
|
||||
'credit_union_us', # https://github.com/alltheplaces/alltheplaces/issues/11185
|
||||
'dominos_pizza_bh', # https://github.com/alltheplaces/alltheplaces/issues/11160
|
||||
'dominos_pizza_hr', # looks like the same case, checked and looks invalid
|
||||
'dominos_pizza_ec', # not checked is it invalid, assumed to be for now
|
||||
'dominos_pizza_bh', # https://github.com/alltheplaces/alltheplaces/issues/11160
|
||||
'dominos_pizza_hr', # looks like the same case, checked and looks invalid
|
||||
'dominos_pizza_ec', # not checked is it invalid, assumed to be for now
|
||||
]
|
||||
if key == "type":
|
||||
return atp_code not in [
|
||||
'kaiser_permanente_us', # https://github.com/alltheplaces/alltheplaces/pull/11785 TODO merged
|
||||
'kaiser_permanente_us', # https://github.com/alltheplaces/alltheplaces/pull/11785 TODO merged
|
||||
]
|
||||
if key == "addr:housenumber":
|
||||
return atp_code not in [
|
||||
'worldcat', # https://github.com/alltheplaces/alltheplaces/issues/11198
|
||||
'tradelink_au', # just a single case, report after this list is clean
|
||||
'popeyes_sg', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'vida_e_caffe', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'salvos_au', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'banxico_mx', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'ray_white_au_nz', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'orlen', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'ulybka_radugi_ru', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'charge_place_scotland_gb', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'ship_and_go_ro', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'worldcat', # https://github.com/alltheplaces/alltheplaces/issues/11198
|
||||
'tradelink_au', # just a single case, report after this list is clean
|
||||
'popeyes_sg', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'vida_e_caffe', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'salvos_au', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'banxico_mx', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'ray_white_au_nz', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'orlen', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'ulybka_radugi_ru', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'charge_place_scotland_gb', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'ship_and_go_ro', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
]
|
||||
if key == "addr:full":
|
||||
# note: use is_addr_full_known_to_be_broken if geocoding should not be attempted there
|
||||
if atp_code in [
|
||||
# not blocking geocoding
|
||||
'rostics_ru', # lower priority, tricky script TODO handle
|
||||
'brico_ok_it', # see https://github.com/alltheplaces/alltheplaces/pull/11738
|
||||
'barrhead_travel_gb', # https://github.com/alltheplaces/alltheplaces/pull/11739#issuecomment-2567184196
|
||||
'western_union', # https://github.com/alltheplaces/alltheplaces/issues/11838
|
||||
'burger_king_es_pt', # country info missing
|
||||
'leon', # sometimes country info missing
|
||||
'gov_cma_fuel_gb', # sometimes includes place name, sometimes not
|
||||
'vic_free_wifi_au', # mostly street address... Still, free wifi, not POIs - not going to care about this
|
||||
'accor', # sometimes includes place name, sometimes not - rather small number is affected (?)
|
||||
'uniqlo', # seems to include city names, at least in English. Still, missing countries
|
||||
'dominos_pizza_om', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'berlin_doner_kebap_pl', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'baskin_robbins_ca', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'the_courier_guy_za', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'pizza_hut_kr', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'costa_coffee', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'mcdonalds_es', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'clothing_junction_za', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'lhw', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'gov_dfe_gias_gb', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'romans_pizza', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'newyorker', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'amcal_au', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'glassons', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'ctm', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'columbia_us', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'subway_kr', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'russells_za', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'lukoil', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'blooms_the_chemist_au', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'dominos_pizza_ae', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'rage_za', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'easypay', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'mr_liquor_au', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'library_institute_hu', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'mtexx_bg', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'hyundai_bw_ls_na_sz_za', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'gridserve_gb', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'coffee_like', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'lewis_stores', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'lilly_bg', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'dunkin_sa', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'mcdonalds_hk', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'us_army_national_guard', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
't_market_bg', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'texaco_central_america', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'wesola_pani', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'ptt_th', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'baby_bunting', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'kfc_sg', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'cooplands_doncaster_gb', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'jack_wills_gb', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'bras_n_things', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'mer_gb', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'wingstop_gb', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'safeway_ca', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'papa_johns_az', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'kia_kr', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'burger_king_cy', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'dusk_au', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'agnvet_au', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'real_hu', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'burger_king_il', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'burger_king_cn', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'church_of_england_gb', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'magasin_vert_fr', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'cobasi_br', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'i_and_g_brokers', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'ewiva_it', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'rostics_ru', # lower priority, tricky script TODO handle
|
||||
'brico_ok_it', # see https://github.com/alltheplaces/alltheplaces/pull/11738
|
||||
'barrhead_travel_gb', # https://github.com/alltheplaces/alltheplaces/pull/11739#issuecomment-2567184196
|
||||
'western_union', # https://github.com/alltheplaces/alltheplaces/issues/11838
|
||||
'burger_king_es_pt', # country info missing
|
||||
'leon', # sometimes country info missing
|
||||
'gov_cma_fuel_gb', # sometimes includes place name, sometimes not
|
||||
'vic_free_wifi_au', # mostly street address... Still, free wifi, not POIs - not going to care about this
|
||||
'accor', # sometimes includes place name, sometimes not - rather small number is affected (?)
|
||||
'uniqlo', # seems to include city names, at least in English. Still, missing countries
|
||||
'dominos_pizza_om', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'berlin_doner_kebap_pl', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'baskin_robbins_ca', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'the_courier_guy_za', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'pizza_hut_kr', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'costa_coffee', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'mcdonalds_es', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'clothing_junction_za', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'lhw', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'gov_dfe_gias_gb', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'romans_pizza', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'newyorker', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'amcal_au', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'glassons', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'ctm', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'columbia_us', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'subway_kr', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'russells_za', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'lukoil', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'blooms_the_chemist_au', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'dominos_pizza_ae', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'rage_za', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'easypay', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'mr_liquor_au', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'library_institute_hu', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'mtexx_bg', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'hyundai_bw_ls_na_sz_za', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'gridserve_gb', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'coffee_like', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'lewis_stores', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'lilly_bg', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'dunkin_sa', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'mcdonalds_hk', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'us_army_national_guard', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
't_market_bg', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'texaco_central_america', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'wesola_pani', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'ptt_th', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'baby_bunting', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'kfc_sg', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'cooplands_doncaster_gb', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'jack_wills_gb', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'bras_n_things', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'mer_gb', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'wingstop_gb', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'safeway_ca', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'papa_johns_az', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'kia_kr', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'burger_king_cy', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'dusk_au', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'agnvet_au', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'real_hu', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'burger_king_il', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'burger_king_cn', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'church_of_england_gb', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'magasin_vert_fr', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'cobasi_br', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'i_and_g_brokers', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
'ewiva_it', # https://github.com/alltheplaces/alltheplaces/issues/11797 - just few entries
|
||||
]:
|
||||
return False
|
||||
if atp_code in [
|
||||
|
@ -187,9 +192,9 @@ def is_bogus_key_worth_mentioning(key, atp_code):
|
|||
'familymart_tw',
|
||||
'starbucks_jp',
|
||||
]:
|
||||
return False # tricky case - partially false positive (nonASCII), has also city, just country missing etc. Lets ignore this.
|
||||
return False # tricky case - partially false positive (nonASCII), has also city, just country missing etc. Lets ignore this.
|
||||
if atp_code.endswith('_ru') or atp_code.endswith('_tw') or atp_code.endswith('_jp') or atp_code.endswith('_eg'):
|
||||
return False # likely tricky - TODO handle
|
||||
return False # likely tricky - TODO handle
|
||||
if is_addr_full_known_to_be_broken(atp_code):
|
||||
return False
|
||||
return allow_normal_priority_atp_logging()
|
||||
|
@ -200,32 +205,32 @@ def is_bogus_key_worth_mentioning(key, atp_code):
|
|||
return False
|
||||
if key == "addr:full:en":
|
||||
if atp_code in [
|
||||
'starbucks_cn', # https://github.com/alltheplaces/alltheplaces/issues/11870
|
||||
'starbucks_cn', # https://github.com/alltheplaces/alltheplaces/issues/11870
|
||||
]:
|
||||
return False
|
||||
return allow_low_priority_atp_logging()
|
||||
if key == "addr:street_address:en":
|
||||
if atp_code in [
|
||||
'starbucks_cn', # https://github.com/alltheplaces/alltheplaces/issues/11870
|
||||
'starbucks_cn', # https://github.com/alltheplaces/alltheplaces/issues/11870
|
||||
]:
|
||||
return False
|
||||
return allow_low_priority_atp_logging()
|
||||
if key == "addr:district":
|
||||
return atp_code not in [
|
||||
'kooperatifmarket_tr', # https://github.com/alltheplaces/alltheplaces/issues/11184
|
||||
'kooperatifmarket_tr', # https://github.com/alltheplaces/alltheplaces/issues/11184
|
||||
]
|
||||
if key == "ref":
|
||||
return atp_code not in [
|
||||
'kooperatifmarket_tr', # https://github.com/alltheplaces/alltheplaces/issues/11184
|
||||
'tui', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'texas_department_of_transportation_us', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'northern_california_breweries', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'teboil_ru', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'burger_king_tr', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'kooperatifmarket_tr', # https://github.com/alltheplaces/alltheplaces/issues/11184
|
||||
'tui', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'texas_department_of_transportation_us', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'northern_california_breweries', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'teboil_ru', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
'burger_king_tr', # https://github.com/alltheplaces/alltheplaces/issues/11790 - whitespace (fixed by my processing)
|
||||
]
|
||||
if key == "note":
|
||||
return atp_code not in [
|
||||
'stadt_zuerich_ch', # https://github.com/alltheplaces/alltheplaces/issues/11182
|
||||
'stadt_zuerich_ch', # https://github.com/alltheplaces/alltheplaces/issues/11182
|
||||
]
|
||||
if key == "contact:linkedin":
|
||||
return True
|
||||
|
@ -239,23 +244,23 @@ def is_bogus_key_worth_mentioning(key, atp_code):
|
|||
return atp_code not in [
|
||||
]
|
||||
if key == "website":
|
||||
return False # https://github.com/alltheplaces/alltheplaces/issues/11736
|
||||
return False # https://github.com/alltheplaces/alltheplaces/issues/11736
|
||||
return atp_code not in [
|
||||
'aldi_sud_de', # https://github.com/alltheplaces/alltheplaces/issues/9415
|
||||
'agata_meble_pl', # https://github.com/alltheplaces/alltheplaces/issues/9409
|
||||
'cheddars_scratch_kitchen', # https://github.com/alltheplaces/alltheplaces/issues/11205
|
||||
'nravizza_by', # https://github.com/alltheplaces/alltheplaces/pull/11707 - TODO merged
|
||||
'aldi_sud_de', # https://github.com/alltheplaces/alltheplaces/issues/9415
|
||||
'agata_meble_pl', # https://github.com/alltheplaces/alltheplaces/issues/9409
|
||||
'cheddars_scratch_kitchen', # https://github.com/alltheplaces/alltheplaces/issues/11205
|
||||
'nravizza_by', # https://github.com/alltheplaces/alltheplaces/pull/11707 - TODO merged
|
||||
]
|
||||
if key == "website:menu":
|
||||
return False # waiting for https://github.com/alltheplaces/alltheplaces/issues/11736 first
|
||||
return False # waiting for https://github.com/alltheplaces/alltheplaces/issues/11736 first
|
||||
if key == "website:orders":
|
||||
return False # waiting for https://github.com/alltheplaces/alltheplaces/issues/11736 first
|
||||
return False # waiting for https://github.com/alltheplaces/alltheplaces/issues/11736 first
|
||||
if key == "contact:instagram":
|
||||
return False # waiting for https://github.com/alltheplaces/alltheplaces/issues/11736 first
|
||||
return False # waiting for https://github.com/alltheplaces/alltheplaces/issues/11736 first
|
||||
if key == "contact:twitter":
|
||||
return False # waiting for https://github.com/alltheplaces/alltheplaces/issues/11736 first
|
||||
return False # waiting for https://github.com/alltheplaces/alltheplaces/issues/11736 first
|
||||
if key == "website:jp":
|
||||
return False # low priority ATP bug, lets not deal with it for now...
|
||||
return False # low priority ATP bug, lets not deal with it for now...
|
||||
return atp_code not in [
|
||||
]
|
||||
if key in ["ref", "operator:website"]:
|
||||
|
@ -266,7 +271,7 @@ def is_bogus_key_worth_mentioning(key, atp_code):
|
|||
|
||||
def is_mismatching_name_worth_mentioning(atp_code):
|
||||
if atp_code in [
|
||||
#'lewiatan_pl', # https://www.openstreetmap.org/note/4436169
|
||||
# 'lewiatan_pl', # https://www.openstreetmap.org/note/4436169
|
||||
# see also https://www.openstreetmap.org/note/4349666 for Lewiatan
|
||||
# and https://www.openstreetmap.org/note/4349667
|
||||
'blue_bottle_liquors_za', # requires local knowledge - ask after higher ranked ones (including PRs) are processed
|
||||
|
@ -281,12 +286,13 @@ def is_mismatching_name_worth_mentioning(atp_code):
|
|||
'totally_workwear_au', 'bi_mart_us', 'carls_jr_au', 'crocs_za',
|
||||
|
||||
# OpenStreetMap is wrong
|
||||
'vinmonopolet_no' # https://github.com/alltheplaces/alltheplaces/pull/10982#issuecomment-2402392856
|
||||
'vinmonopolet_no' # https://github.com/alltheplaces/alltheplaces/pull/10982#issuecomment-2402392856
|
||||
]:
|
||||
return False
|
||||
# https://github.com/alltheplaces/alltheplaces/issues/11015
|
||||
return False
|
||||
|
||||
|
||||
def is_missing_main_tag_worth_mentioning(atp_code):
|
||||
already_known = [
|
||||
'king_kullen_us', # https://github.com/alltheplaces/alltheplaces/issues/10987
|
||||
|
@ -296,11 +302,11 @@ def is_missing_main_tag_worth_mentioning(atp_code):
|
|||
'kia_us', # https://github.com/alltheplaces/alltheplaces/issues/10885 (tricky, waits)
|
||||
'bonita', # https://github.com/alltheplaces/alltheplaces/issues/10934
|
||||
'quality_dairy_us', # https://github.com/alltheplaces/alltheplaces/issues/9660 (blocks non-USA traffic)
|
||||
'blyzenko_ua', # https://github.com/alltheplaces/alltheplaces/issues/11697
|
||||
'waterdrop', # https://github.com/alltheplaces/alltheplaces/issues/11698
|
||||
'house_au', # https://github.com/alltheplaces/alltheplaces/issues/11699
|
||||
'medical_city_healthcare', # https://github.com/alltheplaces/alltheplaces/issues/11700
|
||||
'bens_cookies', # https://github.com/alltheplaces/alltheplaces/issues/11701
|
||||
'blyzenko_ua', # https://github.com/alltheplaces/alltheplaces/issues/11697
|
||||
'waterdrop', # https://github.com/alltheplaces/alltheplaces/issues/11698
|
||||
'house_au', # https://github.com/alltheplaces/alltheplaces/issues/11699
|
||||
'medical_city_healthcare', # https://github.com/alltheplaces/alltheplaces/issues/11700
|
||||
'bens_cookies', # https://github.com/alltheplaces/alltheplaces/issues/11701
|
||||
'maserati', # has many actually empty entries
|
||||
'suzuki_marine_au', # looks tricky
|
||||
'coop_food_gb', # do not see how info may be recovered, maybe someone with local knowledge can
|
||||
|
@ -319,7 +325,7 @@ def is_missing_main_tag_worth_mentioning(atp_code):
|
|||
if atp_code in already_known:
|
||||
return False
|
||||
if len(already_known) > 25:
|
||||
return False # bottleneck is not in reporting, why care about this?
|
||||
return False # bottleneck is not in reporting, why care about this?
|
||||
if allow_low_priority_atp_logging():
|
||||
raise Exception("take into account date of file to avoid pointless reports, see latest_date_of_file_commit.py file and see https://github.com/alltheplaces/alltheplaces/issues/10990#issuecomment-2404066559")
|
||||
|
||||
|
@ -331,16 +337,16 @@ def is_null_specified_as_text_worth_mentioning(atp_code):
|
|||
if allow_very_low_priority_atp_logging():
|
||||
return False
|
||||
return atp_code not in [
|
||||
'ship_and_go_ro', # https://github.com/alltheplaces/alltheplaces/issues/11201
|
||||
'whataburger', # https://github.com/alltheplaces/alltheplaces/issues/11202
|
||||
'pandora', # https://github.com/alltheplaces/alltheplaces/issues/11203
|
||||
'ship_and_go_ro', # https://github.com/alltheplaces/alltheplaces/issues/11201
|
||||
'whataburger', # https://github.com/alltheplaces/alltheplaces/issues/11202
|
||||
'pandora', # https://github.com/alltheplaces/alltheplaces/issues/11203
|
||||
]
|
||||
|
||||
|
||||
def is_failed_geocoding_unexpected(atp_code):
|
||||
if atp_code in [
|
||||
'tegut_de', # https://github.com/alltheplaces/alltheplaces/issues/9212
|
||||
'fastned', 'ljsilvers', 'wells_fargo', 'coffee_like', # not bothered with reporting, remove here if someone else fixed some ATP issues I reported
|
||||
'fastned', 'ljsilvers', 'wells_fargo', 'coffee_like', # not bothered with reporting, remove here if someone else fixed some ATP issues I reported
|
||||
]:
|
||||
return False
|
||||
if is_addr_street_address_known_to_be_broken(atp_code):
|
||||
|
@ -353,114 +359,121 @@ def is_failed_geocoding_unexpected(atp_code):
|
|||
return False
|
||||
return allow_normal_priority_atp_logging()
|
||||
|
||||
|
||||
def is_addr_postcode_known_to_be_broken(atp_code):
    """Return True if atp_code is in the list of spiders with known-broken addr:postcode.

    atp_code: spider identifier, compared by equality against the listed codes.
    """
    # addr:postcode
    return atp_code in [
        'century_21',  # https://github.com/alltheplaces/alltheplaces/issues/11734
    ]
|
||||
|
||||
|
||||
def is_addr_street_address_known_to_be_broken(atp_code):
    """Return True if atp_code is in the list of spiders with known-broken addr:street_address.

    atp_code: spider identifier, compared by equality against the listed codes.
    """
    # addr:street_address
    return atp_code in [
        'skechers',  # https://github.com/alltheplaces/alltheplaces/issues/10967
        'just_group',  # https://github.com/alltheplaces/alltheplaces/issues/10360
        'petrol_bg',  # https://github.com/alltheplaces/alltheplaces/issues/11186
        'mussala_bg',  # https://github.com/alltheplaces/alltheplaces/issues/11244
        'otr_au',  # https://github.com/alltheplaces/alltheplaces/pull/11733#issuecomment-2567035353
    ]
|
||||
|
||||
|
||||
def is_addr_full_known_to_be_broken(atp_code):
    """Return True if atp_code is among the entries of known_broken_addr_full_spiders().

    atp_code: spider identifier, checked for membership in that list.
    """
    # addr:full
    return atp_code in known_broken_addr_full_spiders()
|
||||
|
||||
|
||||
def known_broken_addr_full_spiders():
    """Return the hard-coded list of spider codes with broken addr:full.

    A fresh list is built on every call. Each entry is listed once and carries
    a link to (or a short note about) the upstream report.
    """
    # addr:full
    return [
        # 2+ weeks old ones
        'harcourts',  # https://github.com/alltheplaces/alltheplaces/pull/11703 TODO merged
        'retail_apparel_group',  # https://github.com/alltheplaces/alltheplaces/pull/11704 TODO merged
        'a1_bg',  # https://github.com/alltheplaces/alltheplaces/pull/11751 TODO merged
        'ifly_ca_us',  # https://github.com/alltheplaces/alltheplaces/pull/11760 TODO merged
        'buildit',  # https://github.com/alltheplaces/alltheplaces/pull/11757 TODO merged
        'wells_fargo',  # https://github.com/alltheplaces/alltheplaces/pull/11750 TODO merged
        'vallarta_us',  # https://github.com/alltheplaces/alltheplaces/pull/11737 TODO merged

        'play_pl',  # https://github.com/alltheplaces/alltheplaces/pull/11796 TODO merged
        'rodda_paint_us',  # https://github.com/alltheplaces/alltheplaces/pull/11795 TODO merged
        'ziko_apteka_pl',  # https://github.com/alltheplaces/alltheplaces/pull/11794 TODO merged
        'videopro_au',  # https://github.com/alltheplaces/alltheplaces/pull/11793 TODO merged
        'plus_pl',  # https://github.com/alltheplaces/alltheplaces/pull/11779 TODO merged
        'carinos',  # https://github.com/alltheplaces/alltheplaces/pull/11780 TODO merged
        'ljsilvers',  # https://github.com/alltheplaces/alltheplaces/pull/11781 TODO merged
        'nissan_cz',  # https://github.com/alltheplaces/alltheplaces/pull/11782 TODO merged
        'thelins_konditori_se',  # https://github.com/alltheplaces/alltheplaces/pull/11783 TODO merged
        'move_yourself_au',  # https://github.com/alltheplaces/alltheplaces/pull/11792 TODO merged
        'swedbank_ee',  # https://github.com/alltheplaces/alltheplaces/pull/11805 TODO merged
        'paint_spot_au',  # https://github.com/alltheplaces/alltheplaces/pull/11801 TODO merged
        'gamestop',  # https://github.com/alltheplaces/alltheplaces/pull/11817 TODO merged
        'dsk_bank_bg',  # https://github.com/alltheplaces/alltheplaces/pull/11810 TODO merged
        'empik_pl',  # https://github.com/alltheplaces/alltheplaces/pull/11814 TODO merged
        'coop_alleanza_it',  # https://github.com/alltheplaces/alltheplaces/pull/11815 TODO merged
        'two_men_and_a_truck',  # https://github.com/alltheplaces/alltheplaces/issues/11797 https://github.com/alltheplaces/alltheplaces/pull/11816 TODO merged
        'mcdonalds_latin_america',  # https://github.com/alltheplaces/alltheplaces/pull/11863 TODO merged
        'petstock_au',  # https://github.com/alltheplaces/alltheplaces/pull/11864 TODO merged
        'crown_decorating_centres_gb',  # https://github.com/alltheplaces/alltheplaces/pull/11865 TODO merged
        'torchys_tacos',  # https://github.com/alltheplaces/alltheplaces/pull/11866 TODO merged
        'kaisercraft_au',  # https://github.com/alltheplaces/alltheplaces/pull/11867 TODO merged
        'tag_heuer',  # https://github.com/alltheplaces/alltheplaces/pull/11869 TODO merged
        'fastned',  # https://github.com/alltheplaces/alltheplaces/pull/11857 TODO merged
        'snap_fitness',  # https://github.com/alltheplaces/alltheplaces/pull/11858 TODO merged
        'kfc_it',  # https://github.com/alltheplaces/alltheplaces/pull/11859 TODO merged
        'tops',  # https://github.com/alltheplaces/alltheplaces/pull/11860 TODO merged
        'ccbank_bg',  # https://github.com/alltheplaces/alltheplaces/pull/11861 TODO merged
        'ymca',  # https://github.com/alltheplaces/alltheplaces/issues/11797 https://github.com/alltheplaces/alltheplaces/pull/11819 TODO merged
        'wingstop',  # https://github.com/alltheplaces/alltheplaces/pull/11818 TODO merged
        'byd_auto_au',  # https://github.com/alltheplaces/alltheplaces/pull/11854 TODO merged
        'unicredit_bulbank_bg',  # https://github.com/alltheplaces/alltheplaces/pull/11884 TODO merged
        'alaska_commercial_company',  # https://github.com/alltheplaces/alltheplaces/pull/11909 TODO merged
        'thiele_dk',  # https://github.com/alltheplaces/alltheplaces/pull/11889 TODO merged
        'equatorial_coffee_za',  # https://github.com/alltheplaces/alltheplaces/pull/11881 TODO merged
        'toyota_au',  # https://github.com/alltheplaces/alltheplaces/pull/11800 TODO merged

        'paris_baguette_kr',  # https://github.com/alltheplaces/alltheplaces/issues/11797
        'easybox_bg',  # https://github.com/alltheplaces/alltheplaces/issues/11797
        'twin_peaks',  # some are not street address either
        'coen_markets_us',  # https://github.com/alltheplaces/alltheplaces/issues/11797
        'seven_eleven_ph',  # https://github.com/alltheplaces/alltheplaces/issues/11804
        'sherwin_williams',  # https://github.com/alltheplaces/alltheplaces/issues/11797
        'spar_aspiag',  # https://github.com/alltheplaces/alltheplaces/issues/11797
        'united_dairy_farmers_us',  # https://github.com/alltheplaces/alltheplaces/issues/11797
        'woops_us',  # https://github.com/alltheplaces/alltheplaces/issues/11797
        'easybox_hu',  # https://github.com/alltheplaces/alltheplaces/issues/11797
        'woolworths_za',  # https://github.com/alltheplaces/alltheplaces/issues/11146
        'philz_coffee_us',  # https://github.com/alltheplaces/alltheplaces/issues/11147
        'anthropologie',  # https://github.com/alltheplaces/alltheplaces/issues/11199
        'mussala_bg',  # https://github.com/alltheplaces/alltheplaces/issues/11244
        'asics_us',  # https://github.com/alltheplaces/alltheplaces/issues/11702
        'primaprix',  # https://github.com/alltheplaces/alltheplaces/issues/11709
        'otr_au',  # https://github.com/alltheplaces/alltheplaces/pull/11733 and https://github.com/alltheplaces/alltheplaces/pull/11733#issuecomment-2567035353
        'break_and_wash_pl',  # city name included, country name missing
        'brico_ok_it',  # https://github.com/alltheplaces/alltheplaces/pull/11738 - look at my own comments
        'kay_jewelers',  # https://github.com/alltheplaces/alltheplaces/pull/11758
        'odido_pl',  # includes locations, country missing
        'systeme_u',  # https://github.com/alltheplaces/alltheplaces/pull/11883
        # trailing comma kept on purpose: without it, appending another entry
        # below would silently concatenate the two adjacent string literals
        'easybox_ro',  # see https://github.com/alltheplaces/alltheplaces/blob/master/locations/spiders/easybox_ro.py - needs changes to 13_generate_atp_issue_tracker_report.py to autofix it
    ]
|
||||
|
||||
|
||||
def is_addr_city_known_to_be_broken(atp_code):
    """Return True when `atp_code` is in known_broken_addr_city_spiders()."""
    # addr:city
    return atp_code in known_broken_addr_city_spiders()
|
||||
|
||||
|
||||
def known_broken_addr_city_spiders():
    """Return the hard-coded list of spider codes with broken addr:city.

    A fresh list is built on every call. Each entry is listed once and carries
    a link to the upstream report.
    """
    return [
        'mussala_bg',  # https://github.com/alltheplaces/alltheplaces/issues/11244
        'pandora',  # https://github.com/alltheplaces/alltheplaces/issues/11245
        'mcdonalds_cz',  # https://github.com/alltheplaces/alltheplaces/issues/11708 - not utterly invalid apparently
        'la_anonima_ar',  # https://github.com/alltheplaces/alltheplaces/issues/11786
        'shell',  # https://github.com/alltheplaces/alltheplaces/issues/11788
        'spar_bw_mz_na_sz_za',  # https://github.com/alltheplaces/alltheplaces/issues/11789
    ]
|
||||
|
||||
|
||||
def is_missing_brand_field_worth_mentioning(atp_code):
|
||||
if atp_code in [
|
||||
'lukoil', 'mol', # too complex for me - multibrand spiders
|
||||
|
@ -496,10 +509,11 @@ def is_missing_brand_field_worth_mentioning(atp_code):
|
|||
|
||||
def missing_brand_wikidata_worth_mentioning(atp_code):
    """Decide whether a missing brand:wikidata should be reported for `atp_code`.

    Spiders listed below are never reported; for every other spider the answer
    is whatever allow_very_low_priority_atp_logging() returns.
    """
    if atp_code in [
        'conad_it',  # https://github.com/alltheplaces/alltheplaces/issues/11950
    ]:
        return False
    return allow_very_low_priority_atp_logging()  # https://github.com/alltheplaces/alltheplaces/issues/11950
|
||||
|
||||
|
||||
def is_empty_file_for_spider_worth_mentioning(atp_code):
|
||||
if atp_code in [
|
||||
|
@ -600,10 +614,10 @@ def opening_hours_key():
|
|||
|
||||
def keys_with_value_link():
|
||||
returned = ["website", 'website_may_be_broken', "operator:website", 'website:en', 'website:fr', 'website:de', 'website:kr', 'website:cn', 'website:menu', 'website:orders', 'website:orders:en', 'website:orders:ar', 'reservation:website',
|
||||
'contact:webcam', # should not be contact...
|
||||
'contact:tripadvisor', 'contact:yelp',
|
||||
'source:website', 'brand:website', # probably should be eliminated - TODO
|
||||
]
|
||||
'contact:webcam', # should not be contact...
|
||||
'contact:tripadvisor', 'contact:yelp',
|
||||
'source:website', 'brand:website', # probably should be eliminated - TODO
|
||||
]
|
||||
for code in language_tag_knowledge.all_iso_639_1_language_codes():
|
||||
if "website:" + code not in returned:
|
||||
returned.append("website:" + code)
|
||||
|
@ -616,11 +630,13 @@ def keys_with_value_link():
|
|||
# TODO what about website:orders
|
||||
# TODO website:fr
|
||||
|
||||
|
||||
def keys_with_possible_link():
    """Return the hard-coded list of tag keys whose value may be a link.

    A fresh list is built on every call.
    """
    return [
        "image", "@source_uri", "atp_ref", 'ref',
        "contact:facebook", 'contact:youtube', 'contact:yelp', 'contact:twitter',
        'contact:instagram', 'contact:linkedin', 'contact:tiktok', 'contact:tripadvisor',
        'operator:facebook', 'operator:twitter',

        'icon',  # also in atp_tags_to_be_remove_completely_and_ignored
    ]
|
||||
|
||||
|
||||
def atp_tags_very_likely_not_usable_for_osm_import(tags):
|
||||
returned = [
|
||||
|
@ -663,7 +679,7 @@ def atp_tags_very_likely_not_usable_for_osm_import(tags):
|
|||
# bogus name tags has a higher priority anyway
|
||||
#
|
||||
# so lets throw it away without even attempts to use it
|
||||
'branch:en', # like branch
|
||||
'branch:en', # like branch
|
||||
|
||||
# extra detail not worth adding to OSM
|
||||
# fluctuates wildly
|
||||
|
@ -731,7 +747,7 @@ def atp_tags_to_be_remove_completely_and_ignored():
|
|||
"nsi_id", # internal data
|
||||
|
||||
# repeated entries that are not useful (at least for me and OSM) that ATP wants to keep
|
||||
'brand:logo', 'icon', # https://github.com/alltheplaces/alltheplaces/issues/11183
|
||||
'brand:logo', 'icon', # https://github.com/alltheplaces/alltheplaces/issues/11183
|
||||
|
||||
# more keys to be removed
|
||||
'storeClass', 'owner:type', 'ownership_type', 'kioskType', 'operator:facebook', 'operator:twitter',
|
||||
|
@ -745,6 +761,7 @@ def atp_tags_to_be_remove_completely_and_ignored():
|
|||
def dubious_keys_raising_alarm():
    """Return the keys considered dubious; currently exactly generic_type_keys()."""
    return generic_type_keys()
|
||||
|
||||
|
||||
def generic_type_keys():
    """Return the hard-coded list of generic "type"-like tag keys."""
    return ['location_type', 'store_type', 'storeType', 'type']
|
||||
|
||||
|
@ -786,28 +803,28 @@ def ignored_atp_codes():
|
|||
# https://github.com/osmlab/name-suggestion-index/tags
|
||||
# (currently none)
|
||||
|
||||
'department_veterans_affairs', # https://github.com/alltheplaces/alltheplaces/pull/11905 TODO MERGED
|
||||
'billa', # https://github.com/alltheplaces/alltheplaces/pull/11706 TODO MERGED
|
||||
'coop_centro_italia_it', # https://github.com/alltheplaces/alltheplaces/pull/11942 TODO_MERGED
|
||||
'sony_gb', # https://github.com/alltheplaces/alltheplaces/issues/11710 TODO_MERGED
|
||||
'topgolf_us', # https://github.com/alltheplaces/alltheplaces/pull/11923 TODO_MERGED
|
||||
'department_veterans_affairs', # https://github.com/alltheplaces/alltheplaces/pull/11905 TODO MERGED
|
||||
'billa', # https://github.com/alltheplaces/alltheplaces/pull/11706 TODO MERGED
|
||||
'coop_centro_italia_it', # https://github.com/alltheplaces/alltheplaces/pull/11942 TODO_MERGED
|
||||
'sony_gb', # https://github.com/alltheplaces/alltheplaces/issues/11710 TODO_MERGED
|
||||
'topgolf_us', # https://github.com/alltheplaces/alltheplaces/pull/11923 TODO_MERGED
|
||||
|
||||
'puebloweb_pr_us', # https://github.com/alltheplaces/alltheplaces/pull/11908
|
||||
'cinnabon_ru', # cafe or fast food? maybe these should be considered as matching? TODO - see file:///media/mateusz/OSM_cache/ATP_matcher_cache/output_for_global_scan/missing_shops__cinnabon_ru.html (generated by 14_...) and https://www.openstreetmap.org/search?query=Cinnabon%2C+Russia#map=19/55.768162/37.598584
|
||||
'benchmarx_gb', # TODO_LOW_PRIORITY https://www.benchmarxkitchens.co.uk/branches/llanelli - "Located inside Travis Perkins" - should it be mapped as a separate shop then?
|
||||
'sklavenitis_gr', # https://github.com/alltheplaces/alltheplaces/pull/11904
|
||||
'halia_baluvana_ua', # https://github.com/alltheplaces/alltheplaces/issues/11902
|
||||
'spring_market_us', # https://github.com/alltheplaces/alltheplaces/issues/11903
|
||||
'tesla', # https://github.com/alltheplaces/alltheplaces/issues/11711
|
||||
'oscar_wylee', # https://github.com/alltheplaces/alltheplaces/issues/11862
|
||||
'okta_mk', # https://github.com/alltheplaces/alltheplaces/pull/11899
|
||||
'dierbergs', # https://github.com/alltheplaces/alltheplaces/pull/11898
|
||||
'mirabito_us', # https://github.com/alltheplaces/alltheplaces/issues/11900
|
||||
'puebloweb_pr_us', # https://github.com/alltheplaces/alltheplaces/pull/11908
|
||||
'cinnabon_ru', # cafe or fast food? maybe these should be considered as matching? TODO - see file:///media/mateusz/OSM_cache/ATP_matcher_cache/output_for_global_scan/missing_shops__cinnabon_ru.html (generated by 14_...) and https://www.openstreetmap.org/search?query=Cinnabon%2C+Russia#map=19/55.768162/37.598584
|
||||
'benchmarx_gb', # TODO_LOW_PRIORITY https://www.benchmarxkitchens.co.uk/branches/llanelli - "Located inside Travis Perkins" - should it be mapped as a separate shop then?
|
||||
'sklavenitis_gr', # https://github.com/alltheplaces/alltheplaces/pull/11904
|
||||
'halia_baluvana_ua', # https://github.com/alltheplaces/alltheplaces/issues/11902
|
||||
'spring_market_us', # https://github.com/alltheplaces/alltheplaces/issues/11903
|
||||
'tesla', # https://github.com/alltheplaces/alltheplaces/issues/11711
|
||||
'oscar_wylee', # https://github.com/alltheplaces/alltheplaces/issues/11862
|
||||
'okta_mk', # https://github.com/alltheplaces/alltheplaces/pull/11899
|
||||
'dierbergs', # https://github.com/alltheplaces/alltheplaces/pull/11898
|
||||
'mirabito_us', # https://github.com/alltheplaces/alltheplaces/issues/11900
|
||||
|
||||
# https://github.com/alltheplaces/alltheplaces/issues/11712
|
||||
# non-string values
|
||||
'mochachos',
|
||||
'eathappy', # https://github.com/alltheplaces/alltheplaces/pull/11137 - author notified
|
||||
'eathappy', # https://github.com/alltheplaces/alltheplaces/pull/11137 - author notified
|
||||
'indigo',
|
||||
'opendata_mos_hotels_ru',
|
||||
'marriott_hotels',
|
||||
|
@ -822,17 +839,17 @@ def ignored_atp_codes():
|
|||
# after merge or fix move to
|
||||
# 14_generate_atp_issue_reports_about_poorly_matched_entries.py
|
||||
# until lists are regenerated
|
||||
'kafkas_gr', # Kafkas vs ΚΑΥΚΑΣ, electrical vs electronic... https://github.com/alltheplaces/alltheplaces/issues/11041
|
||||
'hirebase_gb', # ask people is separate mapping like https://www.openstreetmap.org/node/8948452036 preferable
|
||||
'revolution_laundry', # is https://stores.revolution-laundry.com/fr-fr/france-fra/saint-honore-les-bains/laverie-revolution-laundry-89271199 shop=laundry self_service=yes ? Or some type of vending machine?
|
||||
'waynes_coffee', # https://www.openstreetmap.org/note/4476353
|
||||
'penske', # truck rental mapped as shop=rental, in OSM it seems to be mapped as amenity=car_rental
|
||||
'kafkas_gr', # Kafkas vs ΚΑΥΚΑΣ, electrical vs electronic... https://github.com/alltheplaces/alltheplaces/issues/11041
|
||||
'hirebase_gb', # ask people is separate mapping like https://www.openstreetmap.org/node/8948452036 preferable
|
||||
'revolution_laundry', # is https://stores.revolution-laundry.com/fr-fr/france-fra/saint-honore-les-bains/laverie-revolution-laundry-89271199 shop=laundry self_service=yes ? Or some type of vending machine?
|
||||
'waynes_coffee', # https://www.openstreetmap.org/note/4476353
|
||||
'penske', # truck rental mapped as shop=rental, in OSM it seems to be mapped as amenity=car_rental
|
||||
|
||||
# why not detected in 14? Not enough pharmacies?
|
||||
'dia_es', # https://github.com/alltheplaces/alltheplaces/issues/11253
|
||||
'dia_es', # https://github.com/alltheplaces/alltheplaces/issues/11253
|
||||
|
||||
'dodo_pizza', # https://github.com/alltheplaces/alltheplaces/issues/11066
|
||||
'van_wall_us', # https://github.com/alltheplaces/alltheplaces/issues/11071
|
||||
'dodo_pizza', # https://github.com/alltheplaces/alltheplaces/issues/11066
|
||||
'van_wall_us', # https://github.com/alltheplaces/alltheplaces/issues/11071
|
||||
|
||||
# TODO: likely needs different alphabet, remove here and rerun
|
||||
# 14_generate_atp_issue_reports_about_poorly_matched_entries.py
|
||||
|
@ -841,30 +858,30 @@ def ignored_atp_codes():
|
|||
|
||||
# remove and run following code to explore
|
||||
# 14_generate_atp_issue_reports_about_poorly_matched_entries.py
|
||||
'longchamp_eu', # require investigating proper shop value
|
||||
'longchamp_eu', # require investigating proper shop value
|
||||
|
||||
'mikucha_th', # claimed website tags seem to be 404ing
|
||||
'patisserie_valerie_gb', # at least atp_ref = 1057031 has bad website tag - links mall
|
||||
'right_at_home_gb', # reinvestigate tagging
|
||||
'crocs_es', # is it even listing standalone shops? From quick check: no
|
||||
'super_jimat_my', # confusing mix of two brands
|
||||
'sushi_express_tw', # poor match, foreign letters, not investigated further
|
||||
'la_vie_claire_fr', # in OSM seems listed as shop=convenience/supermarket
|
||||
'united_surgical_partners_international', # seems to miss actual names
|
||||
'asics_eu', # data looks dubious, very poor matching
|
||||
'boscovs_us', # locations mismatch locations on website - recheck in some time
|
||||
'landi_ch', # OSM IRC: 'Landi are a weird category of shops, supermarket is overstating it, country_store is not entirely true as Landi also sells food like a convenience store…' - see also https://github.com/alltheplaces/alltheplaces/pull/11097
|
||||
'mikucha_th', # claimed website tags seem to be 404ing
|
||||
'patisserie_valerie_gb', # at least atp_ref = 1057031 has bad website tag - links mall
|
||||
'right_at_home_gb', # reinvestigate tagging
|
||||
'crocs_es', # is it even listing standalone shops? From quick check: no
|
||||
'super_jimat_my', # confusing mix of two brands
|
||||
'sushi_express_tw', # poor match, foreign letters, not investigated further
|
||||
'la_vie_claire_fr', # in OSM seems listed as shop=convenience/supermarket
|
||||
'united_surgical_partners_international', # seems to miss actual names
|
||||
'asics_eu', # data looks dubious, very poor matching
|
||||
'boscovs_us', # locations mismatch locations on website - recheck in some time
|
||||
'landi_ch', # OSM IRC: 'Landi are a weird category of shops, supermarket is overstating it, country_store is not entirely true as Landi also sells food like a convenience store…' - see also https://github.com/alltheplaces/alltheplaces/pull/11097
|
||||
'soeder_ch', # https://github.com/alltheplaces/alltheplaces/issues/10860
|
||||
'coop_se', # https://github.com/alltheplaces/alltheplaces/issues/10890
|
||||
'brookshires_us', # https://github.com/alltheplaces/alltheplaces/pull/11065#pullrequestreview-2369249680
|
||||
'brookshires_us', # https://github.com/alltheplaces/alltheplaces/pull/11065#pullrequestreview-2369249680
|
||||
'moneygram', # https://github.com/alltheplaces/alltheplaces/issues/6784
|
||||
'bricofer_it', # https://community.openstreetmap.org/t/italia-aggiunta-al-matcher-sperimentale-di-all-the-places/117208/8
|
||||
'ocharleys_us', # https://github.com/alltheplaces/alltheplaces/issues/10995
|
||||
'pizza_express_gb', # https://github.com/alltheplaces/alltheplaces/issues/11007
|
||||
'dhl_express_us_ca', # https://github.com/alltheplaces/alltheplaces/issues/11009
|
||||
'ocharleys_us', # https://github.com/alltheplaces/alltheplaces/issues/10995
|
||||
'pizza_express_gb', # https://github.com/alltheplaces/alltheplaces/issues/11007
|
||||
'dhl_express_us_ca', # https://github.com/alltheplaces/alltheplaces/issues/11009
|
||||
"chorten_pl", # not actually branded, huge offsets
|
||||
# https://www.openstreetmap.org/?mlat=50.09744263&mlon=19.99892616#map=19/50.09744/19.99893 https://www.openstreetmap.org/node/1948024743 - 240m offset from its stated address
|
||||
'day_today_gb', # about 10% are outdated entries - see https://github.com/alltheplaces/alltheplaces/discussions/10941#discussioncomment-10883201 and https://github.com/alltheplaces/alltheplaces/pull/10974
|
||||
# https://www.openstreetmap.org/?mlat=50.09744263&mlon=19.99892616#map=19/50.09744/19.99893 https://www.openstreetmap.org/node/1948024743 - 240m offset from its stated address
|
||||
'day_today_gb', # about 10% are outdated entries - see https://github.com/alltheplaces/alltheplaces/discussions/10941#discussioncomment-10883201 and https://github.com/alltheplaces/alltheplaces/pull/10974
|
||||
"krakow_public_transport_vending_krk_pl", # requires matching by object type
|
||||
"abc_pl", # cannot be assumed to have brands, effectively dropped from ATP by qa.py anyway (once ATP branch vs name is solved it can be used again)
|
||||
'cukiernia_sowa_pl', # often amenity=cafe, not sure how to handle
|
||||
|
@ -873,19 +890,19 @@ def ignored_atp_codes():
|
|||
"nsw_ambulance_au", # no brand, not sure is brand applying to those...
|
||||
"victorian_government_road_safety_cameras_au", # no brand and it is really correct
|
||||
'mall_maverick', # suspect and low quality data, anyway has no brand fields
|
||||
'opel_rent_de', # looks like an aspect of a car rental place
|
||||
'opel_rent_de', # looks like an aspect of a car rental place
|
||||
|
||||
'asian_paints_beautiful_homes_in', # nonsense locations in ocean, no trust in other ones
|
||||
'kfc_hk', # bogus locations
|
||||
'botteg_aveneta', # Heathrow Airport Terminal 4, Hounslow, Middlesex put into central London, under London node (atp_ref = 60226)
|
||||
"forever_new_in", # I see location in China...
|
||||
'asian_paints_beautiful_homes_in', # nonsense locations in ocean, no trust in other ones
|
||||
'kfc_hk', # bogus locations
|
||||
'botteg_aveneta', # Heathrow Airport Terminal 4, Hounslow, Middlesex put into central London, under London node (atp_ref = 60226)
|
||||
"forever_new_in", # I see location in China...
|
||||
|
||||
'cvs_us', # very confusing data format
|
||||
# store_type = MinuteClinic ?
|
||||
# why pharmacy has supermarket?
|
||||
# and their website is blocked for me in Poland, I get https://www.cvs.com/international.html
|
||||
|
||||
'trade_point_gb', # this spider lists counters in actual shops, rather than standalone shops, has confusing store_type field
|
||||
'trade_point_gb', # this spider lists counters in actual shops, rather than standalone shops, has confusing store_type field
|
||||
|
||||
# see also https://github.com/osmlab/name-suggestion-index/issues/10028
|
||||
|
||||
|
@ -898,8 +915,8 @@ def ignored_atp_codes():
|
|||
|
||||
# code author got notified
|
||||
# Cj-Malone
|
||||
'gbfs', # https://github.com/alltheplaces/alltheplaces/issues/11008
|
||||
'leon', # https://github.com/alltheplaces/alltheplaces/issues/11952
|
||||
'gbfs', # https://github.com/alltheplaces/alltheplaces/issues/11008
|
||||
'leon', # https://github.com/alltheplaces/alltheplaces/issues/11952
|
||||
|
||||
# car shops
|
||||
# note: qa.py started throwing out all shop=car and shop=car_repair in general
|
||||
|
@ -908,25 +925,26 @@ def ignored_atp_codes():
|
|||
'renault', # https://github.com/alltheplaces/alltheplaces/issues/10244
|
||||
'kia', # show only aspect of dealer, see say https://www.openstreetmap.org/way/400059648 where they have multiple brands
|
||||
'mercedes_benz_group', # the same, see atp_ref = GS0008313 - in such case https://www.openstreetmap.org/way/639491972 should not get website = http://www.mercedes-benz-ibach.ch/
|
||||
'hyundai_no', # again, listing multi-brand car repair locations as Hyundai-specific
|
||||
'hyundai_no', # again, listing multi-brand car repair locations as Hyundai-specific
|
||||
# see https://www.openstreetmap.org/node/10199475251 https://www.hyundai.com/no/no/kjop/bil/forhandlere/fredrikstad.html
|
||||
'hyundai_de',
|
||||
'hyundai_us',
|
||||
|
||||
'super_dekk_no', # aspect of car_repair shops(s), dubious that it should be mapped separately
|
||||
'super_dekk_no', # aspect of car_repair shops(s), dubious that it should be mapped separately
|
||||
|
||||
# unclear licensing situation
|
||||
# see https://github.com/alltheplaces/alltheplaces/issues/8790
|
||||
'cbrfree_au', 'james_retail_gb', 'queensland_government_road_safety_cameras_au',
|
||||
'terrible_herbst', 'thales_fr',
|
||||
'worldcat', # https://github.com/alltheplaces/alltheplaces/pull/10923#issuecomment-2397362271
|
||||
'worldcat', # https://github.com/alltheplaces/alltheplaces/pull/10923#issuecomment-2397362271
|
||||
|
||||
]
|
||||
|
||||
def processing_plan(): # TODO remove this vestigal function
|
||||
|
||||
def processing_plan(): # TODO remove this vestigal function
|
||||
returned = {}
|
||||
known_data = shared.country_data()
|
||||
requested_codes = os.getenv("OSM_ATM_MATCHER_COUNTRY_CODE_LIST").split(",") # TODO - this is surely not used anymore? and can be removed from .env ?
|
||||
requested_codes = os.getenv("OSM_ATM_MATCHER_COUNTRY_CODE_LIST").split(",") # TODO - this is surely not used anymore? and can be removed from .env ?
|
||||
for code in requested_codes:
|
||||
for name, data in known_data.items():
|
||||
if data['country_code'] == code:
|
||||
|
@ -941,13 +959,16 @@ def good_match_distance_in_kilometers():
|
|||
def maximum_missing_shop_distance_in_kilometers():
    """
    return the upper bound, in kilometers, of the missing-shop distance

    other *_missing_shop_distance_in_kilometers() helpers in this file
    derive their values from this one
    """
    upper_bound_in_kilometers = 0.9
    return upper_bound_in_kilometers
|
||||
|
||||
|
||||
def default_missing_shop_distance_in_kilometers():
    """
    return the default missing-shop distance in kilometers

    defined as one third of maximum_missing_shop_distance_in_kilometers()
    """
    upper_bound = maximum_missing_shop_distance_in_kilometers()
    return upper_bound / 3
|
||||
|
||||
|
||||
def increased_missing_shop_distance_in_kilometers():
    """
    return the enlarged missing-shop distance in kilometers

    intended for say supermarkets; currently the same value as
    maximum_missing_shop_distance_in_kilometers()
    """
    enlarged_distance = maximum_missing_shop_distance_in_kilometers()
    return enlarged_distance
|
||||
|
||||
|
||||
def missing_shop_distance_in_kilometers_for_specific_case(object_tags, spider_code=None):
|
||||
if spider_code == None:
|
||||
spider_code = object_tags["@spider"]
|
||||
|
@ -977,7 +998,7 @@ def the_same_feature_type(tags_a, tags_b):
|
|||
values.sort()
|
||||
conflict = values[0] + " vs " + values[1]
|
||||
if conflict in matching_rather_than_type_conflict():
|
||||
return True # dubious conflict, lets report a match
|
||||
return True # dubious conflict, lets report a match
|
||||
if conflict not in clear_type_conflicts() and conflict not in undecided_type_conflicts():
|
||||
name_a = tags_a.get("name", None)
|
||||
if name_a == None:
|
||||
|
@ -1011,6 +1032,7 @@ def the_same_feature_type(tags_a, tags_b):
|
|||
raise
|
||||
return False
|
||||
|
||||
|
||||
def undecided_type_conflicts():
|
||||
# TODO: put them into clear_type_conflicts() or into matching_rather_than_type_conflict()
|
||||
return [
|
||||
|
@ -1038,6 +1060,7 @@ def undecided_type_conflicts():
|
|||
'amenity=pharmacy vs healthcare=audiologist',
|
||||
]
|
||||
|
||||
|
||||
def clear_type_conflicts():
|
||||
"""
|
||||
to help detecting dubious conflicts
|
||||
|
@ -3686,6 +3709,7 @@ def clear_type_conflicts():
|
|||
# TODO - autogenerate more of these?
|
||||
]
|
||||
|
||||
|
||||
def matching_rather_than_type_conflict():
|
||||
return [
|
||||
'shop=chocolate vs sweet bakery',
|
||||
|
@ -3725,14 +3749,15 @@ def matching_rather_than_type_conflict():
|
|||
'shop=gift vs shop=stationery',
|
||||
'office=accountant vs office=consulting',
|
||||
'office=accountant vs office=tax_advisor',
|
||||
'shop=electrical vs shop=electronics', # maybe even merge into one group
|
||||
'shop=electrical vs shop=electronics', # maybe even merge into one group
|
||||
]
|
||||
|
||||
|
||||
def canonical_feature(object_tags):
|
||||
# TODO: maybe should match
|
||||
# shop=car vs shop=motorcycle - wait, shop=car is banned, right? what about shop=motorcycle
|
||||
# amenity=bank vs office=financial
|
||||
# amenity=bank vs amenity=money_transfer
|
||||
|
||||
"""
|
||||
return string to allow comparing object types
|
||||
|
||||
|
@ -3819,23 +3844,29 @@ def return_info_about_spider_to_debug_it(atp_code):
|
|||
returned += atp_unpacked_folder() + atp_code + '.geojson'
|
||||
return returned
|
||||
|
||||
|
||||
def link_to_spider(atp_code):
    """
    return path of the spider source file, relative to the ATP repository root

    TODO: the assumption that atp_code matches the filename cannot really be made here
    TODO: see https://github.com/alltheplaces/alltheplaces/issues/9687
    TODO: though as a hackish solution that mostly works, it actually works fine
    """
    spiders_folder = "locations/spiders/"
    spider_filename = atp_code + ".py"
    return spiders_folder + spider_filename
|
||||
|
||||
|
||||
def get_github_link_to_spider(atp_code):
    """
    return GitHub URL (master branch of alltheplaces) of the spider file

    the path within the repository comes from link_to_spider(atp_code)
    """
    master_branch_prefix = "https://github.com/alltheplaces/alltheplaces/blob/master/"
    return master_branch_prefix + link_to_spider(atp_code)
|
||||
|
||||
|
||||
def linkified_markdown_atp(atp_code):
    """
    return markdown link: spider code as label, GitHub spider URL as target
    """
    opened_label = "[" + atp_code
    link_target = get_github_link_to_spider(atp_code)
    return opened_label + "](" + link_target + ")"
|
||||
|
||||
|
||||
def repo_location_atp():
    """
    return relative path to the local checkout of the alltheplaces repository
    """
    parent_folder = "../___other/"
    return parent_folder + "alltheplaces"
|
||||
|
||||
|
||||
def repo_location_nsi():
    """
    return relative path to the local checkout of the name-suggestion-index repository
    """
    parent_folder = "../___other/"
    return parent_folder + "name-suggestion-index"
|
||||
|
||||
|
||||
def git_user_credit_in_commits():
    """
    return value of the GIT_USER_IN_CREDITS_OPTIONAL_VALUE environment variable

    None is returned when the variable is not set
    """
    variable_name = "GIT_USER_IN_CREDITS_OPTIONAL_VALUE"
    return os.environ.get(variable_name)
|
||||
|
|
|
@ -7,6 +7,7 @@ import dulwich
|
|||
import datetime
|
||||
config = __import__("0_config")
|
||||
|
||||
|
||||
def main():
|
||||
repo_url = "https://github.com/alltheplaces/alltheplaces"
|
||||
print(repo_url)
|
||||
|
@ -28,6 +29,7 @@ def main():
|
|||
get_get_and_or_update_repository(repo_url, repo_path)
|
||||
repository_tag_list(repo_path)
|
||||
|
||||
|
||||
def get_get_and_or_update_repository(repo_url, repo_path):
|
||||
try:
|
||||
porcelain.clone(repo_url, repo_path)
|
||||
|
@ -36,6 +38,7 @@ def get_get_and_or_update_repository(repo_url, repo_path):
|
|||
repo = Repo(repo_path)
|
||||
pull(repo, repo_url)
|
||||
|
||||
|
||||
def repository_tag_list(repo_path):
|
||||
repo = Repo(repo_path)
|
||||
|
||||
|
@ -60,4 +63,5 @@ def repository_tag_list(repo_path):
|
|||
tag_time_readable = datetime.datetime.utcfromtimestamp(tag_time).strftime('%Y-%m-%d %H:%M:%S')
|
||||
print(f"Tag: {tag_name}, Added on: {tag_time_readable}")
|
||||
|
||||
|
||||
main()
|
||||
|
|
|
@ -8,17 +8,17 @@ obtain_atp_data = __import__("2_obtain_atp_data")
|
|||
graticule_report = __import__("5_generate_graticule_reports")
|
||||
|
||||
|
||||
raise Exception("payment:mastercard_electronic - investigate") # TODO - investigate this ATP tagging issue
|
||||
raise Exception("mpesa discussed in https://github.com/alltheplaces/alltheplaces/commit/53b11551a30d16ccc4d16658b7b61bfbf66fe87c#r151203743") # TODO - investigate this ATP tagging issue
|
||||
raise Exception("fuel:electricity - https://wiki.openstreetmap.org/wiki/Talk:Key:fuel:*#fuel%3Aelectricity https://github.com/alltheplaces/alltheplaces/pull/11934#issuecomment-2585062788 ")# TODO - investigate this ATP tagging issue
|
||||
raise Exception("payment:mastercard_electronic - investigate") # TODO - investigate this ATP tagging issue
|
||||
raise Exception("mpesa discussed in https://github.com/alltheplaces/alltheplaces/commit/53b11551a30d16ccc4d16658b7b61bfbf66fe87c#r151203743") # TODO - investigate this ATP tagging issue
|
||||
raise Exception("fuel:electricity - https://wiki.openstreetmap.org/wiki/Talk:Key:fuel:*#fuel%3Aelectricity https://github.com/alltheplaces/alltheplaces/pull/11934#issuecomment-2585062788 ") # TODO - investigate this ATP tagging issue
|
||||
raise Exception("""rent:lpg_bottles
|
||||
|
||||
asked on IRC:
|
||||
|
||||
Does anyone knows how to tag that place (such as fuel station) allows rental/refilling of LPG bottles? I see https://taginfo.openstreetmap.org/keys/rent%3Alpg_bottles but looks like import from chronology
|
||||
|
||||
https://wiki.openstreetmap.org/w/index.php?search=Key%3Arent%3Alpg_bottles&title=Special%3ASearch&profile=default&fulltext=1""")# TODO - investigate this ATP tagging issue
|
||||
raise Exception("is there still brand_wikidata in published ATP after https://github.com/alltheplaces/alltheplaces/pull/11938 ?")# TODO - investigate this ATP tagging issue
|
||||
https://wiki.openstreetmap.org/w/index.php?search=Key%3Arent%3Alpg_bottles&title=Special%3ASearch&profile=default&fulltext=1""") # TODO - investigate this ATP tagging issue
|
||||
raise Exception("is there still brand_wikidata in published ATP after https://github.com/alltheplaces/alltheplaces/pull/11938 ?") # TODO - investigate this ATP tagging issue
|
||||
|
||||
raise Exception("name:al - https://github.com/alltheplaces/alltheplaces/pull/11939 - merged")
|
||||
raise Exception("https://github.com/alltheplaces/alltheplaces/pull/11935 - merged")
|
||||
|
@ -27,6 +27,7 @@ raise Exception("https://github.com/alltheplaces/alltheplaces/pull/11934 (for fu
|
|||
raise Exception("secondary - https://github.com/alltheplaces/alltheplaces/pull/11940 - waiting")
|
||||
raise Exception("https://github.com/alltheplaces/alltheplaces/pull/11936 - waiting - about urgent_care - see healthcare:speciality=urgent at https://wiki.openstreetmap.org/wiki/Key:healthcare:speciality and https://wiki.openstreetmap.org/wiki/Proposal:Urgent_care")
|
||||
|
||||
|
||||
def collect_data():
|
||||
"""
|
||||
list undocumented tags being used by ATP and not listed as dubious/skipped/suppressed
|
||||
|
@ -69,18 +70,20 @@ def collect_data():
|
|||
used_tags[key][value] += 1
|
||||
return used_keys, used_tags
|
||||
|
||||
|
||||
def is_freeform_key(key):
    """
    return True if key is treated as a freeform one, False otherwise

    checked in order: tag_knowledge.is_freeform_key(key),
    config.keys_with_value_link(), then keys listed locally below
    """
    if tag_knowledge.is_freeform_key(key):
        return True
    if key in config.keys_with_value_link():
        return True
    # TODO, move info about OSM tags upstream
    locally_listed_keys = (
        'charging_station:output',
        'socket:type2_combo',
        "contact:sms",
        "ref:branch",
        "website:fr",
        "website:en",
        "website:de",
        "website:it",
        'website:orders',
        "name:zh-Hans",
        "branch:ar",
        "addr:full:en",
        "addr:full:ar",
        'addr:province',
        "directions",  # ATPism but looks fairly reasonable under ATYL
        "@source_uri",  # ATP-specific
    )
    return key in locally_listed_keys
|
||||
|
||||
|
||||
def main():
|
||||
supress_atpisms = []
|
||||
used_tags_listing = ""
|
||||
|
|
|
@ -1,17 +1,19 @@
|
|||
import re
|
||||
import qa_autofix_atp
|
||||
import shared
|
||||
import matcher
|
||||
import json
|
||||
import os
|
||||
import rich
|
||||
obtain_atp_data = __import__("2_obtain_atp_data")
|
||||
import matcher
|
||||
qa = __import__("qa")
|
||||
config = __import__("0_config")
|
||||
import shared
|
||||
import qa_autofix_atp
|
||||
import re
|
||||
|
||||
|
||||
def threshold(key):
    """
    return the threshold used for the given key

    currently the same value is returned for every key
    """
    shared_threshold = 100
    return shared_threshold
|
||||
|
||||
|
||||
def expected_unique_keys():
|
||||
returned = ['branch', 'image', 'phone', 'contact:facebook', 'contact:youtube', 'contact:yelp', 'contact:twitter', 'contact:instagram', 'contact:linkedin', 'contact:tiktok', 'contact:tripadvisor', 'email', 'website', 'website:en', 'website:fr', 'website:de', 'website:kr', 'website:cn', 'operator:facebook', 'operator:twitter', 'website:menu', 'website:orders']
|
||||
for code in shared.valid_country_codes():
|
||||
|
@ -19,6 +21,7 @@ def expected_unique_keys():
|
|||
returned.append("website:" + code)
|
||||
return returned
|
||||
|
||||
|
||||
def main():
|
||||
"""
|
||||
loads ATP files directly and detects some systematic issues
|
||||
|
@ -44,6 +47,7 @@ def main():
|
|||
reports = process_atp(atp_code, reports)
|
||||
show_reports(reports)
|
||||
|
||||
|
||||
def show_reports(reports):
|
||||
#rich.print(reports['repeated_machine_readable_for_config_updates']) # TODO consider listing them?
|
||||
for bad_image in reports['repeated_machine_readable_for_config_updates']['image']:
|
||||
|
@ -58,7 +62,7 @@ def show_reports(reports):
|
|||
print("Maybe `image` or `phone` and similar tags that is repeated over 10 times should be thrown out automatically? Without throwing it out manually by changing spider?\n\nNote that in case where former logos are replaced by actual images they would not continue to be thrown out.")
|
||||
for key in reports:
|
||||
banned = ['repeated_machine_readable_for_config_updates', 'repeated_for_atp_issue_tracker']
|
||||
banned.append('whitespace_suffix_or_prefix_report') # see https://github.com/alltheplaces/alltheplaces/issues/11790
|
||||
banned.append('whitespace_suffix_or_prefix_report') # see https://github.com/alltheplaces/alltheplaces/issues/11790
|
||||
if config.allow_low_priority_atp_logging():
|
||||
raise Exception("should I reeenable whitespace checks?")
|
||||
if key not in banned:
|
||||
|
@ -92,6 +96,7 @@ def record_bad_entry(key, value, counter_dict, examples_dict_of_lists):
|
|||
examples_dict_of_lists[key] = []
|
||||
examples_dict_of_lists[key].append(value)
|
||||
|
||||
|
||||
def this_addr_full_is_like_street_address(value):
|
||||
if qa.is_this_address_suspiciously_short_for_addr_full(value) == False:
|
||||
return False
|
||||
|
@ -101,7 +106,8 @@ def this_addr_full_is_like_street_address(value):
|
|||
has_number = True
|
||||
if has_number:
|
||||
return True
|
||||
return False # `addr:full=France` is a problem but a different one
|
||||
return False # `addr:full=France` is a problem but a different one
|
||||
|
||||
|
||||
def check_for_problems(reports, atp_data, atp_code):
|
||||
repeated_key_check = {}
|
||||
|
@ -157,7 +163,7 @@ def check_for_problems(reports, atp_data, atp_code):
|
|||
record_bad_entry(key='addr:full', value=value, counter_dict=full_address_without_such_suspicion, examples_dict_of_lists=full_address_without_such_suspicion_value_examples)
|
||||
else:
|
||||
record_bad_entry(key='addr:full', value=value, counter_dict=full_address_with_unclassified_state, examples_dict_of_lists=full_address_with_unclassified_state_value_examples)
|
||||
for key in repeated_key_check: # some values, like image=* are expected to be unique
|
||||
for key in repeated_key_check: # some values, like image=* are expected to be unique
|
||||
if key in tags:
|
||||
value = tags[key]
|
||||
try:
|
||||
|
@ -185,20 +191,20 @@ def check_for_problems(reports, atp_data, atp_code):
|
|||
continue
|
||||
reports["repeated_for_atp_issue_tracker"][key] += "* [ ] " + key + " = " + value + " repeated " + str(repeated_key_check[key][value]) + " times in " + config.linkified_markdown_atp(atp_code) + "\n"
|
||||
for key, count in keys_with_whitespace_suffix_or_prefix.items():
|
||||
if count > threshold(key)/20: # really blatantly wrong
|
||||
if count > threshold(key)/20: # really blatantly wrong
|
||||
reports['whitespace_suffix_or_prefix_report'] += "* [ ] " + config.linkified_markdown_atp(atp_code) + " " + str(count) + " times has whitespace prefix/suffix, for " + key + " with values such as `" + whitespace_suffix_or_prefix_example[key] + "`\n"
|
||||
for key, count in none_value.items():
|
||||
if count > 0: # an obvious problem, always problematic
|
||||
if count > 0: # an obvious problem, always problematic
|
||||
reports['none_value_report'] += "* [ ] " + config.linkified_markdown_atp(atp_code) + " " + str(count) + " times has None value, for example for" + key + "`\n"
|
||||
for key, count in numeric_not_string_value.items():
|
||||
if count > 0: # an obvious problem, always problematic
|
||||
if count > 0: # an obvious problem, always problematic
|
||||
reports['numeric_not_string_value_report'] += "* [ ] " + config.linkified_markdown_atp(atp_code) + " " + str(count) + " times has non-string values (numbers/booleans etc), for " + key + " with values such as `" + str(numeric_not_string_value_example[key]) + "`\n"
|
||||
for key, count in suspiciously_street_address_like_full_address.items():
|
||||
if count == 0:
|
||||
continue
|
||||
nonsuspicious_count = full_address_without_such_suspicion.get(key, 0)
|
||||
example_value = suspiciously_street_address_like_full_address_value_examples[key][0]
|
||||
if count > nonsuspicious_count * 10: # of high importance for me, it breaks geocoding but many spiders have a very occasional breakage so not every single one should end there
|
||||
if count > nonsuspicious_count * 10: # of high importance for me, it breaks geocoding but many spiders have a very occasional breakage so not every single one should end there
|
||||
# see https://github.com/alltheplaces/alltheplaces/issues/11797
|
||||
# does not make sense to report what appears only rarely, unless these reports are acted on
|
||||
reports['suspiciously_short_addr:full'] += "* [ ] " + config.linkified_markdown_atp(atp_code) + " " + str(count) + " times has suspiciously short values, for " + key + " with example value `" + example_value + "`\n"
|
||||
|
@ -222,6 +228,7 @@ def check_for_problems(reports, atp_data, atp_code):
|
|||
)
|
||||
return reports
|
||||
|
||||
|
||||
def show_addr_values(atp_code, key, suspiciously_street_address_like_full_address_value_examples, full_address_without_such_suspicion_value_examples, full_address_with_unclassified_state_value_examples):
|
||||
print()
|
||||
print()
|
||||
|
@ -233,4 +240,5 @@ def show_addr_values(atp_code, key, suspiciously_street_address_like_full_addres
|
|||
print("unclassified")
|
||||
print(full_address_with_unclassified_state_value_examples.get(key))
|
||||
|
||||
|
||||
main()
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
import os
|
||||
import serializing
|
||||
import show_data
|
||||
graticule_report = __import__("5_generate_graticule_reports")
|
||||
obtain_atp_data = __import__("2_obtain_atp_data")
|
||||
config = __import__("0_config")
|
||||
import show_data
|
||||
import serializing
|
||||
import os
|
||||
|
||||
|
||||
def multiplier(atp_code):
|
||||
for part in atp_code.split("_"):
|
||||
|
@ -13,24 +14,26 @@ def multiplier(atp_code):
|
|||
return 10
|
||||
return 1
|
||||
|
||||
|
||||
def should_be_shown(atp_code, failed_matches, matched, cumulated_likelyhood_of_missing_from_osm):
    """
    return True if the spider should be shown, False otherwise

    spiders with fewer than 20 entries in total (failed + matched) are never shown
    otherwise failed matches must exceed the estimate of entries missing from OSM
    plus matched count scaled by 10 and by multiplier(atp_code)
    """
    total_entries = failed_matches + matched
    if total_entries < 20:
        return False  # TODO, review also that
    # TODO lower requirements here a bit
    required_to_exceed = cumulated_likelyhood_of_missing_from_osm + matched * 10 * multiplier(atp_code)
    return failed_matches > required_to_exceed
|
||||
|
||||
|
||||
def ignored_atp_codes():
|
||||
return [
|
||||
'mcdonalds_eg', # TODO delete on rerun
|
||||
'mcdonalds_eg', # TODO delete on rerun
|
||||
|
||||
# reportedly mapped wrong in OSM
|
||||
'unicredit_bulbank_bg', # https://github.com/alltheplaces/alltheplaces/pull/11901#issuecomment-2577769704
|
||||
'unicredit_bulbank_bg', # https://github.com/alltheplaces/alltheplaces/pull/11901#issuecomment-2577769704
|
||||
|
||||
# seems simply not mapped in OSM
|
||||
'dhl_express_es', # though not sure what is going on
|
||||
'pk_equipment', # https://www.openstreetmap.org/note/4581845
|
||||
'dhl_express_es', # though not sure what is going on
|
||||
'pk_equipment', # https://www.openstreetmap.org/note/4581845
|
||||
'burger_king_eg',
|
||||
'gap_trade_gb',
|
||||
'mercy',
|
||||
|
@ -51,7 +54,7 @@ def ignored_atp_codes():
|
|||
'oggi_sorvetes_br',
|
||||
'digi_telecommunications_my',
|
||||
'agnvet_au',
|
||||
'removery_us', # though also location accuracy is dubious
|
||||
'removery_us', # though also location accuracy is dubious
|
||||
'go_games_toys_us', # though also location accuracy is dubious
|
||||
'loxam_fr',
|
||||
'sunbelt_rentals_us_ca',
|
||||
|
@ -60,9 +63,9 @@ def ignored_atp_codes():
|
|||
'glasfit_za',
|
||||
'mussala_bg',
|
||||
'kpmg',
|
||||
'hifi_corp', # also South Africa :)
|
||||
'easybox_bg', # and they seem to use English language brand - https://sameday.bg/easybox/
|
||||
'cashterminal_bg', # and they seem to use English language brand
|
||||
'hifi_corp', # also South Africa :)
|
||||
'easybox_bg', # and they seem to use English language brand - https://sameday.bg/easybox/
|
||||
'cashterminal_bg', # and they seem to use English language brand
|
||||
'manhattan_bagel',
|
||||
'rogers_communications',
|
||||
'completude',
|
||||
|
@ -71,7 +74,7 @@ def ignored_atp_codes():
|
|||
'euromobil_nl',
|
||||
'side_step',
|
||||
'billini_au',
|
||||
'asics_us', # though also location accuracy is dubious
|
||||
'asics_us', # though also location accuracy is dubious
|
||||
'tui',
|
||||
'dunkin_sa',
|
||||
'reebok',
|
||||
|
@ -117,6 +120,7 @@ def ignored_atp_codes():
|
|||
'myhouse_au',
|
||||
]
|
||||
|
||||
|
||||
def how_likely_that_it_is_not_mapped(entry):
|
||||
"""
|
||||
this exists to reduce false positive ratio, and to guess cases which are likely simply not mapped
|
||||
|
@ -174,17 +178,19 @@ def how_likely_that_it_is_not_mapped(entry):
|
|||
if entry.atp_center['lon'] > 40:
|
||||
# far Asia is even less mapped than Europe
|
||||
probability += 0.1
|
||||
probability = probability * 1.1 # reduce to further rescale, TODO_LOW_PRIORITY
|
||||
probability = probability * 1.1 # reduce to further rescale, TODO_LOW_PRIORITY
|
||||
if probability > 1:
|
||||
probability = 1
|
||||
return probability
|
||||
|
||||
|
||||
def nothing_to_report_marker_filepath(atp_code):
    """
    return path of the ".success" marker file for the given spider code

    the marker lives in the "validation_runs/" subfolder of
    config.build_storage_folder(); that subfolder is created if missing

    raises OSError (from os.makedirs) if the folder cannot be created,
    for example when a regular file already occupies that path
    """
    folder = config.build_storage_folder() + "validation_runs/"
    # exist_ok=True replaces separate "check, then create" steps, so there is
    # no failure when the folder appears between the check and the creation
    os.makedirs(folder, exist_ok=True)
    return folder + "systematic_mismatch_not_triggered_" + atp_code + ".success"
|
||||
|
||||
|
||||
area = graticule_report.global_graticule_coverage()
|
||||
skipped_as_on_ignore_list_or_empty = []
|
||||
for atp_code in obtain_atp_data.all_spider_codes_iterator():
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
import webbrowser
|
||||
import os
|
||||
import serializing
|
||||
import show_data
|
||||
graticule_report = __import__("5_generate_graticule_reports")
|
||||
obtain_atp_data = __import__("2_obtain_atp_data")
|
||||
config = __import__("0_config")
|
||||
import show_data
|
||||
import serializing
|
||||
import os
|
||||
import webbrowser
|
||||
|
||||
|
||||
def main():
|
||||
shown_spiders = []
|
||||
|
@ -16,7 +17,7 @@ def main():
|
|||
area = graticule_report.global_graticule_coverage()
|
||||
for atp_code in obtain_atp_data.all_spider_codes_iterator():
|
||||
if atp_code in [
|
||||
]:
|
||||
]:
|
||||
continue
|
||||
great_brand_match = 0
|
||||
poor_brand_match = 0
|
||||
|
@ -68,4 +69,5 @@ def main():
|
|||
if len(shown_spiders) >= 30 and len(shown_spiders) % 10 == 0:
|
||||
print(shown_spiders)
|
||||
|
||||
|
||||
main()
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
obtain_atp_data = __import__("2_obtain_atp_data")
|
||||
import qa
|
||||
obtain_atp_data = __import__("2_obtain_atp_data")
|
||||
|
||||
|
||||
def main():
|
||||
for atp_code, parsed_content in obtain_atp_data.spider_codes_iterator_with_data():
|
||||
|
@ -9,5 +10,6 @@ def main():
|
|||
continue
|
||||
qa.remove_bad_data(entry['properties'], atp_code)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
@ -1,27 +1,30 @@
|
|||
import matcher
|
||||
import rich
|
||||
import diskcache
|
||||
import requests
|
||||
import os
|
||||
import serializing
|
||||
import show_data
|
||||
graticule_report = __import__("5_generate_graticule_reports")
|
||||
obtain_atp_data = __import__("2_obtain_atp_data")
|
||||
config = __import__("0_config")
|
||||
import show_data
|
||||
import serializing
|
||||
import os
|
||||
import requests
|
||||
import diskcache
|
||||
import rich
|
||||
import matcher
|
||||
|
||||
# mismatches seem to be caused by
|
||||
# duplicated wikidata entries (wikidata problem)
|
||||
# editors changing just name tag, leaving brand, brand:wikidata etc in an inconsistent state
|
||||
# - see https://github.com/orgs/organicmaps/discussions/10043 where I started attempt to report this bug
|
||||
|
||||
|
||||
def cache_path():
    """
    return location of the Wikidata cache: config.cache_folder() followed by 'wikidata_cache'
    """
    base_folder = config.cache_folder()
    return base_folder + 'wikidata_cache'
|
||||
|
||||
|
||||
wikidata_cache = diskcache.Cache(cache_path(), eviction_policy="none")
|
||||
print(len(wikidata_cache), "entries cached by Wikidata cache")
|
||||
|
||||
|
||||
def get_wikidata_label(wikidata_id):
|
||||
index = wikidata_id + "_label" # switch to using index
|
||||
index = wikidata_id + "_label" # switch to using index
|
||||
if index in wikidata_cache:
|
||||
return wikidata_cache[index]
|
||||
fetched = download_wikidata_label(wikidata_id)
|
||||
|
@ -30,6 +33,7 @@ def get_wikidata_label(wikidata_id):
|
|||
return "Label not found"
|
||||
return fetched
|
||||
|
||||
|
||||
def download_wikidata_label(wikidata_id):
|
||||
url = "https://www.wikidata.org/w/api.php"
|
||||
|
||||
|
@ -56,6 +60,7 @@ def download_wikidata_label(wikidata_id):
|
|||
else:
|
||||
raise Exception("Error querying Wikidata:", response.status_code)
|
||||
|
||||
|
||||
def get_wikidata_part_of(wikidata_id):
|
||||
index = wikidata_id + "_part_of"
|
||||
if index in wikidata_cache:
|
||||
|
@ -64,6 +69,7 @@ def get_wikidata_part_of(wikidata_id):
|
|||
wikidata_cache[index] = fetched
|
||||
return fetched
|
||||
|
||||
|
||||
def get_wikidata_owned_by(wikidata_id):
|
||||
index = wikidata_id + "_owned_by"
|
||||
if index in wikidata_cache:
|
||||
|
@ -72,6 +78,7 @@ def get_wikidata_owned_by(wikidata_id):
|
|||
wikidata_cache[index] = fetched
|
||||
return fetched
|
||||
|
||||
|
||||
def get_wikidata_parent_organization(wikidata_id):
|
||||
index = wikidata_id + "_parent_organization"
|
||||
if index in wikidata_cache:
|
||||
|
@ -122,6 +129,7 @@ def download_wikidata_property(wikidata_id, property_id):
|
|||
else:
|
||||
raise Exception("Error querying Wikidata:", response.status_code)
|
||||
|
||||
|
||||
def get_direct_parents(wikidata_id):
|
||||
all_parents = []
|
||||
all_parents += get_wikidata_part_of(wikidata_id)
|
||||
|
@ -129,6 +137,7 @@ def get_direct_parents(wikidata_id):
|
|||
all_parents += get_wikidata_parent_organization(wikidata_id)
|
||||
return all_parents
|
||||
|
||||
|
||||
def get_structure(wikidata_id):
|
||||
direct_parents = get_direct_parents(wikidata_id)
|
||||
# may have multiple steps...
|
||||
|
@ -141,6 +150,7 @@ def get_structure(wikidata_id):
|
|||
indirect += get_direct_parents(parent_id)
|
||||
return direct_parents + indirect
|
||||
|
||||
|
||||
def show_structure(wikidata_id):
|
||||
parents = get_wikidata_part_of(wikidata_id)
|
||||
if len(parents) > 0:
|
||||
|
@ -160,6 +170,7 @@ def show_structure(wikidata_id):
|
|||
for parent_wikidata_id in parents:
|
||||
print(" ", get_wikidata_label(parent_wikidata_id), parent_wikidata_id)
|
||||
|
||||
|
||||
def skipped_osm_cases():
|
||||
return [
|
||||
# fixed in OSM, TODO remove on rerun
|
||||
|
@ -170,14 +181,15 @@ def skipped_osm_cases():
|
|||
'https://www.openstreetmap.org/way/730418419',
|
||||
# ones above are fixed in OSM, TODO remove on rerun
|
||||
|
||||
'https://www.openstreetmap.org/node/11932473689', # see https://www.openstreetmap.org/changeset/151797641
|
||||
'https://www.openstreetmap.org/way/687923205', # https://www.openstreetmap.org/changeset/157497573
|
||||
'https://www.openstreetmap.org/node/11932473689', # see https://www.openstreetmap.org/changeset/151797641
|
||||
'https://www.openstreetmap.org/way/687923205', # https://www.openstreetmap.org/changeset/157497573
|
||||
]
|
||||
|
||||
|
||||
area = graticule_report.global_graticule_coverage()
|
||||
print(area)
|
||||
matching_via_parentage = 0
|
||||
for atp_code in obtain_atp_data.all_spider_codes_iterator(): # note, data is fetched from matches which may be based on a different version of ATP, with different spiders being broken or some missing/not yet present. But looking at files directly seems to be an overkill for such diagnosis tool.
|
||||
for atp_code in obtain_atp_data.all_spider_codes_iterator(): # note, data is fetched from matches which may be based on a different version of ATP, with different spiders being broken or some missing/not yet present. But looking at files directly seems to be an overkill for such diagnosis tool.
|
||||
for lat_anchor in range(area['min_lat'], area['max_lat']):
|
||||
for lon_anchor in range(area['min_lon'], area['max_lon']):
|
||||
#print(atp_code, lat_anchor, lon_anchor)
|
||||
|
@ -249,6 +261,7 @@ for atp_code in obtain_atp_data.all_spider_codes_iterator(): # note, data is fet
|
|||
rich.print(entry.atp_tags)
|
||||
print("OSM")
|
||||
rich.print(entry.osm_match_tags)
|
||||
|
||||
def package_tags_into_mock(tags):
    """
    wrap tags into a mock entry with placeholder location and link

    center is fixed at lat=0, lon=0 and osm_link is the literal 'dummy'
    """
    placeholder_center = {'lat': 0, 'lon': 0}
    mock_entry = {'tags': tags}
    mock_entry['center'] = placeholder_center
    mock_entry['osm_link'] = 'dummy'
    return mock_entry
|
||||
atp_data = [package_tags_into_mock(entry.atp_tags)]
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
config = __import__("0_config")
|
||||
from collections import Counter
|
||||
config = __import__("0_config")
|
||||
|
||||
|
||||
def main():
|
||||
with open(config.poi_type_conflict_list_skipping_obvious_mismatches_with_potential_false_conflicts(), 'r') as infile:
|
||||
|
@ -11,8 +12,6 @@ def main():
|
|||
print(line)
|
||||
print(len(unique_lines), "unique lines")
|
||||
|
||||
|
||||
|
||||
line_counts = Counter(lines)
|
||||
|
||||
filtered_lines = {
|
||||
|
@ -27,6 +26,7 @@ def main():
|
|||
for line, count in sorted_lines:
|
||||
print(f"\n{line.strip()}\nappears {count} times.\n")
|
||||
|
||||
|
||||
def is_conflict_handled(line):
|
||||
if line[1:-2] in config.clear_type_conflicts():
|
||||
return True
|
||||
|
@ -36,4 +36,5 @@ def is_conflict_handled(line):
|
|||
return True
|
||||
return False
|
||||
|
||||
|
||||
main()
|
||||
|
|
|
@ -1,25 +1,25 @@
|
|||
import rich
|
||||
import json
|
||||
config = __import__("0_config")
|
||||
obtain_atp_data = __import__("2_obtain_atp_data")
|
||||
import json
|
||||
import rich
|
||||
|
||||
reported = {}
|
||||
|
||||
|
||||
def log_if_unhandled_cascading_found(tags):
|
||||
shop_value = tags.get("shop")
|
||||
if shop_value in [
|
||||
"fuel", # see test_do_not_distinguish_between_various_fuel_shops
|
||||
"fuel", # see test_do_not_distinguish_between_various_fuel_shops
|
||||
]:
|
||||
return
|
||||
if shop_value == "clothes" and tags.get("clothes") in ["men", "women"]:
|
||||
# see test_do_not_use_clothes_key_for_usual_clothes_shops
|
||||
return
|
||||
if shop_value == "trade" and tags.get("trade") in [
|
||||
"electrical", "tiles", # see test_group_electrical_shops_tagged_in_a_different_way
|
||||
]:
|
||||
"electrical", "tiles", # see test_group_electrical_shops_tagged_in_a_different_way
|
||||
]:
|
||||
return
|
||||
|
||||
|
||||
if shop_value != None:
|
||||
if tags.get(shop_value) != None:
|
||||
report = "shop = " + shop_value + " " + shop_value + " = " + tags.get(shop_value)
|
||||
|
@ -28,6 +28,7 @@ def log_if_unhandled_cascading_found(tags):
|
|||
reported[report] = 0
|
||||
reported[report] += 1
|
||||
|
||||
|
||||
def main():
|
||||
for atp_code in obtain_atp_data.all_spider_codes_iterator():
|
||||
if atp_code in config.ignored_atp_codes():
|
||||
|
@ -46,4 +47,5 @@ def main():
|
|||
raise e
|
||||
rich.print(reported)
|
||||
|
||||
|
||||
main()
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
import qa
|
||||
import random
|
||||
import shared
|
||||
import matcher
|
||||
import rich
|
||||
import osm_bot_abstraction_layer.util_download_file
|
||||
import osm_bot_abstraction_layer.tag_knowledge as tag_knowledge
|
||||
|
@ -6,10 +10,7 @@ import os
|
|||
import requests
|
||||
import functools
|
||||
config = __import__("0_config")
|
||||
import matcher
|
||||
import shared
|
||||
import random
|
||||
import qa
|
||||
|
||||
|
||||
def main():
|
||||
download_entire_atp_dataset()
|
||||
|
@ -48,7 +49,7 @@ def find_missing_listing_of_commonly_shared_name_parts():
|
|||
print(part, spider_list)
|
||||
found_count += 1
|
||||
if found_count > 0:
|
||||
raise Exception("look at common_shared_name_parts()") # TODO move this verification to a separate script, I guess
|
||||
raise Exception("look at common_shared_name_parts()") # TODO move this verification to a separate script, I guess
|
||||
|
||||
|
||||
def remove_country_codes_from_spider_code(atp_code):
|
||||
|
@ -59,7 +60,6 @@ def remove_country_codes_from_spider_code(atp_code):
|
|||
return "_".join(returned_parts)
|
||||
|
||||
|
||||
|
||||
def do_not_remind_that_this_tagging_may_be_worth_supporting():
|
||||
# maybe move it to 0_config ?
|
||||
# to make it more findable ?
|
||||
|
@ -100,9 +100,9 @@ def do_not_remind_that_this_tagging_may_be_worth_supporting():
|
|||
# look into this # TODO
|
||||
notified_about_tag['shop'].append('grocery')
|
||||
notified_about_tag['shop'].append('grocer')
|
||||
notified_about_tag['landuse'].append('residential') # looks like ATP is silly here, or data is unusable
|
||||
notified_about_tag['landuse'].append('residential') # looks like ATP is silly here, or data is unusable
|
||||
notified_about_tag['shop'].append('truck_parts') # seems to be from NSI? https://wiki.openstreetmap.org/wiki/Tag:shop%3Dtruck_parts has no page
|
||||
notified_about_tag['amenity'].append('marketplace') # may be too large, though also mappable as a point...
|
||||
notified_about_tag['amenity'].append('marketplace') # may be too large, though also mappable as a point...
|
||||
|
||||
# kind also shoplike? I want to support them
|
||||
# requires change to osm_bot_abstraction_layer.tag_knowledge.is_shoplike
|
||||
|
@ -161,9 +161,9 @@ def do_not_remind_that_this_tagging_may_be_worth_supporting():
|
|||
notified_about_tag['craft'].append('brewery')
|
||||
notified_about_tag['amenity'].append('post_depot') # internal facility, not post_office
|
||||
notified_about_tag['amenity'].append('mortuary')
|
||||
notified_about_tag['amenity'].append('mailroom') # often an internal facility
|
||||
notified_about_tag['amenity'].append('mailroom') # often an internal facility
|
||||
notified_about_tag['man_made'].append('works')
|
||||
notified_about_tag['telecom'].append('data_center') # internal facilities
|
||||
notified_about_tag['telecom'].append('data_center') # internal facilities
|
||||
|
||||
# DO NOT WADE INTO THIS TAGGING MESS WITH THIS TOOL!
|
||||
# and anyway is likely not in iD presets so would not be supported anyway
|
||||
|
@ -205,7 +205,7 @@ def warn_about_new_tags_that_are_neither_shoplike_nor_ignored(entry, notified_ab
|
|||
if key in entry['properties']:
|
||||
value = entry['properties'][key]
|
||||
if value in ["yes", "no"]:
|
||||
continue # used as a property, not as a main tag
|
||||
continue # used as a property, not as a main tag
|
||||
if ";" in value:
|
||||
# see https://github.com/alltheplaces/alltheplaces/pull/11608#issuecomment-2585053764
|
||||
# see https://github.com/alltheplaces/alltheplaces/pull/11942
|
||||
|
@ -246,7 +246,7 @@ def warn_about_new_tags_that_are_neither_shoplike_nor_ignored(entry, notified_ab
|
|||
return notified_about_tag
|
||||
|
||||
|
||||
def spider_codes_iterator():
    """
    Yield every entry provided by spider_codes_check_for_valid_data().

    TODO: try to deprecate as it is extremely slow - adds several seconds on
    first use which loads and parses entire ATP dataset
    """
    yield from spider_codes_check_for_valid_data()
|
||||
|
||||
|
@ -258,6 +258,7 @@ def spider_codes_check_for_valid_data():
|
|||
returned.append(code)
|
||||
return returned
|
||||
|
||||
|
||||
def spider_codes_and_filepaths_iterator_including_broken_data_ones():
|
||||
"""
|
||||
this one is not parsing .geojson files so will be faster
|
||||
|
@ -281,10 +282,12 @@ def spider_codes_and_filepaths_iterator_including_broken_data_ones():
|
|||
continue
|
||||
yield item_path, atp_code
|
||||
|
||||
|
||||
def all_spider_codes_iterator():
    """
    Yield the ATP code of every (path, code) pair produced by
    spider_codes_and_filepaths_iterator_including_broken_data_ones().

    The file path part of each pair is ignored.
    """
    for _item_path, atp_code in spider_codes_and_filepaths_iterator_including_broken_data_ones():
        yield atp_code
|
||||
|
||||
|
||||
def spider_codes_iterator_with_data():
|
||||
for item_path, atp_code in spider_codes_and_filepaths_iterator_including_broken_data_ones():
|
||||
with open(item_path, 'r') as file:
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
import shared
|
||||
config = __import__("0_config")
|
||||
import os
|
||||
|
||||
|
||||
def delete_requests_to_fetch_data_requested_by_processing():
|
||||
# check links or run nominatim requests
|
||||
|
@ -11,20 +12,24 @@ def delete_requests_to_fetch_data_requested_by_processing():
|
|||
if os.path.isfile(config.nominatim_structured_requests_missing_from_cache()):
|
||||
os.remove(config.nominatim_structured_requests_missing_from_cache())
|
||||
|
||||
|
||||
def delete_output_files():
    """
    Delete files from the output folder and from the published output folder.

    Each folder is handled only if it exists, so this is safe to run before
    any output was generated.
    """
    if os.path.isdir(config.output_folder()):
        shared.delete_files_in_folder(config.output_folder())
    if os.path.isdir(config.published_output_folder()):
        shared.delete_files_in_folder(config.published_output_folder())
|
||||
|
||||
|
||||
def delete_build_files():
    """
    Delete nested files and folders from the build storage folder.

    Nothing happens if the build storage folder does not exist.
    """
    if os.path.isdir(config.build_storage_folder()):
        shared.delete_nested_files_folders_in_folder(config.build_storage_folder())
|
||||
|
||||
|
||||
def main():
    """
    Run all cleanup steps: pending data-fetch requests, output files and
    build files, in that order.
    """
    delete_requests_to_fetch_data_requested_by_processing()
    delete_output_files()
    delete_build_files()
|
||||
|
||||
|
||||
# run cleanup only when executed as a script, not when imported
if __name__ == "__main__":
    main()
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
import rich
|
||||
import show_data
|
||||
import matcher
|
||||
import url_checker
|
||||
import shared
|
||||
import json
|
||||
|
@ -7,9 +10,6 @@ import os
|
|||
import math
|
||||
import shutil
|
||||
obtain_atp_data = __import__("2_obtain_atp_data")
|
||||
import matcher
|
||||
import show_data
|
||||
import rich
|
||||
process_planet = __import__("4_process_planet_file")
|
||||
config = __import__("0_config")
|
||||
|
||||
|
@ -17,10 +17,12 @@ config = __import__("0_config")
|
|||
def graticule_id(lat, lon, lat_span, lon_span, margin_in_kilometers):
    """
    Build a text identifier of a graticule from its parameters.

    Format: "<lat>_<lon>_x_<lat_span>_<lon_span>|<margin_in_kilometers>"
    """
    return f"{lat}_{lon}_x_{lat_span}_{lon_span}|{margin_in_kilometers}"
|
||||
|
||||
|
||||
def global_graticule_coverage():
    """
    Return the bounding box used for the global coverage run, as a dict
    with 'min_lat', 'min_lon', 'max_lat' and 'max_lon' keys.
    """
    # alternative, much smaller bounding box (presumably for quick test runs):
    # return {'min_lat': 50, 'min_lon': 20, 'max_lat': 51, 'max_lon': 21}
    return {'min_lat': -84, 'min_lon': -180, 'max_lat': 84, 'max_lon': 180}
|
||||
|
||||
|
||||
def main():
|
||||
check_is_any_graticule_having_margin_greater_than_entire_graticule()
|
||||
# global coverage run
|
||||
|
@ -50,15 +52,19 @@ def graticule_cache(area):
|
|||
# such separation allows to mark given generated dataset as done and do not regenerate it again
|
||||
return config.build_storage_folder() + "per_graticule_data/" + area_text_identifier(area) + "/"
|
||||
|
||||
|
||||
def specific_graticule_cache_for_report_success(area, lat, lon):
    """
    Return folder path, within graticule_cache(area), holding the report
    success marker for graticule identified by lat and lon.

    Path ends with "/" and has form
    "<graticule cache>report_success_marker/<lat> lat/<lon> lon/"
    """
    return graticule_cache(area) + f"report_success_marker/{lat} lat/{lon} lon/"
|
||||
|
||||
|
||||
def specific_graticule_cache_for_atp_osm_input(area, lat, lon):
    """
    Return folder path, within graticule_cache(area), for OSM and ATP input
    data split by graticule, for graticule identified by lat and lon.

    Path ends with "/" and has form
    "<graticule cache>osm_atp_split_by_graticule/<lat> lat/<lon> lon/"
    """
    return graticule_cache(area) + f"osm_atp_split_by_graticule/{lat} lat/{lon} lon/"
|
||||
|
||||
|
||||
def specific_graticule_cache_for_match_lists(area, lat, lon):
    """
    Return folder path, within graticule_cache(area), for match lists of
    graticule identified by lat and lon.

    Path ends with "/" and has form
    "<graticule cache>match_lists/<lat> lat/<lon> lon/"
    """
    return graticule_cache(area) + f"match_lists/{lat} lat/{lon} lon/"
|
||||
|
||||
|
||||
def area_name_for_graticule(lat_anchor, lon_anchor):
    """
    Return area name of a graticule: both anchor values separated by a space.
    """
    return f"{lat_anchor} {lon_anchor}"
|
||||
|
||||
|
@ -137,6 +143,7 @@ def prepare_graticule_data_files(graticule_coverage):
|
|||
myfile.write("data prepared")
|
||||
print("split data across graticules")
|
||||
|
||||
|
||||
def prepare_osm_graticule_files(graticule_coverage):
|
||||
"""
|
||||
list OSM data in its graticule and surrounding ones
|
||||
|
@ -172,6 +179,7 @@ def prepare_osm_graticule_files(graticule_coverage):
|
|||
offset_lon = 179
|
||||
add_entry_to_graticule_file(entry, 'osm', offset_lat, offset_lon, graticule_coverage)
|
||||
|
||||
|
||||
def prepare_atp_graticule_files(graticule_coverage):
|
||||
for atp_code in obtain_atp_data.all_spider_codes_iterator():
|
||||
if atp_code in config.ignored_atp_codes():
|
||||
|
@ -189,7 +197,7 @@ def prepare_atp_graticule_files(graticule_coverage):
|
|||
# expensive qa check for any ATP entry across the world
|
||||
cleaned_atp = matcher.clean_atp_entry_with_procesed_geometry(atp, atp_code)
|
||||
if cleaned_atp == None:
|
||||
continue # failed to pass qa
|
||||
continue # failed to pass qa
|
||||
try:
|
||||
add_entry_to_graticule_file(cleaned_atp, 'atp', lat_floor, lon_floor, graticule_coverage)
|
||||
except FileNotFoundError:
|
||||
|
@ -198,6 +206,7 @@ def prepare_atp_graticule_files(graticule_coverage):
|
|||
print("entry was accepted as within range, then it crashed on adding to file")
|
||||
raise
|
||||
|
||||
|
||||
def specific_file(origin, lat, lon, area):
|
||||
# TODO: better function name
|
||||
"""
|
||||
|
@ -264,6 +273,7 @@ def prepare_graticule_coverage_map(graticule_coverage):
|
|||
shutil.copy(source, destination)
|
||||
return graticule_index_filename_output
|
||||
|
||||
|
||||
def generate_graticule_coverage_map(graticule_coverage, graticule_index_path):
|
||||
with open(graticule_index_path, 'w') as outfile:
|
||||
area = graticule_coverage
|
||||
|
@ -289,6 +299,7 @@ def generate_graticule_coverage_map(graticule_coverage, graticule_index_path):
|
|||
outfile.write(leafleter.generator.get_polygon(shape, color="green", fill_color="green", link=file))
|
||||
outfile.write(leafleter.generator.get_html_page_suffix())
|
||||
|
||||
|
||||
def osm_link_locator(lat, lon):
    """
    Return link to openstreetmap.org with a marker at given location,
    opened at zoom level 13.

    openstreetmap.org expects latitude in mlat and longitude in mlon, and
    map position as #map=<zoom>/<lat>/<lon>. Previously latitude and
    longitude were swapped in all four places, so the link pointed to a
    wrong location whenever lat != lon.
    NOTE(review): confirm no caller compensated by passing (lon, lat).
    """
    return f"https://www.openstreetmap.org/?mlat={lat}&mlon={lon}#map=13/{lat}/{lon}"
|
||||
|
||||
|
|
|
@ -1,12 +1,11 @@
|
|||
import os
|
||||
import matplotlib.pyplot as plt
|
||||
import warnings
|
||||
# hide following message:
|
||||
# UserWarning: Unable to import Axes3D. This may be due to multiple versions of Matplotlib being installed (e.g. as a system package and as a pip package). As a result, the 3D projection is not available.
|
||||
# TODO: fix it properly
|
||||
warnings.filterwarnings("ignore", category=UserWarning, message=".*Axes3D.*")
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import os
|
||||
|
||||
|
||||
class MatchDistanceDestributionReportCreator:
|
||||
def __init__(self, identifier, area_name):
|
||||
|
|
|
@ -5,6 +5,7 @@ from dulwich.objects import Commit
|
|||
from datetime import datetime
|
||||
import time
|
||||
|
||||
|
||||
def get_latest_commit_date_for_file(repo_path, file_path):
|
||||
r = Repo(repo_path)
|
||||
p = b"the/file/to/look/for"
|
||||
|
|
11
matcher.py
11
matcher.py
|
@ -77,10 +77,10 @@ def common_shared_name_parts():
|
|||
# in English
|
||||
"furniture", "pharmacy", "store", "shop", 'cafe', "bar", "company", 'storage',
|
||||
'opticians', 'jewelers', 'bakery',
|
||||
"car", # to prevent matching unrelated car rentals
|
||||
"car", # to prevent matching unrelated car rentals
|
||||
"burger", "kitchen", 'paper',
|
||||
|
||||
'self', 'storage', # "Self Storage" is common in names
|
||||
'self', 'storage', # "Self Storage" is common in names
|
||||
|
||||
'house',
|
||||
|
||||
|
@ -101,6 +101,7 @@ def get_name_sources(atp_tags):
|
|||
name_sources.append(short_name)
|
||||
return name_sources
|
||||
|
||||
|
||||
def get_filter_names_from_atp_dataset(current_atp):
|
||||
filter_names = []
|
||||
for atp in current_atp:
|
||||
|
@ -143,6 +144,7 @@ def entries_in_range(osm_index, distance_scan_in_kilometers, atp):
|
|||
):
|
||||
yield osm
|
||||
|
||||
|
||||
def get_matches(osm_data, atp_data):
|
||||
match_list = []
|
||||
filter_names = get_filter_names_from_atp_dataset(atp_data)
|
||||
|
@ -199,12 +201,14 @@ def load_atp_without_qa(atp_code):
|
|||
return []
|
||||
return load_atp_from_json_without_qa(data, atp_code)
|
||||
|
||||
|
||||
def load_and_clean_atp(atp_code):
    """
    Load data of ATP spider with given code and clean it.

    Returns an empty list when open_atp_file() gives None for this code,
    otherwise whatever load_atp_from_json_and_clean_it() returns.
    """
    data = open_atp_file(atp_code)
    if data is None:
        return []
    return load_atp_from_json_and_clean_it(data, atp_code)
|
||||
|
||||
|
||||
def open_atp_file(atp_code):
|
||||
filename = config.atp_unpacked_folder() + atp_code + '.geojson'
|
||||
if os.path.isfile(filename) == False:
|
||||
|
@ -218,6 +222,7 @@ def open_atp_file(atp_code):
|
|||
# no need to report also here, so lets fail silently
|
||||
return None
|
||||
|
||||
|
||||
def load_atp_from_json_without_qa(data, atp_code):
|
||||
returned = []
|
||||
for entry in data['features']:
|
||||
|
@ -239,6 +244,7 @@ def load_atp_from_json_and_clean_it(data, atp_code):
|
|||
returned.append(clean_atp_entry_with_procesed_geometry(atp, atp_code))
|
||||
return returned
|
||||
|
||||
|
||||
def clean_atp_entry_with_procesed_geometry(atp, atp_code):
|
||||
if atp['center'] == None:
|
||||
# normal, especially as ATP may suppress blatantly wrong locations
|
||||
|
@ -251,6 +257,7 @@ def clean_atp_entry_with_procesed_geometry(atp, atp_code):
|
|||
return None
|
||||
return atp
|
||||
|
||||
|
||||
def is_location_clearly_implausible(object_data, center):
|
||||
if "addr:country" in object_data:
|
||||
if object_data["addr:country"].lower() not in shared.valid_country_codes():
|
||||
|
|
|
@ -11,6 +11,7 @@ config = __import__("0_config")
|
|||
def cache_path():
    """
    Return location of the Nominatim cache: 'nominatim_cache' appended to
    config.cache_folder().
    """
    return config.cache_folder() + 'nominatim_cache'
|
||||
|
||||
|
||||
nominatim_cache = diskcache.Cache(cache_path(), eviction_policy="none")
|
||||
print(len(nominatim_cache), "entries cached by Nominatim cache")
|
||||
|
||||
|
|
38
qa.py
38
qa.py
|
@ -4,6 +4,7 @@ import shops
|
|||
import shared
|
||||
config = __import__("0_config")
|
||||
|
||||
|
||||
def remove_bad_data(data, atp_code):
|
||||
"""
|
||||
removes bad data: dubious, broken and low quality tags
|
||||
|
@ -19,6 +20,7 @@ def remove_bad_data(data, atp_code):
|
|||
config.show_info_about_spider_to_debug_it(atp_code)
|
||||
raise
|
||||
|
||||
|
||||
def remove_bad_data_wrapped(data, atp_code):
|
||||
for key in list(data.keys()):
|
||||
if data[key] == None:
|
||||
|
@ -42,7 +44,6 @@ def remove_bad_data_wrapped(data, atp_code):
|
|||
if key in data:
|
||||
del data[key]
|
||||
|
||||
|
||||
data = remove_whitespace_suffix_prefix(data, atp_code)
|
||||
|
||||
data = remove_or_fix_bad_links(data, atp_code)
|
||||
|
@ -100,6 +101,7 @@ def remove_bad_data_wrapped(data, atp_code):
|
|||
|
||||
return data
|
||||
|
||||
|
||||
def handle_type_keys(data, atp_code):
|
||||
data = remove_type_keys_where_it_is_a_duplicate(data, atp_code)
|
||||
|
||||
|
@ -117,6 +119,7 @@ def handle_type_keys(data, atp_code):
|
|||
return None
|
||||
return data
|
||||
|
||||
|
||||
def remove_fields_or_entire_elements_on_html_weird_characters_or_empty_or_similar_bad_fields(data, atp_code):
|
||||
for key in list(data.keys()):
|
||||
value = data.get(key)
|
||||
|
@ -126,7 +129,7 @@ def remove_fields_or_entire_elements_on_html_weird_characters_or_empty_or_simila
|
|||
return None
|
||||
for banned_character in "<>[]{}?=":
|
||||
if banned_character in value:
|
||||
internal_atp = ['@source_uri', 'atp_ref'] # atp_ref is set by handle_ref_tag
|
||||
internal_atp = ['@source_uri', 'atp_ref'] # atp_ref is set by handle_ref_tag
|
||||
if key in (config.keys_with_value_link() + internal_atp) and banned_character in ["?", "="]:
|
||||
continue
|
||||
if config.is_bogus_key_worth_mentioning(key, atp_code):
|
||||
|
@ -144,7 +147,7 @@ def remove_fields_or_entire_elements_on_html_weird_characters_or_empty_or_simila
|
|||
continue
|
||||
if "undefined" in value.lower():
|
||||
if len(value) > 20 and key in [
|
||||
'@source_uri', # @source_uri = https://www.gamestop.com/on/demandware.store/Sites-gamestop-us-Site/default/Stores-FindStores?hasCondition=false&hasVariantsAvailableForLookup=false&hasVariantsAvailableForPickup=false&source=plp&showMap=false&products=undefined:1
|
||||
'@source_uri', # @source_uri = https://www.gamestop.com/on/demandware.store/Sites-gamestop-us-Site/default/Stores-FindStores?hasCondition=false&hasVariantsAvailableForLookup=false&hasVariantsAvailableForPickup=false&source=plp&showMap=false&products=undefined:1
|
||||
]:
|
||||
continue
|
||||
if config.is_bogus_key_worth_mentioning(key, atp_code):
|
||||
|
@ -153,6 +156,7 @@ def remove_fields_or_entire_elements_on_html_weird_characters_or_empty_or_simila
|
|||
continue
|
||||
return data
|
||||
|
||||
|
||||
def is_object_actually_not_open_at_all(data, atp_code):
|
||||
if data.get("opening_hours") == "Mo-Su closed":
|
||||
# not an actual active shop, marked in highly cryptic way
|
||||
|
@ -165,8 +169,8 @@ def is_object_actually_not_open_at_all(data, atp_code):
|
|||
# as they are already scheduled to disappear, so what is the point of adding them?
|
||||
return True
|
||||
for indicator in ["store is opening", "permanently closed", "< closed", "> closed", "service areas only",
|
||||
'temporarily closed', 'Book now', ' opens ', # https://github.com/alltheplaces/alltheplaces/issues/11868
|
||||
]:
|
||||
'temporarily closed', 'Book now', ' opens ', # https://github.com/alltheplaces/alltheplaces/issues/11868
|
||||
]:
|
||||
# 'addr:street_address': 'This store is opening September/October 2024'
|
||||
# https://github.com/alltheplaces/alltheplaces/issues/9055
|
||||
# https://github.com/alltheplaces/alltheplaces/issues/11199
|
||||
|
@ -185,6 +189,7 @@ def is_object_actually_not_open_at_all(data, atp_code):
|
|||
return True
|
||||
return False
|
||||
|
||||
|
||||
def remove_unwanted_object_types(data, atp_code):
|
||||
if atp_code == "woolworths_au":
|
||||
if data.get("type") in ["PETROL", "AMPOL", "CALTEXWOW"]:
|
||||
|
@ -195,7 +200,7 @@ def remove_unwanted_object_types(data, atp_code):
|
|||
if data.get("amenity") == "vending_machine":
|
||||
if "vending" not in data:
|
||||
if atp_code not in [
|
||||
'tymebank_za', # https://github.com/alltheplaces/alltheplaces/pull/10180#issue-2514530869 - ATPTODO
|
||||
'tymebank_za', # https://github.com/alltheplaces/alltheplaces/pull/10180#issue-2514530869 - ATPTODO
|
||||
]:
|
||||
print()
|
||||
print("amenity = vending_machine without vending tag, skipping it")
|
||||
|
@ -208,7 +213,7 @@ def remove_unwanted_object_types(data, atp_code):
|
|||
if data.get("post_office") == "post_partner":
|
||||
# If it is a post partner, then it is not a post office but just an additional aspect/facet of another POI
|
||||
if config.allow_very_low_priority_atp_logging():
|
||||
if atp_code != 'midcounties_cooperative_gb': # see https://github.com/alltheplaces/alltheplaces/pull/11944
|
||||
if atp_code != 'midcounties_cooperative_gb': # see https://github.com/alltheplaces/alltheplaces/pull/11944
|
||||
print()
|
||||
print("amenity=post_office post_office=post_partner - skipping this entry")
|
||||
rich.print(data)
|
||||
|
@ -247,6 +252,7 @@ def remove_unwanted_object_types(data, atp_code):
|
|||
return None
|
||||
return data
|
||||
|
||||
|
||||
def remove_whitespace_suffix_prefix(data, atp_code):
|
||||
for key in list(data.keys()):
|
||||
if data[key].strip() != data[key]:
|
||||
|
@ -271,6 +277,7 @@ def remove_whitespace_suffix_prefix(data, atp_code):
|
|||
del data[key]
|
||||
return data
|
||||
|
||||
|
||||
def remove_or_fix_bad_links(data, atp_code):
|
||||
if "website_2" in data and "website" not in data:
|
||||
if config.is_bogus_key_worth_mentioning("website_2", atp_code):
|
||||
|
@ -313,6 +320,7 @@ def remove_or_fix_bad_links(data, atp_code):
|
|||
del data[key]
|
||||
return data
|
||||
|
||||
|
||||
def remove_bad_address(data, atp_code):
|
||||
field = 'addr:street_address'
|
||||
if field in data:
|
||||
|
@ -381,6 +389,7 @@ def remove_bad_address(data, atp_code):
|
|||
data = remove_duplicated_state_country_fields(data, atp_code)
|
||||
return data
|
||||
|
||||
|
||||
def is_this_address_suspiciously_short_for_addr_full(value):
|
||||
if "," in value:
|
||||
return False
|
||||
|
@ -397,6 +406,7 @@ def is_this_address_suspiciously_short_for_addr_full(value):
|
|||
value = value.replace(entry, "")
|
||||
return len(value.strip()) < 14
|
||||
|
||||
|
||||
def remove_duplicated_state_country_fields(data, atp_code):
|
||||
if "addr:state" not in data:
|
||||
return data
|
||||
|
@ -405,8 +415,8 @@ def remove_duplicated_state_country_fields(data, atp_code):
|
|||
if data["addr:state"] != data["addr:country"]:
|
||||
return data
|
||||
if data["addr:state"] in [
|
||||
"PR", # Puerto Rico, it seems to be valid there!
|
||||
"IM", # Isle of Man
|
||||
"PR", # Puerto Rico, it seems to be valid there!
|
||||
"IM", # Isle of Man
|
||||
]:
|
||||
return data
|
||||
if config.is_bogus_key_worth_mentioning("addr:state", atp_code):
|
||||
|
@ -419,6 +429,7 @@ def remove_duplicated_state_country_fields(data, atp_code):
|
|||
del data["addr:state"]
|
||||
return data
|
||||
|
||||
|
||||
def throw_away_name_from_atp(data, atp_code):
|
||||
if data.get("brand") == data["name"]:
|
||||
# no need to do this if name matches brand anyway
|
||||
|
@ -444,6 +455,7 @@ def throw_away_name_from_atp(data, atp_code):
|
|||
# many many such cases, see atp_listing_name-based reports
|
||||
return True
|
||||
|
||||
|
||||
def handle_name_and_brand_tags(data, atp_code):
|
||||
if "brand" not in data:
|
||||
if config.is_missing_brand_field_worth_mentioning(atp_code):
|
||||
|
@ -475,6 +487,7 @@ def handle_name_and_brand_tags(data, atp_code):
|
|||
data["name"] = data["brand"]
|
||||
return data
|
||||
|
||||
|
||||
def remove_type_keys_where_it_is_a_duplicate(data, atp_code):
|
||||
for type_key in config.generic_type_keys():
|
||||
# these could be reported back to ATP, I guess
|
||||
|
@ -497,8 +510,8 @@ def remove_type_keys_where_it_is_a_duplicate(data, atp_code):
|
|||
del data["type"]
|
||||
elif atp_code == "nike":
|
||||
if data.get("type") in [
|
||||
"BEACON", # maybe indicates franchise?
|
||||
]:
|
||||
"BEACON", # maybe indicates franchise?
|
||||
]:
|
||||
del data["type"]
|
||||
elif atp_code == "department_veterans_affairs":
|
||||
if data.get("type") in ["facility"]:
|
||||
|
@ -522,6 +535,7 @@ def remove_type_keys_where_it_is_a_duplicate(data, atp_code):
|
|||
del data[key] # an useless duplicate
|
||||
return data
|
||||
|
||||
|
||||
def remove_bad_email_data(data, atp_code):
|
||||
if 'email' not in data:
|
||||
return data
|
||||
|
@ -796,7 +810,7 @@ def is_empty_value(key, value, atp_code):
|
|||
return True
|
||||
if value.lower() == "na":
|
||||
if key == "addr:country":
|
||||
return False # Namibia
|
||||
return False # Namibia
|
||||
return True
|
||||
useless_values = {
|
||||
# see now fixed https://github.com/alltheplaces/alltheplaces/issues/8978
|
||||
|
|
|
@ -29,16 +29,19 @@ def commit_file(repo_path, file_path, author, commit_message):
|
|||
dulwich.porcelain.add(repo=repo, paths=[file_path])
|
||||
dulwich.porcelain.commit(repo=repo, message=commit_message.encode(), author=author.encode())
|
||||
|
||||
|
||||
def push_changes():
    """
    Run `git push my_repo` inside the ATP repository checkout
    (config.repo_location_atp()).

    The shell command is printed before it is executed. Exit status of the
    command is not checked.
    """
    command = 'cd "' + config.repo_location_atp() + '" && git push my_repo'
    print(command)
    # git is invoked via shell as dulwich.porcelain.push is unable to use credentials
    os.system(command)
|
||||
|
||||
|
||||
def reset_active_work_in_repo(repo_path):
    """
    Switch repository at repo_path back to master and hard-reset it to HEAD.

    Uses dulwich: HEAD is updated to point to 'master' (not detached),
    then a 'hard' reset to HEAD is performed.
    """
    repo = Repo(repo_path)
    dulwich.porcelain.update_head(repo, 'master', detached=False)
    dulwich.porcelain.reset(repo, 'hard', 'HEAD')
|
||||
|
||||
|
||||
def share_changes(repo_path, spider_filepath, atp_code, value_examples):
|
||||
branch_name = atp_code + "_suspiciously_short_addrfull"
|
||||
checkout_branch(repo_path, branch_name)
|
||||
|
@ -47,6 +50,7 @@ def share_changes(repo_path, spider_filepath, atp_code, value_examples):
|
|||
webbrowser.open("https://github.com/alltheplaces/alltheplaces/compare/master...matkoniecz:" + branch_name + "?expand=1")
|
||||
reset_active_work_in_repo(repo_path)
|
||||
|
||||
|
||||
def fix_addr_street_set_as_addr_full(atp_code, value_examples, counter_examples):
|
||||
repo_path = config.repo_location_atp()
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ import webbrowser
|
|||
import urllib
|
||||
import regex
|
||||
|
||||
|
||||
def has_japanese_or_chinese_or_korean_or_arabic_text(value):
|
||||
# https://stackoverflow.com/a/66601628/4130619
|
||||
# https://stackoverflow.com/a/30100900/4130619
|
||||
|
@ -13,6 +14,7 @@ def has_japanese_or_chinese_or_korean_or_arabic_text(value):
|
|||
return True
|
||||
return False
|
||||
|
||||
|
||||
def link_to_point_in_osm(lat, lon):
    """
    Return link to openstreetmap.org with a marker at given location,
    opened at zoom level 19.
    """
    return f'https://www.openstreetmap.org/?mlat={lat}&mlon={lon}#map=19/{lat}/{lon}'
|
||||
|
||||
|
@ -30,6 +32,7 @@ def delete_files_in_folder(folder):
|
|||
if os.path.isfile(file_path):
|
||||
os.remove(file_path)
|
||||
|
||||
|
||||
def delete_nested_files_folders_in_folder(folder):
|
||||
for filename in os.listdir(folder):
|
||||
item_path = os.path.join(folder, filename)
|
||||
|
@ -42,6 +45,7 @@ def delete_nested_files_folders_in_folder(folder):
|
|||
delete_nested_files_folders_in_folder(item_path)
|
||||
os.rmdir(item_path)
|
||||
|
||||
|
||||
def get_free_space_in_mb(path):
    """
    Return free disk space reported by shutil.disk_usage(path),
    converted from bytes by dividing twice by 1024 (so in MiB, as float).
    """
    _total, _used, free = shutil.disk_usage(path)
    return free / 1024 / 1024
|
||||
|
@ -51,6 +55,7 @@ def open_prepared_issue_form(title, body):
|
|||
url = f'https://github.com/alltheplaces/alltheplaces/issues/new?title={urllib.parse.quote(title)}&body={urllib.parse.quote(body)}'
|
||||
webbrowser.open(url)
|
||||
|
||||
|
||||
def valid_country_codes():
|
||||
# NSI lists only part of them
|
||||
# https://github.com/alltheplaces/alltheplaces/blob/8c28db93cb6df154ae3a4651b57d175e272ae416/ci/check_spider_naming_consistency.py#L9
|
||||
|
@ -308,6 +313,7 @@ def valid_country_codes():
|
|||
"zw",
|
||||
]
|
||||
|
||||
|
||||
def country_data():
|
||||
return {
|
||||
'Poland': {
|
||||
|
|
|
@ -552,7 +552,6 @@ class NominatimMismatchReportCreator:
|
|||
</body>
|
||||
</html>"""
|
||||
|
||||
|
||||
def generate_geojson_report(self):
|
||||
with open(config.output_folder() + self.output_geojson_file(), 'w') as f:
|
||||
json.dump(serializing.generate_geojson_structure(self.only_atp_match_list()), f)
|
||||
|
@ -725,7 +724,6 @@ def produce_map_analysis_for_atp_data(atp_code, area_name, match_list, cache_onl
|
|||
}
|
||||
|
||||
|
||||
|
||||
def get_center(dataset):
|
||||
max_lat = -90
|
||||
max_lon = -180
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import unittest
|
||||
config = __import__("0_config")
|
||||
|
||||
|
||||
class ConfigTests(unittest.TestCase):
|
||||
def test_mathworks(self):
|
||||
self.assertEqual(2 + 1, 3)
|
||||
|
|
|
@ -315,12 +315,12 @@ website = http://www.dunkindonuts.sa"""
|
|||
'name': 'Avia XPress',
|
||||
'website': 'https://avia.nl/tankstations/avia-xpress-nieuw-weerdinge',
|
||||
'brand': 'Avia XPress',
|
||||
'brand:wikidata': 'Q124611203', # AVIA XPress, P127 set (owned by AVIA International Q300147)
|
||||
'brand:wikidata': 'Q124611203', # AVIA XPress, P127 set (owned by AVIA International Q300147)
|
||||
}
|
||||
osm_tags = {
|
||||
'amenity': 'fuel',
|
||||
'brand': 'Avia',
|
||||
'brand:wikidata': 'Q300147', # AVIA International
|
||||
'brand:wikidata': 'Q300147', # AVIA International
|
||||
'name': 'Avia',
|
||||
}
|
||||
self.assertEqual(self.this_tag_lists_match(atp_tags, osm_tags), True)
|
||||
|
@ -332,16 +332,17 @@ website = http://www.dunkindonuts.sa"""
|
|||
'name': 'Tamoil express',
|
||||
'website': 'https://avia.nl/tankstations/tamoil-oegstgeest',
|
||||
'brand': 'Tamoil express',
|
||||
'brand:wikidata': 'Q124658477', # Tamoil express, P127 set (owned by Tamoil Q706793)
|
||||
'brand:wikidata': 'Q124658477', # Tamoil express, P127 set (owned by Tamoil Q706793)
|
||||
}
|
||||
osm_tags = {
|
||||
'amenity': 'fuel',
|
||||
'brand': 'Tamoil',
|
||||
'brand:wikidata': 'Q706793', # Tamoil
|
||||
'brand:wikidata': 'Q706793', # Tamoil
|
||||
'name': 'Tamoil',
|
||||
}
|
||||
self.assertEqual(self.this_tag_lists_match(atp_tags, osm_tags), True)
|
||||
|
||||
|
||||
class CanonicalValueTests(unittest.TestCase):
|
||||
def test_simple_canonical_value(self):
|
||||
self.assertEqual(config.canonical_feature({'shop': 'butcher'}), "shop=butcher")
|
||||
|
@ -389,15 +390,15 @@ class CanonicalValueTests(unittest.TestCase):
|
|||
|
||||
def test_group_electrical_shops_tagged_in_a_different_way(self):
|
||||
pass
|
||||
#TODO - actually handle this
|
||||
#self.assertEqual(config.the_same_feature_type({'shop': 'electrical'}, {'shop': 'electric'}), True)
|
||||
#self.assertEqual(config.the_same_feature_type({'shop': 'trade', 'trade': 'electrical'}, {'shop': 'electrical'}), True)
|
||||
# TODO - actually handle this
|
||||
# self.assertEqual(config.the_same_feature_type({'shop': 'electrical'}, {'shop': 'electric'}), True)
|
||||
# self.assertEqual(config.the_same_feature_type({'shop': 'trade', 'trade': 'electrical'}, {'shop': 'electrical'}), True)
|
||||
|
||||
def test_group_building_materials_shops_tagged_in_a_different_way(self):
|
||||
pass
|
||||
#TODO - actually handle this
|
||||
#self.assertEqual(config.the_same_feature_type({'shop': 'trade', 'trade': 'building_materials'}, {'shop': 'building_materials'}), True)
|
||||
#self.assertEqual(config.the_same_feature_type({'shop': 'trade', 'trade': 'tiles'}, {'shop': 'tiles'}), True)
|
||||
# TODO - actually handle this
|
||||
# self.assertEqual(config.the_same_feature_type({'shop': 'trade', 'trade': 'building_materials'}, {'shop': 'building_materials'}), True)
|
||||
# self.assertEqual(config.the_same_feature_type({'shop': 'trade', 'trade': 'tiles'}, {'shop': 'tiles'}), True)
|
||||
|
||||
def test_do_not_use_clothes_key_for_usual_clothes_shops(self):
|
||||
self.assertEqual(config.the_same_feature_type({'shop': 'clothes', 'clothes': 'men'}, {'shop': 'clothes', 'clothes': 'women'}), True)
|
||||
|
@ -411,6 +412,7 @@ class CanonicalValueTests(unittest.TestCase):
|
|||
# TODO: deal with shop=trade trade=
|
||||
# TODO: see 19_detect_unhandled_cascading_values_for_canonical_poi_types.py
|
||||
|
||||
|
||||
class TestSupportFunctions(unittest.TestCase):
|
||||
def test_null_island_is_not_in_poland(self):
|
||||
self.assertEqual(True, matcher.is_location_clearly_implausible({"addr:country": "PL"}, {'lat': 0, "lon": 0}))
|
||||
|
|
|
@ -70,50 +70,50 @@ class ProcessingTests(unittest.TestCase):
|
|||
|
||||
def test_useless_store_type_key_removal_example(self):
|
||||
data = {
|
||||
'ref': '10966',
|
||||
'delivery': 'yes',
|
||||
'storeType': 'pret', # to be removed
|
||||
'wheelchair': 'no',
|
||||
'internet_access': 'wlan',
|
||||
'@source_uri': 'https://api1.pret.com/v1/shops',
|
||||
'@spider': 'pret_a_manger',
|
||||
'amenity': 'fast_food',
|
||||
'cuisine': 'sandwich',
|
||||
'short_name': 'Pret',
|
||||
'takeaway': 'yes',
|
||||
'addr:street_address': 'Vault 1 Station Building, Park Place, Park Street Upper',
|
||||
'addr:city': 'Dublin',
|
||||
'addr:postcode': 'D02R2H5',
|
||||
'addr:country': 'IE',
|
||||
'name': 'Hatch Street',
|
||||
'phone': '+353 1 517 0158',
|
||||
'brand': 'Pret A Manger',
|
||||
'brand:wikidata': 'Q2109109',
|
||||
'nsi_id': 'pretamanger-4f61b1',
|
||||
'opening_hours_in_atp_format': 'Mo-Fr 07:00-18:00; Sa 08:00-18:00; Su 09:00-18:00'
|
||||
}
|
||||
'ref': '10966',
|
||||
'delivery': 'yes',
|
||||
'storeType': 'pret', # to be removed
|
||||
'wheelchair': 'no',
|
||||
'internet_access': 'wlan',
|
||||
'@source_uri': 'https://api1.pret.com/v1/shops',
|
||||
'@spider': 'pret_a_manger',
|
||||
'amenity': 'fast_food',
|
||||
'cuisine': 'sandwich',
|
||||
'short_name': 'Pret',
|
||||
'takeaway': 'yes',
|
||||
'addr:street_address': 'Vault 1 Station Building, Park Place, Park Street Upper',
|
||||
'addr:city': 'Dublin',
|
||||
'addr:postcode': 'D02R2H5',
|
||||
'addr:country': 'IE',
|
||||
'name': 'Hatch Street',
|
||||
'phone': '+353 1 517 0158',
|
||||
'brand': 'Pret A Manger',
|
||||
'brand:wikidata': 'Q2109109',
|
||||
'nsi_id': 'pretamanger-4f61b1',
|
||||
'opening_hours_in_atp_format': 'Mo-Fr 07:00-18:00; Sa 08:00-18:00; Su 09:00-18:00'
|
||||
}
|
||||
expected_new_data = {
|
||||
'ref': '10966',
|
||||
'delivery': 'yes',
|
||||
'wheelchair': 'no',
|
||||
'internet_access': 'wlan',
|
||||
'@source_uri': 'https://api1.pret.com/v1/shops',
|
||||
'@spider': 'pret_a_manger',
|
||||
'amenity': 'fast_food',
|
||||
'cuisine': 'sandwich',
|
||||
'short_name': 'Pret',
|
||||
'takeaway': 'yes',
|
||||
'addr:street_address': 'Vault 1 Station Building, Park Place, Park Street Upper',
|
||||
'addr:city': 'Dublin',
|
||||
'addr:postcode': 'D02R2H5',
|
||||
'addr:country': 'IE',
|
||||
'name': 'Hatch Street',
|
||||
'phone': '+353 1 517 0158',
|
||||
'brand': 'Pret A Manger',
|
||||
'brand:wikidata': 'Q2109109',
|
||||
'nsi_id': 'pretamanger-4f61b1',
|
||||
'opening_hours_in_atp_format': 'Mo-Fr 07:00-18:00; Sa 08:00-18:00; Su 09:00-18:00'
|
||||
}
|
||||
'ref': '10966',
|
||||
'delivery': 'yes',
|
||||
'wheelchair': 'no',
|
||||
'internet_access': 'wlan',
|
||||
'@source_uri': 'https://api1.pret.com/v1/shops',
|
||||
'@spider': 'pret_a_manger',
|
||||
'amenity': 'fast_food',
|
||||
'cuisine': 'sandwich',
|
||||
'short_name': 'Pret',
|
||||
'takeaway': 'yes',
|
||||
'addr:street_address': 'Vault 1 Station Building, Park Place, Park Street Upper',
|
||||
'addr:city': 'Dublin',
|
||||
'addr:postcode': 'D02R2H5',
|
||||
'addr:country': 'IE',
|
||||
'name': 'Hatch Street',
|
||||
'phone': '+353 1 517 0158',
|
||||
'brand': 'Pret A Manger',
|
||||
'brand:wikidata': 'Q2109109',
|
||||
'nsi_id': 'pretamanger-4f61b1',
|
||||
'opening_hours_in_atp_format': 'Mo-Fr 07:00-18:00; Sa 08:00-18:00; Su 09:00-18:00'
|
||||
}
|
||||
|
||||
new_data = qa.remove_type_keys_where_it_is_a_duplicate(data, 'pret_a_manger')
|
||||
self.assertEqual(new_data, expected_new_data)
|
||||
|
@ -122,35 +122,35 @@ class ProcessingTests(unittest.TestCase):
|
|||
def test_crash(self):
|
||||
# to prevent regressions
|
||||
data = {
|
||||
'ref': '49418',
|
||||
'delivery': 'yes',
|
||||
'addr:city:en': 'HAERBIN',
|
||||
'addr:city:zh': '哈尔滨市',
|
||||
'addr:street_address:en': 'NO.299,Haxi Str.,Nangang District,Harbin\n',
|
||||
'addr:street_address:zh': '黑龙江省哈尔滨市',
|
||||
'addr:full:en': 'NO.299, Haxi Str., Nangang District, Harbin, Heilongjiang Province Harbin, HAERBIN, 150000',
|
||||
'addr:full:zh': '黑龙江省哈尔滨市, 哈尔滨市南岗区哈西大街299号哈西红博购物广场一层F1023单元, 哈尔滨市, 150000',
|
||||
'branch:en': 'Harbin Haxi Hongbo Store',
|
||||
'branch:zh': '哈尔滨西城红场店',
|
||||
'@source_uri': 'https://www.starbucks.com.cn/api/stores/nearby?lat=46.0310334&lon=127.6518582&limit=1000&locale=ZH&features=&radius=100000',
|
||||
'amenity': 'cafe',
|
||||
'cuisine': 'coffee_shop',
|
||||
'@spider': 'starbucks_cn',
|
||||
'brand:en': 'Starbucks',
|
||||
'brand:zh': '星巴克',
|
||||
'name:en': 'Starbucks',
|
||||
'name:zh': '星巴克',
|
||||
'takeaway': 'yes',
|
||||
'addr:full': '黑龙江省哈尔滨市, 哈尔滨市南岗区哈西大街299号哈西红博购物广场一层F1023单元, 哈尔滨市, 150000',
|
||||
'addr:street_address': '黑龙江省哈尔滨市',
|
||||
'addr:city': '哈尔滨市',
|
||||
'addr:postcode': '150000',
|
||||
'addr:country': 'CN',
|
||||
'name': '星巴克',
|
||||
'branch': '哈尔滨西城红场店',
|
||||
'brand': '星巴克',
|
||||
'brand:wikidata': 'Q37158',
|
||||
'nsi_id': 'starbucks-823e31'
|
||||
'ref': '49418',
|
||||
'delivery': 'yes',
|
||||
'addr:city:en': 'HAERBIN',
|
||||
'addr:city:zh': '哈尔滨市',
|
||||
'addr:street_address:en': 'NO.299,Haxi Str.,Nangang District,Harbin\n',
|
||||
'addr:street_address:zh': '黑龙江省哈尔滨市',
|
||||
'addr:full:en': 'NO.299, Haxi Str., Nangang District, Harbin, Heilongjiang Province Harbin, HAERBIN, 150000',
|
||||
'addr:full:zh': '黑龙江省哈尔滨市, 哈尔滨市南岗区哈西大街299号哈西红博购物广场一层F1023单元, 哈尔滨市, 150000',
|
||||
'branch:en': 'Harbin Haxi Hongbo Store',
|
||||
'branch:zh': '哈尔滨西城红场店',
|
||||
'@source_uri': 'https://www.starbucks.com.cn/api/stores/nearby?lat=46.0310334&lon=127.6518582&limit=1000&locale=ZH&features=&radius=100000',
|
||||
'amenity': 'cafe',
|
||||
'cuisine': 'coffee_shop',
|
||||
'@spider': 'starbucks_cn',
|
||||
'brand:en': 'Starbucks',
|
||||
'brand:zh': '星巴克',
|
||||
'name:en': 'Starbucks',
|
||||
'name:zh': '星巴克',
|
||||
'takeaway': 'yes',
|
||||
'addr:full': '黑龙江省哈尔滨市, 哈尔滨市南岗区哈西大街299号哈西红博购物广场一层F1023单元, 哈尔滨市, 150000',
|
||||
'addr:street_address': '黑龙江省哈尔滨市',
|
||||
'addr:city': '哈尔滨市',
|
||||
'addr:postcode': '150000',
|
||||
'addr:country': 'CN',
|
||||
'name': '星巴克',
|
||||
'branch': '哈尔滨西城红场店',
|
||||
'brand': '星巴克',
|
||||
'brand:wikidata': 'Q37158',
|
||||
'nsi_id': 'starbucks-823e31'
|
||||
}
|
||||
qa.remove_or_fix_bad_links(data, 'starbucks_cn')
|
||||
qa.remove_bad_data_wrapped(data, 'starbucks_cn')
|
||||
|
|
Loading…
Add table
Reference in a new issue