mirror of
https://codeberg.org/matkoniecz/list_how_openstreetmap_can_be_improved_with_alltheplaces_data.git
synced 2025-04-11 10:09:29 +02:00
one more diagnostic script
This commit is contained in:
parent
520894e559
commit
05e8565893
1 changed files with 79 additions and 0 deletions
79
22_find_where_multiple_atp_match_to_one_osm.py
Normal file
79
22_find_where_multiple_atp_match_to_one_osm.py
Normal file
|
@ -0,0 +1,79 @@
|
|||
import rich
|
||||
import data_iterator
|
||||
from collections import defaultdict
|
||||
obtain_atp_data = __import__("2_obtain_atp_data")
|
||||
graticule_report = __import__("5_generate_graticule_reports")
|
||||
|
||||
def main():
|
||||
crossspider_multimatch_check()
|
||||
#regular_multimatch_check() # this is normal and happens very often
|
||||
|
||||
def crossspider_multimatch_check():  # TODO - near duplicate of regular_multimatch_check - delete regular_multimatch_check? deduplicate?
    """Report OSM objects matched by entries from more than one ATP spider.

    Within-spider repeats are deliberately ignored (they are normal and very
    frequent, see main()): matches made by the spider currently being
    processed are collected in a per-spider set and merged into the global
    set only after that spider has been fully scanned, so a complaint is
    raised only when a *different*, earlier spider already claimed the link.
    """
    # OSM links matched by previously processed spiders.
    matched_osm_entries = set()
    # OSM links that were matched more than once, across spiders.
    multimatches = set()
    # Remembers full entries so earlier matches can be shown when a
    # multimatch is detected; periodically dropped to cap memory use.
    limited_cache_of_full_matchings = defaultdict(list)
    area = graticule_report.global_graticule_coverage()
    for atp_code in obtain_atp_data.all_spider_codes_iterator():
        matched_osm_entries_for_this_spider = set()
        for entry in data_iterator.iterate_over_all_matches_for_specific_spider(area, atp_code):
            if len(limited_cache_of_full_matchings) > 1_000_000:  # 500_000:
                # to avoid busting memory
                # maybe we can keep more?
                # TODO
                limited_cache_of_full_matchings = defaultdict(list)
            if entry.osm_link is not None:
                if entry.osm_link in matched_osm_entries:
                    print("matched multiple times, for example on")
                    log_entry(entry)
                    multimatches.add(entry.osm_link)
                    if entry.osm_link in limited_cache_of_full_matchings:
                        print("also in limited_cache_of_full_matchings")
                        for from_limited_cache in limited_cache_of_full_matchings[entry.osm_link]:
                            log_entry(from_limited_cache)
                            print()
                    print()
                    print()
                    print()
                # Record only in the per-spider set here: adding straight to
                # matched_osm_entries would flag within-spider repeats, which
                # this cross-spider check is supposed to skip.
                matched_osm_entries_for_this_spider.add(entry.osm_link)
                limited_cache_of_full_matchings[entry.osm_link].append(entry)
        matched_osm_entries = matched_osm_entries | matched_osm_entries_for_this_spider
    # Final summary of all cross-spider multimatched OSM objects, for
    # consistency with regular_multimatch_check.
    rich.print(multimatches)
def regular_multimatch_check():
    """Report OSM objects matched by more than one ATP entry, of any spider.

    The first match of each OSM link is remembered silently; every later
    match of the same link is logged, regardless of which spider produced
    it. Per the note in main(), such multimatches are normal and happen
    very often, so this check is mostly diagnostic.
    """
    # OSM links seen at least once so far.
    matched_osm_entries = set()
    # OSM links that were matched more than once.
    multimatches = set()
    # Remembers full entries so earlier matches can be shown when a
    # multimatch is detected; periodically dropped to cap memory use.
    limited_cache_of_full_matchings = defaultdict(list)
    area = graticule_report.global_graticule_coverage()
    for atp_code in obtain_atp_data.all_spider_codes_iterator():
        for entry in data_iterator.iterate_over_all_matches_for_specific_spider(area, atp_code):
            if len(limited_cache_of_full_matchings) > 100_000:
                # to avoid busting memory
                # maybe we can keep more?
                # TODO
                limited_cache_of_full_matchings = defaultdict(list)
            if entry.osm_link is not None:
                if entry.osm_link in matched_osm_entries:
                    print("matched multiple times, for example on")
                    log_entry(entry)
                    multimatches.add(entry.osm_link)
                    if entry.osm_link in limited_cache_of_full_matchings:
                        print("also in limited_cache_of_full_matchings")
                        for from_limited_cache in limited_cache_of_full_matchings[entry.osm_link]:
                            log_entry(from_limited_cache)
                            print()
                    print()
                    print()
                    print()
                else:
                    matched_osm_entries.add(entry.osm_link)
                limited_cache_of_full_matchings[entry.osm_link].append(entry)
    # Final summary of all multimatched OSM objects.
    rich.print(multimatches)
def log_entry(entry):
    """Print the identifying links of a matched entry, then its ATP tags."""
    for detail in (
        entry.osm_link,
        entry.link_to_point_in_atp(),
        entry.link_to_point_in_osm(),
    ):
        print(detail)
    rich.print(entry.atp_tags)
# Script entry point: run the diagnostics only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    main()
Loading…
Add table
Reference in a new issue