
one more diagnostic script

Mateusz Konieczny 2025-03-06 10:24:21 +01:00
parent 520894e559
commit 05e8565893

@@ -0,0 +1,79 @@
import rich
import data_iterator
from collections import defaultdict

# these module names start with a digit, so they cannot be loaded with a plain import statement
obtain_atp_data = __import__("2_obtain_atp_data")
graticule_report = __import__("5_generate_graticule_reports")

def main():
    crossspider_multimatch_check()
    # regular_multimatch_check()  # this is normal and happens very often

def crossspider_multimatch_check():  # TODO - near duplicate of regular_multimatch_check - delete regular_multimatch_check? deduplicate?
    matched_osm_entries = set()
    multimatches = set()
    limited_cache_of_full_matchings = defaultdict(list)
    area = graticule_report.global_graticule_coverage()
    for atp_code in obtain_atp_data.all_spider_codes_iterator():
        matched_osm_entries_for_this_spider = set()
        for entry in data_iterator.iterate_over_all_matches_for_specific_spider(area, atp_code):
            if len(limited_cache_of_full_matchings) > 1_000_000:  # was 500_000
                # to avoid busting memory
                # maybe we can keep more?
                # TODO
                limited_cache_of_full_matchings = defaultdict(list)
            if entry.osm_link is not None:
                if entry.osm_link in matched_osm_entries:
                    # matched_osm_entries only holds links from previously processed spiders,
                    # so this reports an OSM object matched by more than one spider
                    print("matched multiple times, for example on")
                    log_entry(entry)
                    multimatches.add(entry.osm_link)
                    if entry.osm_link in limited_cache_of_full_matchings:
                        print("also in limited_cache_of_full_matchings")
                        for from_limited_cache in limited_cache_of_full_matchings[entry.osm_link]:
                            log_entry(from_limited_cache)
                            print()
                    print()
                    print()
                    print()
                matched_osm_entries_for_this_spider.add(entry.osm_link)
                limited_cache_of_full_matchings[entry.osm_link].append(entry)
        # merge this spider's matches only after it is fully processed, so repeated
        # matches within a single spider do not trigger the cross-spider report
        matched_osm_entries = matched_osm_entries | matched_osm_entries_for_this_spider

def regular_multimatch_check():
    matched_osm_entries = set()
    multimatches = set()
    limited_cache_of_full_matchings = defaultdict(list)
    area = graticule_report.global_graticule_coverage()
    for atp_code in obtain_atp_data.all_spider_codes_iterator():
        for entry in data_iterator.iterate_over_all_matches_for_specific_spider(area, atp_code):
            if len(limited_cache_of_full_matchings) > 100_000:
                # to avoid busting memory
                # maybe we can keep more?
                # TODO
                limited_cache_of_full_matchings = defaultdict(list)
            if entry.osm_link is not None:
                if entry.osm_link in matched_osm_entries:
                    print("matched multiple times, for example on")
                    log_entry(entry)
                    multimatches.add(entry.osm_link)
                    if entry.osm_link in limited_cache_of_full_matchings:
                        print("also in limited_cache_of_full_matchings")
                        for from_limited_cache in limited_cache_of_full_matchings[entry.osm_link]:
                            log_entry(from_limited_cache)
                            print()
                    print()
                    print()
                    print()
                else:
                    matched_osm_entries.add(entry.osm_link)
                limited_cache_of_full_matchings[entry.osm_link].append(entry)
    rich.print(multimatches)

def log_entry(entry):
    print(entry.osm_link)
    print(entry.link_to_point_in_atp())
    print(entry.link_to_point_in_osm())
    rich.print(entry.atp_tags)


if __name__ == '__main__':
    main()
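
Both check functions bound memory the same way: they keep a per-OSM-link list of full match entries and, once that cache tracks more than a fixed number of keys, throw it away entirely and start over, so the "also in limited_cache_of_full_matchings" output is best-effort rather than complete. A minimal standalone sketch of that pattern follows; CACHE_KEY_LIMIT and record_match are illustrative names, not part of this commit.

from collections import defaultdict

CACHE_KEY_LIMIT = 1_000_000  # illustrative value, mirroring the threshold used above

def record_match(cache, key, value):
    # drop the whole cache rather than evicting selectively once it grows too large
    if len(cache) > CACHE_KEY_LIMIT:
        cache = defaultdict(list)
    cache[key].append(value)
    return cache  # caller must rebind, since the cache object may have been replaced

cache = defaultdict(list)
cache = record_match(cache, "https://www.openstreetmap.org/node/1", {"shop": "supermarket"})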