1
0
Fork 0

Fix matches that were using names from other parts of the dataset

This commit is contained in:
Mateusz Konieczny 2025-02-09 09:21:24 +01:00
parent 99b71d8a65
commit cb8bbbe50c
2 changed files with 5 additions and 1 deletions

View file

@ -172,12 +172,17 @@ def get_matches(osm_data, atp_data):
best_match = None
best_match_distance = None
all_very_good_matches = []
# the ATP dataset as a whole may contain varied names - we need to check whether
# this specific entry actually matches when only its own names are considered
name_part_list_for_specific_entry = get_filter_names_from_atp_dataset([atp])
for osm in entries_in_range(osm_index, distance_scan_in_kilometers, atp):
if config.the_same_feature_type(atp['tags'], osm['tags']) == False:
# note: this check needs to be here, as a single spider can cover, for example, various shop types,
# or both fuel stations and the convenience shops located at fuel stations
continue
distance = shared.calculate_distance(atp['center'], osm['center'])
if is_matching_any_name_part_to_osm_tags(name_part_list_for_specific_entry, osm["tags"]) == False:
continue
if distance < config.good_match_distance_in_kilometers():
all_very_good_matches.append(osm)
if best_match == None or best_match_distance > distance:

View file

@ -163,7 +163,6 @@ class MatchingTests(unittest.TestCase):
self.assertEqual(self.this_tag_lists_match({'brand': "Zażółć Bank", 'amenity': 'bank'}, {'brand': "Gęśla Jaźń Bank", 'amenity': 'bank'}), False)
@unittest.expectedFailure # TODO: fix this serious bug!
def test_matches_using_specific_name_not_one_from_other_objects(self):
atp_data = [
{'tags': {'shop': 'foobar', 'brand': 'ABC', 'name': 'ABC'}, 'center': {'lat': 0, 'lon': 0}, 'osm_link': 'dummy'},