From 6f2aa1bb66d6e53b661a2a9aece8c3d7b98117c5 Mon Sep 17 00:00:00 2001 From: Mateusz Konieczny <matkoniecz@gmail.com> Date: Thu, 3 Apr 2025 15:33:56 +0200 Subject: [PATCH] improve type matching --- 0_config.py | 170 ++++++++++++++++++++++++++++++++++++++++- test_matching_logic.py | 63 ++++++++++++++- 2 files changed, 229 insertions(+), 4 deletions(-) diff --git a/0_config.py b/0_config.py index acaefc3..88d59cf 100644 --- a/0_config.py +++ b/0_config.py @@ -982,6 +982,9 @@ def the_same_feature_type(tags_a, tags_b): return True bare_trade = None other_one = None + if tags_a.get("amenity") == "fast_food" and tags_b.get("amenity") == "fast_food": + # one of them may have say shop=bakery + return True if tags_a.get("shop") == "trade" and "trade" not in tags_a: bare_trade = tags_a other_one = tags_b @@ -989,7 +992,7 @@ def the_same_feature_type(tags_a, tags_b): bare_trade = tags_b other_one = tags_a if bare_trade != None: - if other_one.get("shop") in ["trade", "tractor", "agrarian"] or other_one.get("craft") in ["agricultural_engines"]: + if other_one.get("shop") in ["trade", "tractor", "agrarian", "tiles", "flooring"] or other_one.get("craft") in ["agricultural_engines"]: # we want to match say # shop=trade trade=tiles # with @@ -1301,7 +1304,30 @@ def clear_type_conflicts(): 'amenity=animal_boarding vs shop=money_lender', 'amenity=animal_boarding vs shop=storage_rental', 'amenity=animal_boarding vs shop=tyres', + 'shop=money_lender vs shop=motorcycle_repair', + 'shop=money_lender vs shop=paint', + 'shop=money_lender vs shop=perfumery', + 'shop=money_lender vs shop=shoes', + 'shop=money_lender vs shop=sports', + 'shop=money_lender vs shop=storage_rental', + 'shop=money_lender vs shop=tyres', + 'shop=mobile_phone_accessories vs shop=window_blind', + 'shop=mobile_phone_accessories vs shop=windows', + 'shop=mobile_phone_accessories vs sweet bakery', + 'shop=mobile_phone_accessories vs toy_shop', 'amenity=bank vs shop=outdoor', + 'shop=health_food vs toy_shop', + 'shop=health_food vs shop=tobacco', + 'shop=health_food vs shop=travel_agency', + 'shop=health_food vs shop=weapons', + 'shop=motorcycle vs shop=stationery', + 'shop=motorcycle vs shop=storage_rental', + 'shop=motorcycle vs shop=travel_agency', + 'shop=motorcycle vs shop=window_blind', + 'shop=kitchen vs shop=travel_agency', + 'shop=kitchen vs shop=tyres', + 'shop=kitchen vs shop=travel_agency', + 'shop=kitchen vs shop=tyres', 'amenity=atm vs amenity=bank', 'amenity=atm vs convenience/supermarket', 'amenity=bank vs amenity=biergarten', @@ -3776,7 +3802,52 @@ def clear_type_conflicts(): 'amenity=post_office vs shop=bakery', 'amenity=post_office vs craft=cleaning', 'amenity=post_office vs craft=electronics_repair', + 'shop=art vs shop=travel_agency', + 'shop=art vs shop=travel_agency', + 'amenity=post_office vs craft=handicraft', + 'amenity=post_office vs craft=handicraft', + 'craft=handicraft vs parcel_locker', + 'craft=handicraft vs parcel_locker', + 'craft=handicraft vs parcel_locker', + 'craft=handicraft vs parcel_locker', + 'craft=handicraft vs parcel_locker', + 'craft=handicraft vs parcel_locker', + 'craft=handicraft vs parcel_locker', + 'craft=handicraft vs parcel_locker', + 'craft=handicraft vs parcel_locker', + 'craft=handicraft vs parcel_locker', + 'shop=sports vs shop=tailor', + 'shop=sports vs shop=tattoo', + 'shop=sports vs shop=ticket', + 'shop=sports vs shop=tiles', + 'shop=sports vs shop=trade', + 'shop=sports vs shop=travel_agency', + 'shop=sports vs shop=tyres', + 'shop=sports vs shop=video_games', + 'shop=paint vs shop=pawnbroker', + 'shop=paint vs shop=pet', + 'shop=paint vs shop=pet_grooming', + 'shop=paint vs shop=photo', + 'shop=paint vs shop=pottery', + 'shop=paint vs shop=psychic', + 'shop=paint vs shop=scuba_diving', + 'shop=paint vs shop=sewing', + 'shop=paint vs shop=swimming_pool', + 'shop=paint vs shop=trophy', + 'shop=paint vs shop=truck_repair', + 'shop=paint vs shop=tyres', + 'shop=paint vs shop=window_blind', + 'shop=party vs shop=swimming_pool', 'amenity=post_office vs craft=glaziery', + 'shop=new_age vs shop=storage_rental', + 'shop=newsagent vs shop=optician', + 'shop=newsagent vs shop=pawnbroker', + 'shop=newsagent vs shop=perfumery', + 'shop=newsagent vs shop=pet', + 'shop=newsagent vs shop=pet_grooming', + 'shop=newsagent vs shop=photo', + 'shop=newsagent vs shop=printing', + 'shop=newsagent vs shop=radiotechnics', 'amenity=post_office vs craft=hvac', 'amenity=post_office vs craft=metal_construction', 'amenity=post_office vs craft=pottery', @@ -6654,11 +6725,100 @@ def clear_type_conflicts(): 'shop=ticket vs sweet bakery', 'shop=video vs sweet bakery', 'shop=weapons vs toy_shop', + 'shop=nuts vs shop=paint', + 'shop=optician vs shop=pet_grooming', + 'shop=optician vs shop=radiotechnics', + 'shop=optician vs shop=seafood', + 'shop=optician vs shop=sewing', + 'shop=optician vs shop=tattoo', + 'shop=optician vs shop=tea', + 'shop=optician vs shop=truck', + 'shop=mobile_phone_accessories vs shop=tailor', + 'shop=mobile_phone_accessories vs shop=tea', + 'shop=mobile_phone_accessories vs shop=ticket', + 'shop=mobile_phone_accessories vs shop=tiles', + 'shop=mobile_phone_accessories vs shop=tobacco', + 'shop=mobile_phone_accessories vs shop=trade', + 'shop=mobile_phone_accessories vs shop=travel_agency', + 'shop=mobile_phone_accessories vs shop=video', + 'shop=mobile_phone_accessories vs shop=weapons', + 'shop=medical_supply vs toy_shop', + 'shop=mall vs shop=mobile_phone_accessories', + 'shop=mall vs shop=newsagent', + 'shop=mall vs shop=nutrition_supplements', + 'shop=mall vs shop=optician', + 'shop=mall vs shop=outdoor', + 'shop=mall vs shop=paint', + 'shop=mall vs shop=perfumery', + 'shop=mall vs shop=pet', + 'shop=mall vs shop=shoes', + 'shop=mall vs shop=sports', + 'shop=mall vs shop=stationery', + 'shop=mall vs shop=storage_rental', + 'shop=mall vs shop=travel_agency', + 'shop=mall vs shop=tyres', + 'shop=maps vs shop=travel_agency', + 'shop=massage vs shop=mobile_phone_accessories', + 'shop=massage vs shop=optician', + 'shop=massage vs shop=paint', + 'shop=massage vs shop=shoes', + 'shop=massage vs shop=sports', + 'shop=massage vs shop=stationery', + 'shop=massage vs shop=travel_agency', + 'shop=massage vs shop=tyres', + 'shop=lighting vs shop=mobile_phone_accessories', + 'shop=lighting vs shop=newsagent', + 'shop=lighting vs shop=optician', + 'shop=lighting vs shop=perfumery', + 'shop=lighting vs shop=shoes', + 'shop=lighting vs shop=sports', + 'shop=lighting vs shop=travel_agency', + 'shop=lighting vs shop=tyres', + 'shop=jewelry vs shop=perfumery', + 'shop=jewelry vs shop=pet', + 'shop=health_food vs shop=paint', + 'shop=health_food vs shop=photo', + 'shop=hairdresser vs shop=pet_grooming', + 'shop=frozen_food vs shop=kitchen', + 'shop=frozen_food vs shop=locksmith', + 'shop=frozen_food vs shop=mall', + 'shop=frozen_food vs shop=optician', + 'shop=frozen_food vs shop=paint', + 'shop=frozen_food vs shop=travel_agency', + 'shop=florist vs shop=greengrocer', + 'shop=florist vs shop=hairdresser', + 'shop=florist vs shop=household_linen', + 'shop=florist vs shop=hvac', + 'shop=florist vs shop=jewelry', + 'shop=florist vs shop=kiosk', + 'shop=florist vs shop=kitchen', + 'shop=florist vs shop=mall', + 'shop=florist vs shop=medical_supply', + 'shop=florist vs shop=newsagent', + 'shop=florist vs shop=outdoor', + 'shop=florist vs shop=perfumery', + 'shop=florist vs shop=repair', + 'shop=florist vs shop=sports', + 'shop=florist vs shop=tea', + 'shop=florist vs shop=tyres', + 'shop=flooring vs shop=travel_agency', + 'shop=flooring vs shop=tyres', + 'shop=flooring vs shop=video', + 'shop=flooring vs shop=video_games', ] def matching_rather_than_type_conflict(): return [ + 'shop=food vs shop=frozen_food', + 'shop=deli vs shop=frozen_food', + + 'shop=health_food vs shop=herbalist', + + 'shop=flooring vs shop=tiles', + + 'shop=safety_equipment vs shop=trade', + 'shop=cosmetics vs shop=perfumery', 'shop=appliance vs shop=electronics', @@ -6700,6 +6860,12 @@ def matching_rather_than_type_conflict(): 'shop=money_lender vs shop=pawnbroker', + 'shop=car_parts vs shop=tyres', + 'shop=truck vs shop=tyres', + + 'shop=motorcycle_repair vs shop=tyres', + 'shop=motorcycle vs shop=tyres', + 'shop=motorcycle vs shop=motorcycle_repair', 'shop=photo vs shop=photo_studio', @@ -6727,8 +6893,6 @@ def matching_rather_than_type_conflict(): 'shop=doityourself vs shop=wholesale', 'shop=doityourself vs shop=window_blind', - 'shop=car_parts vs shop=tyres', - 'shop=building_materials vs shop=doityourself', 'shop=building_materials vs shop=hardware', diff --git a/test_matching_logic.py b/test_matching_logic.py index 1717ebd..6c85812 100644 --- a/test_matching_logic.py +++ b/test_matching_logic.py @@ -722,10 +722,71 @@ class CanonicalValueTests(unittest.TestCase): 'name': 'Rituals' }), True) - + def test_prepared_food_shops_frozen_vs_deli(self): + self.assertEqual(config.the_same_feature_type({ + '@source_uri': 'https://wesolapani.com/shops', + '@spider': 'wesola_pani', + 'shop': 'frozen_food', + 'addr:full': 'Mariana Domagały 45', + 'addr:state': 'Lesser Poland Voivodeship', + 'addr:country': 'PL', + 'phone': '+48 518 841 784', + 'brand': 'Wesoła Pani', + 'brand:wikidata': 'Q123240454', + 'atp_ref': '53', + 'opening_hours_in_atp_format': 'Mo-Fr 09:00-21:00; Sa 09:00-20:00', + 'name': 'Wesoła Pani' +}, { + 'food': 'prepared_meals', + 'name': 'Wesoła Pani', + 'opening_hours': 'Mo-Fr 09:00-21:00; Sa 09:00-20:00', + 'payment:credit_cards': 'yes', + 'payment:debit_cards': 'yes', + 'shop': 'deli' +}), True) + + def test_prepared_food_shops_frozen_vs_food_shop(self): + self.assertEqual(config.the_same_feature_type({ + '@source_uri': 'https://wesolapani.com/shops', + '@spider': 'wesola_pani', + 'shop': 'frozen_food', + 'addr:full': 'Mariana Domagały 45', + 'addr:state': 'Lesser Poland Voivodeship', + 'addr:country': 'PL', + 'phone': '+48 518 841 784', + 'brand': 'Wesoła Pani', + 'brand:wikidata': 'Q123240454', + 'atp_ref': '53', + 'opening_hours_in_atp_format': 'Mo-Fr 09:00-21:00; Sa 09:00-20:00', + 'name': 'Wesoła Pani' +}, { + 'food': 'prepared_meals', + 'name': 'Wesoła Pani', + 'opening_hours': 'Mo-Fr 09:00-21:00; Sa 09:00-20:00', + 'payment:credit_cards': 'yes', + 'payment:debit_cards': 'yes', + 'shop': 'food' +}), True) + + def test_bakery_fast_food_place(self): + # (1) amenity=fast_food + # (2) amenity=fast_food + shop=bakery + # (3) shop=bakery + # (1) must match with (2) + # (2) must match with (3) + # (1) may match with (3) - or may not match, not matching is preferable, I think + self.assertEqual(config.the_same_feature_type({'shop': 'bakery', 'amenity': 'fast_food'}, {'shop': 'bakery'}), True) + self.assertEqual(config.the_same_feature_type({'shop': 'bakery', 'amenity': 'fast_food'}, {'amenity': 'fast_food'}), True) + def test_match_tile_shops_tagged_in_a_different_way(self): self.assertEqual(config.the_same_feature_type({'shop': 'trade', 'trade': 'tiles'}, {'shop': 'trade'}), True) self.assertEqual(config.the_same_feature_type({'shop': 'trade', 'trade': 'tiles'}, {'shop': 'tiles'}), True) + self.assertEqual(config.the_same_feature_type({'shop': 'trade', 'trade': 'tiles'}, {'shop': 'flooring'}), True) + self.assertEqual(config.the_same_feature_type({'shop': 'tiles'}, {'shop': 'flooring'}), True) + + def test_match_unqualified_trade_shop_with_tile_shop(self): + self.assertEqual(config.the_same_feature_type({'shop': 'trade'}, {'shop': 'tiles'}), True) + self.assertEqual(config.the_same_feature_type({'shop': 'trade'}, {'shop': 'flooring'}), True) def test_do_not_use_clothes_key_for_usual_clothes_shops(self): self.assertEqual(config.the_same_feature_type({'shop': 'clothes', 'clothes': 'men'}, {'shop': 'clothes', 'clothes': 'women'}), True)