1
0
Fork 0

fix some Wikidata problems on Wikidata itself

This commit is contained in:
Mateusz Konieczny 2025-01-30 17:35:13 +01:00
parent d1bab12968
commit 6c5e983d98
4 changed files with 6 additions and 15 deletions

View file

@ -34,12 +34,8 @@ def skipping_any_of_this_wikidata_ids(osm_wikidata_id, atp_wikidata_id):
# test_wikidata.py covers these, test is skipped as long as entries are
# listed here in skipping_any_of_this_wikidata_ids
"Q5835668", "Q806653", # Santander vs Santander Bank Polska
"Q117547405", "Q7592120", # https://github.com/alltheplaces/alltheplaces/pull/12053
"Q117547405", "Q7592120", # https://github.com/alltheplaces/alltheplaces/pull/12053 TODO_MERGED
'Q4748528', 'Q5023980', # argh? TODO_LOW_PRIORITY
'Q123015694', 'Q527877', # strongly related, Wikidata fails to model it TODO_WIKIDATA https://www.wikidata.org/w/index.php?title=Talk%3AQ123015694#Carhartt_Work_in_Progress_is_related_to_Carhartt
'Q9345945', 'Q110801183', 'Q110801119', # all entries describe single brand (including subbrands) TODO_WIKIDATA
"Q3212934", "Q533415", # former competitor, now subbbrand TODO_WIKIDATA
]:
return True

View file

@ -228,8 +228,6 @@ class ATPGivesTagsReportCreator:
tested_key = "website"
if tested_key not in atp.atp_tags:
return False
if atp.atp_tags['@spider'] in ['paczkomat_inpost_pl']: # https://github.com/alltheplaces/alltheplaces/issues/9721
return False
returned = self.url_checker.is_website_eligible(atp, cache_only)
if returned == None:
return False # not cached, instructed to use only cache

View file

@ -23,8 +23,6 @@ class ConfigTests(unittest.TestCase):
self.assertEqual(wikidata.is_actually_the_same_according_to_wikidata_structure("Q110970254", "Q3182097"), True)
def test_national_subbrand(self):
if mismatch_detector_script.skipping_any_of_this_wikidata_ids("Q5835668", "Q806653"):
return
# https://www.wikidata.org/wiki/Q806653 Santander Bank Polska
# https://www.wikidata.org/wiki/Q5835668 Santander Bank
self.assertEqual(wikidata.is_actually_the_same_according_to_wikidata_structure("Q5835668", "Q806653"), True)
@ -43,14 +41,8 @@ class ConfigTests(unittest.TestCase):
self.assertEqual(wikidata.is_actually_the_same_according_to_wikidata_structure("Q5023980", "Q4748528"), True)
def test_subbrands(self):
if mismatch_detector_script.skipping_any_of_this_wikidata_ids("Q9345945", "Q110801183"):
return
if mismatch_detector_script.skipping_any_of_this_wikidata_ids("Q110801119", "Q110801183"):
return
self.assertEqual(wikidata.is_actually_the_same_according_to_wikidata_structure("Q9345945", "Q110801183"), True)
self.assertEqual(wikidata.is_actually_the_same_according_to_wikidata_structure("Q110801183", "Q110801119"), True)
def test_subbrands_of_former_competitor(self):
if mismatch_detector_script.skipping_any_of_this_wikidata_ids("Q3212934", "Q533415"):
return
self.assertEqual(wikidata.is_actually_the_same_according_to_wikidata_structure("Q3212934", "Q533415"), True)

View file

@ -10,6 +10,11 @@ def cache_path():
wikidata_cache = diskcache.Cache(cache_path(), eviction_policy="none")
print(len(wikidata_cache), "entries cached by Wikidata cache")
def purge_from_cache(wikidata_id):
    """Remove cached entries belonging to the given Wikidata item.

    Walks over every key in the module-level ``wikidata_cache`` and deletes
    each one that contains ``wikidata_id`` followed by an underscore (for
    example the ``<id>_label`` key built by ``get_wikidata_label``).
    Every deleted key is printed.

    Note that this is a substring test, not a prefix test, so a key that
    embeds ``<id>_`` anywhere in it is purged as well.
    """
    # NOTE(review): entries are deleted while the cache is being iterated;
    # confirm that the cache implementation tolerates this.
    for cache_key in wikidata_cache:
        # presumably all cache keys are strings - verify against the writers
        if (wikidata_id + "_") not in cache_key:
            continue
        del wikidata_cache[cache_key]
        print(cache_key)
def get_wikidata_label(wikidata_id):
index = wikidata_id + "_label"