mirror of
https://codeberg.org/matkoniecz/list_how_openstreetmap_can_be_improved_with_alltheplaces_data.git
synced 2025-04-11 10:09:29 +02:00
miserable but working opening hours parser
This commit is contained in:
parent
7706d3ec0f
commit
bff7c10792
2 changed files with 392 additions and 0 deletions
315
opening_hours_parser.py
Normal file
315
opening_hours_parser.py
Normal file
|
@ -0,0 +1,315 @@
|
|||
"""
|
||||
This is a low-quality parser of a tiny part of opening_hours syntax
|
||||
It definitely can be done better. I would appreciate pointers to better
|
||||
ways to achieve it in Python.
|
||||
It is definitely miserably slow.
|
||||
|
||||
It may be enough here as ATP does not emit elaborate opening_hours syntax
|
||||
So any opening hours that need them will mismatch with ATP opening hours by default
|
||||
|
||||
opening_hours key is described at https://wiki.openstreetmap.org/wiki/Key:opening_hours
|
||||
formal specification is at https://wiki.openstreetmap.org/wiki/Key:opening_hours/specification
|
||||
verification tool is at https://openingh.openstreetmap.de/evaluation_tool/?setLng=en
|
||||
|
||||
For supported examples see test_opening_hours_parser.py file
|
||||
"""
|
||||
import logging
import re
import time

import rich
|
||||
|
||||
class OpeningHours:
    """Parsed form of a small subset of the OSM ``opening_hours`` syntax.

    Supported input: ``24/7``, rule sequences separated by ``;``, weekday
    selectors made of a single day, a two-day list (``Th,Fr``) or a day range
    (``Mo-Fr``), one or more ``HH:MM-HH:MM`` time ranges (including ranges
    running past midnight) and the ``off`` / ``closed`` markers.

    Attributes:
        raw: the string passed to the constructor.
        parsed: ``None`` when the string could not be parsed, otherwise a dict
            mapping each of ``Mo`` .. ``Su`` to a list of ``TimeSelector``
            objects, sorted by start time and with overlapping ranges merged.
        respect_semicolon_override: when true, a rule sequence after ``;``
            replaces what earlier rule sequences said about the same days.

    Debug information is emitted through ``logging`` at DEBUG level; only
    ``display()`` writes to stdout.

    Note: ``__eq__`` is defined without ``__hash__``, so instances are
    unhashable.
    """

    DAYS = ('Mo', 'Tu', 'We', 'Th', 'Fr', 'Sa', 'Su')

    _log = logging.getLogger(__name__)

    # after a comma, a weekday means that a new weekday selector starts
    _DAY_PROBE = re.compile(r"^\s*(Mo|Tu|We|Th|Fr|Sa|Su)")
    # 10:00-18:00 or off/closed
    _TIME_SELECTOR = re.compile(r"^\s*((off|closed)|(\d+):(\d+)\s*-\s*(\d+):(\d+))")
    # Mo or Mo,Tu or Mo-Su
    # the separator class is [,-]: a `|` inside [] would be a literal pipe
    _WEEKDAY_SELECTOR = re.compile(r"^\s*(Mo|Tu|We|Th|Fr|Sa|Su)(([,-])(Mo|Tu|We|Th|Fr|Sa|Su)|)")

    class WeekdaySelector:
        """Set of weekdays built from a day range and/or a list of days.

        Example parameters:
            range_from=None, range_to=None, list_of_days=['Mo', 'Su']
            range_from='Mo', range_to='Tu', list_of_days=['Fr', 'Su']

        ``selected_days`` maps every weekday code to True/False.
        """

        VALID_DAYS = ('Mo', 'Tu', 'We', 'Th', 'Fr', 'Sa', 'Su')

        def is_valid_day(self, day):
            """Return True when `day` is one of the two-letter weekday codes."""
            return day in self.VALID_DAYS

        @staticmethod
        def next_day(day):
            """Return the weekday following `day`, wrapping from Su to Mo."""
            return {
                'Mo': 'Tu',
                'Tu': 'We',
                'We': 'Th',
                'Th': 'Fr',
                'Fr': 'Sa',
                'Sa': 'Su',
                'Su': 'Mo',
            }[day]

        def _require_valid_day(self, day):
            """Raise ValueError (an Exception subclass) for unknown day codes."""
            if not self.is_valid_day(day):
                # TODO be resistant to borked data, or maybe exceptions are fine?
                raise ValueError("`" + str(day) + "` is not valid")

        def __init__(self, range_from=None, range_to=None, list_of_days=None):
            """Select days from `range_from` to `range_to` plus `list_of_days`.

            The range is inclusive on both ends and may wrap over the weekend
            (Fr-Mo selects Fr, Sa, Su, Mo). `range_to` is only used when
            `range_from` is given.

            Raises:
                ValueError: when any supplied day is not a valid weekday code.
            """
            self.selected_days = {day: False for day in self.VALID_DAYS}
            if range_from is not None:
                self._require_valid_day(range_from)
                self._require_valid_day(range_to)
                day = range_from
                self.selected_days[day] = True
                # check before advancing, so Mo-Mo selects Monday only
                # rather than walking around the whole week
                while day != range_to:
                    day = self.next_day(day)
                    self.selected_days[day] = True
            if list_of_days is not None:
                for day in list_of_days:
                    self._require_valid_day(day)
                    self.selected_days[day] = True

    class TimeSelector:
        """Single time range within one day, stored as hours and minutes."""

        def __init__(self, from_hours, from_minutes, to_hours, to_minutes):
            """Store range ends; arguments may be ints or digit strings."""
            self.from_hours = int(from_hours)
            self.from_minutes = int(from_minutes)
            self.to_hours = int(to_hours)
            self.to_minutes = int(to_minutes)

        def from_time(self):
            """Return the start of the range in minutes since midnight."""
            return self.from_hours * 60 + self.from_minutes

        def to_time(self):
            """Return the end of the range in minutes since midnight."""
            return self.to_hours * 60 + self.to_minutes

        def copy(self):
            """Return an independent selector covering the same range."""
            return type(self)(self.from_hours, self.from_minutes, self.to_hours, self.to_minutes)

        def __str__(self):
            # zero padded, so 06:00 is not shown as 6:0
            return f"{self.from_hours:02d}:{self.from_minutes:02d}-{self.to_hours:02d}:{self.to_minutes:02d}"

        def __rich__(self):
            return self.__str__()

        def __lt__(self, other):
            # ordering by start time only, used for sorting before merging
            return self.from_time() < other.from_time()

    def __init__(self, opening_hours_string, respect_semicolon_override=True):
        """Parse `opening_hours_string` and keep both raw and parsed form.

        Unsupported syntax does not raise: `self.parsed` is then None.
        """
        self.respect_semicolon_override = respect_semicolon_override  # TODO remove that workaround for ATP
        self.raw = opening_hours_string
        self.parsed = self.parse(self.raw)
        if self.parsed is None:
            self._log.debug("failed to parse %s", self.raw)
        else:
            self._log.debug("parsed %s", self.raw)

    def parse(self, opening_hours_string):
        """Return a per-day dict of merged time ranges, or None if unsupported.

        The returned dict maps each of Mo..Su to a list of TimeSelector
        objects sorted by start time, with overlapping or touching ranges
        merged and empty ranges dropped. Ranges running past midnight are
        split into a part ending at 24:00 and a part on the following day.
        """
        self._log.debug("trying to parse %s", opening_hours_string)
        if '"' in opening_hours_string:
            # comment? giving up immediately
            return None
        if opening_hours_string.strip() == "24/7":
            return self.parse("Mo-Su 00:00-24:00")
        per_rule_sequence = []
        for rule_sequence in opening_hours_string.split(";"):
            # whitespace around a rule sequence carries no meaning
            entries = self._parse_rule_sequence(rule_sequence.strip())
            if entries is None:
                return None
            per_rule_sequence.append(entries)
        self._log.debug("rule sequences: %s", per_rule_sequence)
        returned = self._decompose_into_days(per_rule_sequence)
        for day in self.DAYS:
            returned[day] = self._merge_time_selectors(returned[day])
        return returned

    def _parse_rule_sequence(self, rule_sequence):
        """Parse one `;`-separated part into a list of entries, or None.

        Every entry is a dict with 'weekdays_selector' and 'time_selectors'.
        """
        entries = []
        remaining_part = rule_sequence
        while True:
            parsed_part = self.parse_small_range_selectors(remaining_part)
            if parsed_part is None:
                return None
            remaining_part = parsed_part['remaining_part']
            if 'time_selector_off_mode' in parsed_part:
                # TODO off marker may override earlier groups! Just ignoring them is problematic Add test
                current_entry = None
            else:
                current_entry = {
                    'weekdays_selector': parsed_part['weekdays_selector'],
                    'time_selectors': [parsed_part['time_selector']],
                }
                entries.append(current_entry)
            # either remaining_part is empty and we finished parsing of this part
            #
            # or what remains may look like
            # ,Sa 06:00-23:00
            # then it is simply another day range
            # but it may also look like
            # ,06:00-23:00
            # then it is another time range to be applied here
            #
            # anything else? we give up
            while remaining_part != "":
                if remaining_part[0] != ",":
                    self._log.debug("expected comma in `%s`", remaining_part)
                    return None  # unexpected
                remaining_part = remaining_part[1:]
                if self._DAY_PROBE.search(remaining_part) is not None:
                    # ready for next round of parsing
                    break
                # next hours of the same day
                parsed_part = self.parse_time_selector(remaining_part)
                if parsed_part is None:
                    return None
                if current_entry is None or 'time_selector' not in parsed_part:
                    # mixing off markers with extra time ranges is not
                    # supported, give up rather than attach hours to wrong days
                    return None
                remaining_part = parsed_part['remaining_part']
                # applies to the same days as the previous time selector
                current_entry['time_selectors'].append(parsed_part['time_selector'])
            if remaining_part == "":
                # finished parsing of this part
                return entries

    def _decompose_into_days(self, per_rule_sequence):
        """Turn parsed rule sequences into a dict of unmerged per-day ranges."""
        returned = {day: [] for day in self.DAYS}
        for from_one_rule_sequence in per_rule_sequence:
            if self.respect_semicolon_override:
                # apply overrides caused by semicolons
                # note that over-midnight overhang does not reset previous days
                # see say We 09:00-10:00,Tu 06:00-07:00;Tu 10:00-02:00
                # ";Tu 10:00-02:00" resets Tuesday part from previous rule
                # but not Wednesday part despite modifying both days
                for entry in from_one_rule_sequence:
                    for day, enabled in entry['weekdays_selector'].selected_days.items():
                        if enabled:
                            returned[day] = []
            for entry in from_one_rule_sequence:
                for day, enabled in entry['weekdays_selector'].selected_days.items():
                    if not enabled:
                        continue
                    for time_selector in entry['time_selectors']:
                        today, overhang = self._split_at_midnight(time_selector)
                        returned[day].append(today)
                        if overhang is not None:
                            returned[OpeningHours.WeekdaySelector.next_day(day)].append(overhang)
        return returned

    @staticmethod
    def _split_at_midnight(time_selector):
        """Return (part within the day, part on the next day or None).

        Always returns new objects: the same parsed selector is shared by
        every day of a day range, so it must not be modified here.
        """
        if time_selector.from_time() < time_selector.to_time() and time_selector.to_hours <= 23:
            # regular range
            return time_selector.copy(), None
        # over midnight range (or a range ending exactly at 24:00)
        canonical_to_hours_for_next_day = time_selector.to_hours
        if canonical_to_hours_for_next_day >= 24:
            # 24:00 becomes 00:00, so the next day gets an empty range
            # that is dropped during merging
            canonical_to_hours_for_next_day -= 24
        today = OpeningHours.TimeSelector(
            from_hours=time_selector.from_hours,
            from_minutes=time_selector.from_minutes,
            to_hours=24,
            to_minutes=0,
        )
        overhang = OpeningHours.TimeSelector(
            from_hours=0,
            from_minutes=0,
            to_hours=canonical_to_hours_for_next_day,
            to_minutes=time_selector.to_minutes,
        )
        return today, overhang

    @staticmethod
    def _merge_time_selectors(time_selectors):
        """Return sorted, non-overlapping copies of `time_selectors`."""
        merged_time_selectors = []
        for time_selector in sorted(time_selectors):
            if time_selector.from_time() == time_selector.to_time():
                # drop degenerated empty range
                continue
            if not merged_time_selectors or merged_time_selectors[-1].to_time() < time_selector.from_time():
                # first one or gap between ranges
                # copy, as the merged entry may be extended below
                merged_time_selectors.append(time_selector.copy())
            elif time_selector.to_time() > merged_time_selectors[-1].to_time():
                # merging!
                # we sorted by start time, so new range starts later or at the same time
                # so we may need to extend end time, but never start time
                merged_time_selectors[-1].to_hours = time_selector.to_hours
                merged_time_selectors[-1].to_minutes = time_selector.to_minutes
            # otherwise new time_selector is within already specified range
        return merged_time_selectors

    def parse_time_selector(self, remaining_part):
        """Consume a time range or off/closed marker from the start of input.

        Returns None when nothing matches, otherwise a dict with
        'remaining_part' (the unconsumed text) and either 'time_selector'
        (a TimeSelector) or 'time_selector_off_mode' (for off/closed).
        """
        self._log.debug("searching for time_selector in %s", remaining_part)
        time_selector = self._TIME_SELECTOR.search(remaining_part)
        if not time_selector:
            self._log.debug("time_selector not found")
            return None

        remaining_part = remaining_part[len(time_selector.group(0)):]
        if time_selector.group(2) is not None:
            # TODO off marker may still have time range!
            self._log.debug("found off marker, remaining_part=%s", remaining_part)
            return {'time_selector_off_mode': None, 'remaining_part': remaining_part}
        parsed_time_selector = OpeningHours.TimeSelector(
            from_hours=time_selector.group(3),
            from_minutes=time_selector.group(4),
            to_hours=time_selector.group(5),
            to_minutes=time_selector.group(6),
        )
        self._log.debug("matched %s, remaining_part=`%s`", parsed_time_selector, remaining_part)
        return {'time_selector': parsed_time_selector, 'remaining_part': remaining_part}

    def parse_small_range_selectors(self, remaining_part):
        """Consume a weekday selector and the time selector following it.

        Returns None when either is missing, otherwise the dict produced by
        parse_time_selector with 'weekdays_selector' (a WeekdaySelector) added.
        """
        self._log.debug("searching for weekday_selector in %s", remaining_part)
        weekday_selector = self._WEEKDAY_SELECTOR.search(remaining_part)
        if not weekday_selector:
            return None
        remaining_part = remaining_part[len(weekday_selector.group(0)):].strip()
        from_day = weekday_selector.group(1)
        separator = weekday_selector.group(3)
        to_day = weekday_selector.group(4)
        if separator == "-":
            # day range
            applicable_days = OpeningHours.WeekdaySelector(range_from=from_day, range_to=to_day, list_of_days=None)
        elif separator is None and to_day is None:
            # day listing having single day
            applicable_days = OpeningHours.WeekdaySelector(range_from=None, range_to=None, list_of_days=[from_day])
        else:
            # day listing having two days
            applicable_days = OpeningHours.WeekdaySelector(range_from=None, range_to=None, list_of_days=[from_day, to_day])
        self._log.debug("selected days: %s, remaining_part=`%s`", applicable_days.selected_days, remaining_part)

        result = self.parse_time_selector(remaining_part)
        if result is None:
            return None
        result['weekdays_selector'] = applicable_days
        return result

    def display(self):
        """Print the raw string and the parsed time ranges of every day."""
        print()
        print(self.raw)
        if self.parsed is None:
            # nothing to list, parsing gave up on this string
            print("  failed to parse")
            return
        for day in self.DAYS:
            print(day)
            for entry in self.parsed[day]:
                print(" ", entry)

    def __eq__(self, other):
        """Compare parsed opening hours day by day.

        When either side could not be parsed, the raw strings are compared
        instead.
        """
        if not isinstance(other, OpeningHours):
            return NotImplemented
        if self.parsed is None or other.parsed is None:
            self._log.debug("comparing raw values of %s and %s", self.raw, other.raw)
            return self.raw == other.raw
        for day in self.DAYS:
            mine = self.parsed[day]
            theirs = other.parsed[day]
            if len(mine) != len(theirs):
                return False
            # both have their time selectors ordered, so we can compare matching ones
            for left, right in zip(mine, theirs):
                left_range = (left.from_hours, left.from_minutes, left.to_hours, left.to_minutes)
                right_range = (right.from_hours, right.from_minutes, right.to_hours, right.to_minutes)
                if left_range != right_range:
                    return False
        return True
|
||||
|
||||
if __name__ == "__main__":
    # manual smoke check: the same rules in a different order should be equal
    monday_first = OpeningHours("Mo 06:00-23:00;Sa 06:00-23:00")
    saturday_first = OpeningHours("Sa 06:00-23:00;Mo 06:00-23:00")
    are_equal = monday_first == saturday_first
    print(are_equal)
|
77
test_opening_hours_parser.py
Normal file
77
test_opening_hours_parser.py
Normal file
|
@ -0,0 +1,77 @@
|
|||
import unittest
|
||||
import opening_hours_parser as parser
|
||||
|
||||
class TestOpeningHoursParser(unittest.TestCase):
    """Checks which opening_hours strings are treated as equivalent."""

    def _same(self, first, second):
        """Assert that both strings give equal OpeningHours objects."""
        self.assertEqual(parser.OpeningHours(first), parser.OpeningHours(second))

    def _different(self, first, second):
        """Assert that both strings give unequal OpeningHours objects."""
        self.assertNotEqual(parser.OpeningHours(first), parser.OpeningHours(second))

    def test_matches_itself(self):
        self._same("Mo-Sa 06:00-23:00", "Mo-Sa 06:00-23:00")

    def test_trivial_reordering_match(self):
        self._same("Mo 06:00-23:00;Sa 06:00-23:00", "Sa 06:00-23:00;Mo 06:00-23:00")

    def test_simple_over_midnight(self):
        self._same("Mo 06:00-01:00", "Mo 06:00-24:00;Tu 00:00-01:00")

    def test_semicolon_and_comma_separator_may_mean_the_same(self):
        equivalent_pairs = (
            ("Mo-Fr 06:00-23:00;Sa 06:00-23:00", "Mo-Fr 06:00-23:00,Sa 06:00-23:00"),
            ("Mo-Sa 06:00-23:00; Su off", "Mo-Sa 06:00-23:00, Su off"),
        )
        for with_semicolon, with_comma in equivalent_pairs:
            self._same(with_semicolon, with_comma)

    def test_semicolon_and_comma_separator_may_change_meaning_for_overnight_range_being_overriden(self):
        """
        `Mo-Fr 06:00-02:00;Sa 06:00-23:00` and `Mo-Fr 06:00-02:00,Sa 06:00-23:00`
        differ in meaning.

        With `;` the "Sa 06:00-23:00" rule overwrites the previous one, so the
        object is not open in the early morning on Saturday.
        With `,` both are combined and the object is open early on Saturday.
        """
        #self.assertNotEqual(parser.OpeningHours("Mo-Fr 06:00-02:00;Sa 06:00-23:00"), parser.OpeningHours("Mo-Fr 06:00-02:00,Sa 06:00-23:00")) # TODO ENABLE

    def test_day_ranges_may_be_split_in_pointless_parts(self):
        self._same("Mo-Fr 06:00-23:00,Sa 06:00-23:00", "Mo-Sa 06:00-23:00")

    def test_split_overnight_time_ranges(self):
        self._same("Mo 20:00-02:00, Tu 08:00-16:00", "Mo 20:00-24:00; Tu 00:00-02:00, 08:00-16:00")

    def test_day_range_vs_comma_separated_days(self):
        self._same(
            "Mo-Sa 06:00-23:00",
            "Mo 06:00-23:00,Tu 06:00-23:00, We 06:00-23:00, Th 06:00-23:00, Fr 06:00-23:00, Sa 06:00-23:00",
        )

    def test_semicolon_separated_day_ranges(self):
        self._same(
            "Mo 06:00-23:00;Tu 06:00-23:00;We 06:00-23:00;Th 06:00-23:00;Fr 06:00-23:00;Sa 06:00-23:00",
            "Mo-Sa 06:00-23:00",
        )

    def test_day_range_and_comma_separated_day_range(self):
        self._same("Mo-Fr 06:00-23:00;Sa 06:00-23:00", "Mo-Sa 06:00-23:00")

    def test_comma_separated_time_ranges_on_the_same_day(self):
        self._same("Mo-Sa 06:00-10:00,Mo-Sa 16:00-20:00", "Mo-Sa 06:00-10:00,16:00-20:00")

    @unittest.expectedFailure  # TODO: implement
    def test_implicit_day_range(self):
        self._same("Mo-Su 06:00-10:00", "06:00-10:00")

    def test_twenty_four_seven(self):
        self._same("Mo-Su 00:00-24:00", "24/7")

    def test_different_day_groupings(self):
        self._same("Th-Fr 09:00-21:00", "Th,Fr 09:00-21:00")
        self._same(
            "Mo-We 09:00-17:30; Th-Fr 09:00-21:00; Sa 09:00-17:00; Su 10:00-17:00",
            "Mo-We 09:00-17:30; Th,Fr 09:00-21:00; Sa 09:00-17:00; Su 10:00-17:00",
        )

    @unittest.expectedFailure  # TODO: implement
    def test_list_of_days_may_be_longer_than_two(self):
        self._same("Th-Sa 09:00-21:00", "Th,Fr,Sa 09:00-21:00")

    def test_noting_days_off_is_optional(self):
        self._same("Mo-Sa 06:00-23:00; Su off", "Mo-Sa 06:00-23:00")

    def test_two_ways_of_writting_off(self):
        self._same("Su off", "Su closed")

    def test_different_meaning_different_day_range(self):
        self._different("Mo-Sa 06:00-23:00", "Mo-Su 06:00-23:00")
|
||||
|
||||
# We 09:00-10:00,Tu 10:00-02:00
|
||||
# https://openingh.openstreetmap.de/evaluation_tool/?EXP=We%2009%3A00-10%3A00%2CTu%2010%3A00-02%3A00&lat=48.7769&lon=9.1844&mode=0&DATE=1737977880000
|
||||
# TODO
|
||||
|
||||
# We 09:00-09:00
|
||||
# https://openingh.openstreetmap.de/evaluation_tool/?EXP=We%2009%3A00-09%3A00&lat=48.7769&lon=9.1844&mode=0&DATE=1737977880000
|
||||
# TODO
|
Loading…
Add table
Reference in a new issue