diff --git a/opening_hours_parser.py b/opening_hours_parser.py
new file mode 100644
index 0000000..9c23b59
--- /dev/null
+++ b/opening_hours_parser.py
@@ -0,0 +1,360 @@
+"""
+This is a low-quality parser of a tiny part of opening_hours syntax
+It definitely can be done better. I would appreciate pointers to better
+ways to achieve it in Python.
+It is definitely miserably slow.
+
+It may be enough here as ATP does not emit elaborate opening_hours syntax
+So any opening hours that need them will mismatch with ATP opening hours by default
+
+opening_hours key is described at https://wiki.openstreetmap.org/wiki/Key:opening_hours
+formal specification is at https://wiki.openstreetmap.org/wiki/Key:opening_hours/specification
+verification tool is at https://openingh.openstreetmap.de/evaluation_tool/?setLng=en
+
+For supported examples see test_opening_hours_parser.py file
+"""
+import logging
+import re
+import time  # noqa: F401 - not used below, import kept as it was
+
+import rich  # noqa: F401 - not used below, import kept as it was
+
+logger = logging.getLogger(__name__)
+
+WEEKDAYS = ('Mo', 'Tu', 'We', 'Th', 'Fr', 'Sa', 'Su')
+MINUTES_PER_DAY = 24 * 60
+TIME_SELECTOR_PATTERN = re.compile(r"^\s*((off|closed)|(\d+):(\d+)\s*-\s*(\d+):(\d+))")  # 10:00-18:00 or off or closed
+WEEKDAY_SELECTOR_PATTERN = re.compile(r"^\s*(Mo|Tu|We|Th|Fr|Sa|Su)(([,-])(Mo|Tu|We|Th|Fr|Sa|Su)|)")  # Mo or Mo,Tu or Mo-Su
+DAY_PROBE_PATTERN = re.compile(r"^\s*(Mo|Tu|We|Th|Fr|Sa|Su)")
+
+
+class OpeningHours():
+    """
+    Opening hours decomposed into time ranges for each day of the week
+
+    raw - string passed to the constructor
+    parsed - None when raw was not understood, otherwise dictionary mapping
+    each of 'Mo' ... 'Su' to sorted list of not overlapping TimeSelector
+    objects (range going over midnight is split between two days)
+    """
+
+    class WeekdaySelector():
+        """Set of weekdays, stored in selected_days as day -> bool dictionary"""
+
+        def is_valid_day(self, day):
+            return day in WEEKDAYS
+
+        @staticmethod
+        def next_day(day):
+            """Return day following given one, 'Su' is followed by 'Mo'"""
+            return {
+                'Mo': 'Tu',
+                'Tu': 'We',
+                'We': 'Th',
+                'Th': 'Fr',
+                'Fr': 'Sa',
+                'Sa': 'Su',
+                'Su': 'Mo',
+            }[day]
+
+        def __init__(self, range_from=None, range_to=None, list_of_days=None):
+            """
+            Select days from range_from to range_to (both included, range may
+            wrap over end of the week) and all days from list_of_days
+
+            Raise ValueError when any given day is not a valid one
+            """
+            # example parameters
+            # range_from=None, range_to=None, list_of_days=['Mo', 'Su']
+            # range_from='Mo', range_to='Tu', list_of_days=['Fr', 'Su']
+            self.selected_days = {day: False for day in WEEKDAYS}
+            if range_from is not None:
+                self._require_valid_day(range_from)
+                self._require_valid_day(range_to)
+                day = range_from
+                self.selected_days[day] = True
+                # condition is checked before stepping, so Mo-Mo selects
+                # just Monday rather than wrapping over the entire week
+                while day != range_to:
+                    day = OpeningHours.WeekdaySelector.next_day(day)
+                    self.selected_days[day] = True
+            if list_of_days is not None:
+                for day in list_of_days:
+                    self._require_valid_day(day)
+                    self.selected_days[day] = True
+
+        def _require_valid_day(self, day):
+            if not self.is_valid_day(day):
+                raise ValueError("`" + str(day) + "` is not valid")  # TODO be resistant to borked data, or maybe exceptions are fine?
+
+    class TimeSelector():
+        """Time range described by hours and minutes of its start and end"""
+
+        def __init__(self, from_hours, from_minutes, to_hours, to_minutes):
+            self.from_hours = int(from_hours)
+            self.from_minutes = int(from_minutes)
+            self.to_hours = int(to_hours)
+            self.to_minutes = int(to_minutes)
+
+        def from_time(self):
+            """Return start of the range as minutes counted from midnight"""
+            return self.from_hours * 60 + self.from_minutes
+
+        def to_time(self):
+            """Return end of the range as minutes counted from midnight"""
+            return self.to_hours * 60 + self.to_minutes
+
+        def __str__(self):
+            return f"{self.from_hours}:{self.from_minutes}-{self.to_hours}:{self.to_minutes}"
+
+        def __rich__(self):
+            return self.__str__()
+
+        def __lt__(self, other):
+            # ordered by start time only, used for sorting ranges of one day
+            return self.from_time() < other.from_time()
+
+    def __init__(self, opening_hours_string, respect_semicolon_override=True):
+        """
+        opening_hours_string - value to be parsed
+        respect_semicolon_override - if True, then rule sequence placed after
+        semicolon replaces earlier ones for days which it selects,
+        if False then all rule sequences are added together
+        """
+        self.respect_semicolon_override = respect_semicolon_override  # TODO remove that workaround for ATP
+        self.raw = opening_hours_string
+        # stays None for value which was not understood
+        self.parsed = self.parse(self.raw)
+
+    def parse(self, opening_hours_string):
+        """
+        Return dictionary mapping each weekday to sorted list of
+        not overlapping TimeSelector objects
+
+        Return None for value which is not understood
+        (only tiny part of opening_hours syntax is supported)
+        """
+        logger.debug("trying to parse %s", opening_hours_string)
+        if '"' in opening_hours_string:
+            # comment? giving up immediately
+            return None
+        if opening_hours_string.strip() == "24/7":
+            return self.parse("Mo-Su 00:00-24:00")
+        parsed_rule_sequences = []
+        for rule_sequence in opening_hours_string.split(";"):
+            entries = self._parse_rule_sequence(rule_sequence)
+            if entries is None:
+                return None
+            parsed_rule_sequences.append(entries)
+        per_day = self._decompose_into_days(parsed_rule_sequences)
+        return self._merge_time_selectors(per_day)
+
+    def _parse_rule_sequence(self, rule_sequence):
+        """
+        Parse single rule sequence (part of the value between semicolons)
+
+        Return list of {'weekdays_selector': ..., 'time_selectors': [...]}
+        entries, time_selectors is empty for days marked as off/closed
+        Return None if rule sequence is not understood
+        """
+        entries = []
+        remaining_part = rule_sequence
+        while True:
+            parsed_part = self.parse_small_range_selectors(remaining_part)
+            if parsed_part is None:
+                return None
+            if 'time_selector_off_mode' in parsed_part:
+                # entry without opening times: adds nothing by itself, but
+                # its days still take part in overriding of earlier rules
+                time_selectors = []
+            else:
+                time_selectors = [parsed_part['time_selector']]
+            entries.append({'weekdays_selector': parsed_part['weekdays_selector'], 'time_selectors': time_selectors})
+            remaining_part = parsed_part['remaining_part'].strip()
+            # either remaining_part is empty and we finished parsing of this part
+            #
+            # or what remains may look like
+            # ,Sa 06:00-23:00
+            # then it is simply another day range
+            # but it may also look like
+            # ,06:00-23:00
+            # then it is another time range to be applied here
+            #
+            # anything else? we give up
+            while remaining_part != "":
+                if remaining_part[0] != ",":
+                    logger.debug("expected comma at start of `%s`", remaining_part)
+                    return None  # unexpected
+                remaining_part = remaining_part[1:]
+                if DAY_PROBE_PATTERN.search(remaining_part) is not None:
+                    # ready for next round of parsing
+                    break
+                # next hours of the same day
+                parsed_part = self.parse_time_selector(remaining_part)
+                if parsed_part is None or 'time_selector' not in parsed_part:
+                    # nothing usable, or off marker where time range was expected
+                    return None
+                # applies to the same range as previous time selector already added to the list
+                entries[-1]['time_selectors'].append(parsed_part['time_selector'])
+                remaining_part = parsed_part['remaining_part'].strip()
+            if remaining_part == "":
+                # finished parsing of this part
+                return entries
+
+    def _decompose_into_days(self, parsed_rule_sequences):
+        """
+        Decompose entries of consecutive rule sequences into time ranges
+        of specific days, range going over midnight is split in two parts
+
+        Every returned TimeSelector is a new object, parsed ones are shared
+        between all days of their weekday selector and must stay unmodified
+        """
+        returned = {day: [] for day in WEEKDAYS}
+        for entries in parsed_rule_sequences:
+            if self.respect_semicolon_override:
+                # apply overrides caused by semicolons
+                # note that over-midnight overhang does not reset previous days
+                # see say We 09:00-10:00,Tu 06:00-07:00;Tu 10:00-02:00
+                # ";Tu 10:00-02:00" resets Tuesday part from previous rule
+                # but not Wednesday part despite modifying both days
+                for entry in entries:
+                    for day, enabled in entry['weekdays_selector'].selected_days.items():
+                        if enabled:
+                            returned[day] = []
+            for entry in entries:
+                for day, enabled in entry['weekdays_selector'].selected_days.items():
+                    if not enabled:
+                        continue
+                    following_day = OpeningHours.WeekdaySelector.next_day(day)
+                    for time_selector in entry['time_selectors']:
+                        from_time = time_selector.from_time()
+                        to_time = time_selector.to_time()
+                        if from_time < to_time <= MINUTES_PER_DAY:
+                            # regular range, ending at 24:00 at the latest
+                            returned[day].append(OpeningHours.TimeSelector(time_selector.from_hours, time_selector.from_minutes, time_selector.to_hours, time_selector.to_minutes))
+                        else:
+                            # over midnight range
+                            overhang = to_time
+                            if overhang > MINUTES_PER_DAY:
+                                overhang -= MINUTES_PER_DAY
+                            returned[day].append(OpeningHours.TimeSelector(time_selector.from_hours, time_selector.from_minutes, 24, 0))
+                            returned[following_day].append(OpeningHours.TimeSelector(0, 0, overhang // 60, overhang % 60))
+        return returned
+
+    @staticmethod
+    def _merge_time_selectors(per_day):
+        """
+        Sort time ranges of each day, drop empty ones and merge overlapping
+        or touching ones, so the same opening hours always get the same form
+
+        Lists in per_day and TimeSelector objects in them are modified,
+        per_day is returned
+        """
+        for day in per_day:
+            merged_time_selectors = []
+            for time_selector in sorted(per_day[day]):
+                if time_selector.from_time() == time_selector.to_time():
+                    # drop degenerated empty range
+                    continue
+                if not merged_time_selectors or merged_time_selectors[-1].to_time() < time_selector.from_time():
+                    # first one or gap between ranges, so nothing to merge with
+                    merged_time_selectors.append(time_selector)
+                elif time_selector.to_time() > merged_time_selectors[-1].to_time():
+                    # merging!
+                    # we sorted by start time, so new range starts later or at the same time
+                    # so we may need to extend end time, but never start time
+                    merged_time_selectors[-1].to_hours = time_selector.to_hours
+                    merged_time_selectors[-1].to_minutes = time_selector.to_minutes
+                # otherwise new time_selector is within already specified range
+            per_day[day] = merged_time_selectors
+        return per_day
+
+    def parse_time_selector(self, remaining_part):
+        """
+        Parse time range (10:00-18:00) or off/closed marker from start of remaining_part
+
+        Return None if neither was found, otherwise dictionary with
+        'remaining_part' (text left after what was parsed) and either
+        'time_selector' (TimeSelector) or 'time_selector_off_mode' key
+        """
+        time_selector = TIME_SELECTOR_PATTERN.search(remaining_part)
+        if not time_selector:
+            logger.debug("time_selector not found in `%s`", remaining_part)
+            return None
+        remaining_part = remaining_part[len(time_selector.group(0)):]
+        if time_selector.group(2) is not None:
+            # TODO off marker may still have time range!
+            return {'time_selector_off_mode': None, 'remaining_part': remaining_part}
+        parsed_time_selector = OpeningHours.TimeSelector(from_hours=time_selector.group(3), from_minutes=time_selector.group(4), to_hours=time_selector.group(5), to_minutes=time_selector.group(6))
+        return {'time_selector': parsed_time_selector, 'remaining_part': remaining_part}
+
+    def parse_small_range_selectors(self, remaining_part):
+        """
+        Parse weekday selector (Mo or Mo,Tu or Mo-Su) followed by time selector
+        from start of remaining_part
+
+        Return None if that failed, otherwise result of parse_time_selector
+        with 'weekdays_selector' (WeekdaySelector) added to it
+        """
+        weekday_selector = WEEKDAY_SELECTOR_PATTERN.search(remaining_part)
+        if not weekday_selector:
+            return None
+        remaining_part = remaining_part[len(weekday_selector.group(0)):].strip()
+        from_day = weekday_selector.group(1)
+        separator = weekday_selector.group(3)
+        to_day = weekday_selector.group(4)
+        if separator == "-":
+            applicable_days = OpeningHours.WeekdaySelector(range_from=from_day, range_to=to_day)
+        elif separator is None:
+            # single day
+            applicable_days = OpeningHours.WeekdaySelector(list_of_days=[from_day])
+        else:
+            applicable_days = OpeningHours.WeekdaySelector(list_of_days=[from_day, to_day])
+        logger.debug("days %s, remaining `%s`", applicable_days.selected_days, remaining_part)
+        result = self.parse_time_selector(remaining_part)
+        if result is None:
+            return None
+        result['weekdays_selector'] = applicable_days
+        return result
+
+    def display(self):
+        """Print raw value and what was parsed from it"""
+        print()
+        print(self.raw)
+        if self.parsed is None:
+            print("Failed to parse", self.raw)
+            return
+        for day in WEEKDAYS:
+            print(day)
+            for entry in self.parsed[day]:
+                print("  ", entry)
+
+    def __eq__(self, other):
+        """
+        Compare meaning of both values, not how they were written
+
+        If any of them was not understood, raw strings are compared instead
+        """
+        if not isinstance(other, OpeningHours):
+            return NotImplemented
+        if self.parsed is None or other.parsed is None:
+            logger.debug("failed to parse `%s` or `%s`", self.raw, other.raw)
+            return self.raw == other.raw
+        for day in WEEKDAYS:
+            if len(self.parsed[day]) != len(other.parsed[day]):
+                return False
+            # both have their time selectors ordered, so we can compare matching ones
+            for own, matching in zip(self.parsed[day], other.parsed[day]):
+                if (own.from_hours, own.from_minutes, own.to_hours, own.to_minutes) != (matching.from_hours, matching.from_minutes, matching.to_hours, matching.to_minutes):
+                    return False
+        return True
+
+    # objects are mutable and compared by value, so hashing stays disabled
+    # (what Python does anyway for class defining only __eq__)
+    __hash__ = None
+
+
+if __name__ == "__main__":
+    a = OpeningHours("Mo 06:00-23:00;Sa 06:00-23:00")
+    b = OpeningHours("Sa 06:00-23:00;Mo 06:00-23:00")
+    print(a == b)
diff --git a/test_opening_hours_parser.py b/test_opening_hours_parser.py
new file mode 100644
index 0000000..95c5315
--- /dev/null
+++ b/test_opening_hours_parser.py
@@ -0,0 +1,100 @@
+import unittest
+
+import opening_hours_parser as parser
+
+
+class TestOpeningHoursParser(unittest.TestCase):
+    def test_matches_itself(self):
+        self.assertEqual(parser.OpeningHours("Mo-Sa 06:00-23:00"), parser.OpeningHours("Mo-Sa 06:00-23:00"))
+
+    def test_trivial_reordering_match(self):
+        self.assertEqual(parser.OpeningHours("Mo 06:00-23:00;Sa 06:00-23:00"), parser.OpeningHours("Sa 06:00-23:00;Mo 06:00-23:00"))
+
+    def test_simple_over_midnight(self):
+        self.assertEqual(parser.OpeningHours("Mo 06:00-01:00"), parser.OpeningHours("Mo 06:00-24:00;Tu 00:00-01:00"))
+
+    def test_semicolon_and_comma_separator_may_mean_the_same(self):
+        self.assertEqual(parser.OpeningHours("Mo-Fr 06:00-23:00;Sa 06:00-23:00"), parser.OpeningHours("Mo-Fr 06:00-23:00,Sa 06:00-23:00"))
+        self.assertEqual(parser.OpeningHours("Mo-Sa 06:00-23:00; Su off"), parser.OpeningHours("Mo-Sa 06:00-23:00, Su off"))
+
+    def test_semicolon_and_comma_separator_may_change_meaning_for_overnight_range_being_overriden(self):
+        """
+        Note that
+        Mo-Fr 06:00-02:00;Sa 06:00-23:00
+        and
+        Mo-Fr 06:00-02:00,Sa 06:00-23:00
+        have a different meaning as with `;` early morning on Saturday is not open, as "Sa 06:00-23:00" rule overwrote previous one.
+        While with `,` union product is made and also early Saturday object is open
+        """
+        self.assertNotEqual(parser.OpeningHours("Mo-Fr 06:00-02:00;Sa 06:00-23:00"), parser.OpeningHours("Mo-Fr 06:00-02:00,Sa 06:00-23:00"))
+
+    def test_day_ranges_may_be_split_in_pointless_parts(self):
+        self.assertEqual(parser.OpeningHours("Mo-Fr 06:00-23:00,Sa 06:00-23:00"), parser.OpeningHours("Mo-Sa 06:00-23:00"))
+
+    def test_split_overnight_time_ranges(self):
+        self.assertEqual(parser.OpeningHours("Mo 20:00-02:00, Tu 08:00-16:00"), parser.OpeningHours("Mo 20:00-24:00; Tu 00:00-02:00, 08:00-16:00"))
+
+    def test_day_range_vs_comma_separated_days(self):
+        self.assertEqual(parser.OpeningHours("Mo-Sa 06:00-23:00"), parser.OpeningHours("Mo 06:00-23:00,Tu 06:00-23:00, We 06:00-23:00, Th 06:00-23:00, Fr 06:00-23:00, Sa 06:00-23:00"))
+
+    def test_semicolon_separated_day_ranges(self):
+        self.assertEqual(parser.OpeningHours("Mo 06:00-23:00;Tu 06:00-23:00;We 06:00-23:00;Th 06:00-23:00;Fr 06:00-23:00;Sa 06:00-23:00"), parser.OpeningHours("Mo-Sa 06:00-23:00"))
+
+    def test_day_range_and_comma_separated_day_range(self):
+        self.assertEqual(parser.OpeningHours("Mo-Fr 06:00-23:00;Sa 06:00-23:00"), parser.OpeningHours("Mo-Sa 06:00-23:00"))
+
+    def test_comma_separated_time_ranges_on_the_same_day(self):
+        self.assertEqual(parser.OpeningHours("Mo-Sa 06:00-10:00,Mo-Sa 16:00-20:00"), parser.OpeningHours("Mo-Sa 06:00-10:00,16:00-20:00"))
+
+    @unittest.expectedFailure  # TODO: implement
+    def test_implicit_day_range(self):
+        self.assertEqual(parser.OpeningHours("Mo-Su 06:00-10:00"), parser.OpeningHours("06:00-10:00"))
+
+    def test_twenty_four_seven(self):
+        self.assertEqual(parser.OpeningHours("Mo-Su 00:00-24:00"), parser.OpeningHours("24/7"))
+
+    def test_different_day_groupings(self):
+        self.assertEqual(parser.OpeningHours("Th-Fr 09:00-21:00"), parser.OpeningHours("Th,Fr 09:00-21:00"))
+        self.assertEqual(parser.OpeningHours("Mo-We 09:00-17:30; Th-Fr 09:00-21:00; Sa 09:00-17:00; Su 10:00-17:00"), parser.OpeningHours("Mo-We 09:00-17:30; Th,Fr 09:00-21:00; Sa 09:00-17:00; Su 10:00-17:00"))
+
+    @unittest.expectedFailure  # TODO: implement
+    def test_list_of_days_may_be_longer_than_two(self):
+        self.assertEqual(parser.OpeningHours("Th-Sa 09:00-21:00"), parser.OpeningHours("Th,Fr,Sa 09:00-21:00"))
+
+    def test_noting_days_off_is_optional(self):
+        self.assertEqual(parser.OpeningHours("Mo-Sa 06:00-23:00; Su off"), parser.OpeningHours("Mo-Sa 06:00-23:00"))
+
+    def test_two_ways_of_writting_off(self):
+        self.assertEqual(parser.OpeningHours("Su off"), parser.OpeningHours("Su closed"))
+
+    def test_different_meaning_different_day_range(self):
+        self.assertNotEqual(parser.OpeningHours("Mo-Sa 06:00-23:00"), parser.OpeningHours("Mo-Su 06:00-23:00"))
+
+    def test_value_which_is_not_understood_is_compared_as_text(self):
+        with_comment = 'Mo 10:00-12:00 "only by appointment"'
+        self.assertIsNone(parser.OpeningHours(with_comment).parsed)
+        self.assertEqual(parser.OpeningHours(with_comment), parser.OpeningHours(with_comment))
+        self.assertNotEqual(parser.OpeningHours(with_comment), parser.OpeningHours("Mo 10:00-12:00"))
+
+    def test_overnight_range_applied_to_multiple_days(self):
+        self.assertEqual(parser.OpeningHours("Mo-Tu 20:00-02:00"), parser.OpeningHours("Mo 20:00-24:00; Tu 00:00-02:00,20:00-24:00; We 00:00-02:00"))
+
+    def test_closing_at_midnight_does_not_open_next_day(self):
+        self.assertEqual(parser.OpeningHours("Mo 10:00-24:00").parsed['Tu'], [])
+
+    def test_merging_on_one_day_does_not_change_other_days(self):
+        self.assertEqual(parser.OpeningHours("Mo-Tu 08:00-12:00,Tu 10:00-14:00"), parser.OpeningHours("Mo 08:00-12:00; Tu 08:00-14:00"))
+
+    def test_off_overrides_earlier_rule(self):
+        self.assertEqual(parser.OpeningHours("Mo-Su 10:00-12:00; Su off"), parser.OpeningHours("Mo-Sa 10:00-12:00"))
+
+    def test_range_starting_and_ending_on_the_same_day(self):
+        self.assertEqual(parser.OpeningHours("Mo-Mo 10:00-12:00"), parser.OpeningHours("Mo 10:00-12:00"))
+
+# We 09:00-10:00,Tu 10:00-02:00
+# https://openingh.openstreetmap.de/evaluation_tool/?EXP=We%2009%3A00-10%3A00%2CTu%2010%3A00-02%3A00&lat=48.7769&lon=9.1844&mode=0&DATE=1737977880000
+# TODO
+
+# We 09:00-09:00
+# https://openingh.openstreetmap.de/evaluation_tool/?EXP=We%2009%3A00-09%3A00&lat=48.7769&lon=9.1844&mode=0&DATE=1737977880000
+# TODO