'june nth, year' fix

Zeutschler · Sep 23, 2024 · 82be648 · 82be648
1 parent 12a01e6
commit 82be648
Show file tree

Hide file tree

Showing 4 changed files with 53 additions and 11 deletions.
diff --git a/datespan/__init__.py b/datespan/__init__.py
@@ -9,7 +9,7 @@
 from datespan.date_span_set import DateSpanSet
 
 __author__ = "Thomas Zeutschler"
-__version__ = "0.2.7"
+__version__ = "0.2.8"
 __license__ = "MIT"
 VERSION = __version__
 

diff --git a/datespan/parser/evaluator.py b/datespan/parser/evaluator.py
@@ -537,18 +537,39 @@ def evaluate_months(self, tokens):
         Evaluates a list of months, possibly with a year, and returns the corresponding date spans.
         """
         months = []
-        year = self.today.year  # Default to current year
+        year = 0
+        day = 0
         idx = 0
         if not tokens:
             return []
 
         # check if the last token is a special like 'ytd'
         tokens, special_token = self._extract_special_token(tokens)
 
-        # Check if the last token is a number (year)
+        # Check if the last token is a number (year) -> extract the year and remove it
         if tokens and tokens[-1].type == TokenType.NUMBER:
-            year = tokens[-1].value
-            tokens = tokens[:-1]  # Remove the year from tokens
+            value = tokens[-1].value
+            if DateSpan.MIN_DATE.year <= value <= DateSpan.MAX_DATE.year:
+                year = value
+                tokens = tokens[:-1]
+
+        # Check if the last token is a comma (','), e.g. as in 'June 1st, 2024' -> remove it
+        if tokens and tokens[-1].type == TokenType.PUNCTUATION and tokens[-1].value == ',':
+            tokens = tokens[:-1]
+
+        # Check if the last token is an ordinal or a plain day number (1-31), e.g. as in 'June 1st, 2024' or 'June 1, 2024' -> get the day of the month and remove it
+        if tokens and tokens[-1].type == TokenType.ORDINAL:
+            day = self.ordinal_to_int(tokens[-1].value)
+            tokens = tokens[:-1]
+        elif tokens and tokens[-1].type == TokenType.NUMBER:
+            value = tokens[-1].value
+            if 1 <= value <= 31:
+                day = value
+                tokens = tokens[:-1]
+
+        if year == 0:
+            year = self.today.year
+
         while idx < len(tokens):
             token = tokens[idx]
             if token.type == TokenType.IDENTIFIER and token.value in Lexer.MONTH_ALIASES.values():
@@ -559,8 +580,13 @@ def evaluate_months(self, tokens):
         for month_name in months:
             # Get the month number from the month name
             month_number = datetime.strptime(month_name[:3], '%b').month
-            from_date = datetime(int(year), month_number, 1)
-            to_date = from_date + relativedelta(months=1, days=-1)
+            if day == 0:
+                from_date = datetime(int(year), month_number, 1)
+                to_date = from_date + relativedelta(months=1, days=-1)
+            else:
+                from_date = datetime(int(year), month_number, day)
+                to_date = from_date
+
             start = datetime.combine(from_date.date(), time.min)
             end = datetime.combine(to_date.date(), time.max)
             date_spans.append((start, end))

diff --git a/datespan/parser/parser.py b/datespan/parser/parser.py
@@ -309,10 +309,14 @@ def relative_date_span(self):
         Parses a relative date span, such as 'last week' or 'next 3 months'.
         """
         tokens = []
-        while self.current_token.type in [TokenType.IDENTIFIER, TokenType.NUMBER, TokenType.ORDINAL,
+        while True:
+
+            if self.current_token.type in [TokenType.IDENTIFIER, TokenType.NUMBER, TokenType.ORDINAL,
                                           TokenType.TIME_UNIT, TokenType.SPECIAL]:
-            tokens.append(self.current_token)
-            self.eat(self.current_token.type)
+                tokens.append(self.current_token)
+                self.eat(self.current_token.type)
+            else:
+                break
         return DateSpanNode({'type': 'relative', 'tokens': tokens})
 
     def special_date_span(self):
@@ -343,12 +347,21 @@ def month_date_span(self):
                 self.eat(TokenType.PUNCTUATION)  # Consume comma or hyphen
             elif self.current_token.type == TokenType.IDENTIFIER and self.current_token.value == 'and':
                 self.eat(TokenType.IDENTIFIER)
+
         # Optionally consume 'of' and a year
         if self.current_token.type == TokenType.IDENTIFIER and self.current_token.value == 'of':
             self.eat(TokenType.IDENTIFIER)
         if self.current_token.type == TokenType.NUMBER:
-            tokens.append(self.current_token)  # Append the year
+            tokens.append(self.current_token)  # Append a year, month or day number
             self.eat(TokenType.NUMBER)
+            # check for punctuation ',' as in 'Jan 15, 2024'
+            if self.current_token.type == TokenType.PUNCTUATION:
+                if self.next_token.type in [TokenType.NUMBER, TokenType.ORDINAL]:
+                    tokens.append(self.current_token)
+                    self.eat(TokenType.PUNCTUATION)
+                    tokens.append(self.current_token)
+                    self.eat(self.current_token.type)
+
 
         # optional eat trailing time tokens
         if self.current_token.type == TokenType.TIME_UNIT:

diff --git a/tests/test_datespanset.py b/tests/test_datespanset.py
@@ -109,6 +109,9 @@ def test_datespans(self):
 
     def test_advanced(self):
         samples = [
+            "aug 15, 2023",
+            "aug 15th, 2023",
+            "June 1st, 2023",
             "Q4 last year", "Q3 2022", "3rd quarter of 2022",
             "Q2",  "June 2022",
             "1st quarter",