Skip to content

Commit

Permalink
'june nth, year' fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
Zeutschler committed Sep 23, 2024
1 parent 12a01e6 commit 82be648
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 11 deletions.
2 changes: 1 addition & 1 deletion datespan/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from datespan.date_span_set import DateSpanSet

__author__ = "Thomas Zeutschler"
__version__ = "0.2.7"
__version__ = "0.2.8"
__license__ = "MIT"
VERSION = __version__

Expand Down
38 changes: 32 additions & 6 deletions datespan/parser/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,18 +537,39 @@ def evaluate_months(self, tokens):
Evaluates a list of months, possibly with a year, and returns the corresponding date spans.
"""
months = []
year = self.today.year # Default to current year
year = 0
day = 0
idx = 0
if not tokens:
return []

# check if the last token is a special like 'ytd'
tokens, special_token = self._extract_special_token(tokens)

# Check if the last token is a number (year)
# Check if the last token is a number (year) -> extract the year and remove it
if tokens and tokens[-1].type == TokenType.NUMBER:
year = tokens[-1].value
tokens = tokens[:-1] # Remove the year from tokens
value = tokens[-1].value
if DateSpan.MIN_DATE.year <= value <= DateSpan.MAX_DATE.year:
year = value
tokens = tokens[:-1]

# Check if the last token is a punctuation (','), e.g. as in 'June 1st, 2024' -> remove it
if tokens and tokens[-1].type == TokenType.PUNCTUATION and tokens[-1].value == ',':
tokens = tokens[:-1]

# Check if the last token is an ordinal, e.g. as in 'June 1st, 2024' -> get the day of the month and remove it
if tokens and tokens[-1].type == TokenType.ORDINAL:
day = self.ordinal_to_int(tokens[-1].value)
tokens = tokens[:-1]
elif tokens and tokens[-1].type == TokenType.NUMBER:
value = tokens[-1].value
if 1 <= value <= 31:
day = value
tokens = tokens[:-1]

if year == 0:
year = self.today.year

while idx < len(tokens):
token = tokens[idx]
if token.type == TokenType.IDENTIFIER and token.value in Lexer.MONTH_ALIASES.values():
Expand All @@ -559,8 +580,13 @@ def evaluate_months(self, tokens):
for month_name in months:
# Get the month number from the month name
month_number = datetime.strptime(month_name[:3], '%b').month
from_date = datetime(int(year), month_number, 1)
to_date = from_date + relativedelta(months=1, days=-1)
if day == 0:
from_date = datetime(int(year), month_number, 1)
to_date = from_date + relativedelta(months=1, days=-1)
else:
from_date = datetime(int(year), month_number, day)
to_date = from_date

start = datetime.combine(from_date.date(), time.min)
end = datetime.combine(to_date.date(), time.max)
date_spans.append((start, end))
Expand Down
21 changes: 17 additions & 4 deletions datespan/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,10 +309,14 @@ def relative_date_span(self):
Parses a relative date span, such as 'last week' or 'next 3 months'.
"""
tokens = []
while self.current_token.type in [TokenType.IDENTIFIER, TokenType.NUMBER, TokenType.ORDINAL,
while True:

if self.current_token.type in [TokenType.IDENTIFIER, TokenType.NUMBER, TokenType.ORDINAL,
TokenType.TIME_UNIT, TokenType.SPECIAL]:
tokens.append(self.current_token)
self.eat(self.current_token.type)
tokens.append(self.current_token)
self.eat(self.current_token.type)
else:
break
return DateSpanNode({'type': 'relative', 'tokens': tokens})

def special_date_span(self):
Expand Down Expand Up @@ -343,12 +347,21 @@ def month_date_span(self):
self.eat(TokenType.PUNCTUATION) # Consume comma or hyphen
elif self.current_token.type == TokenType.IDENTIFIER and self.current_token.value == 'and':
self.eat(TokenType.IDENTIFIER)

# Optionally consume 'of' and a year
if self.current_token.type == TokenType.IDENTIFIER and self.current_token.value == 'of':
self.eat(TokenType.IDENTIFIER)
if self.current_token.type == TokenType.NUMBER:
tokens.append(self.current_token) # Append the year
tokens.append(self.current_token) # Append a year, month or day number
self.eat(TokenType.NUMBER)
# check for punctuation ',' as in 'Jan 15, 2024'
if self.current_token.type == TokenType.PUNCTUATION:
if self.next_token.type in [TokenType.NUMBER, TokenType.ORDINAL]:
tokens.append(self.current_token)
self.eat(TokenType.PUNCTUATION)
tokens.append(self.current_token)
self.eat(self.current_token.type)


# optional eat trailing time tokens
if self.current_token.type == TokenType.TIME_UNIT:
Expand Down
3 changes: 3 additions & 0 deletions tests/test_datespanset.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ def test_datespans(self):

def test_advanced(self):
samples = [
"aug 15, 2023",
"aug 15th, 2023",
"June 1st, 2023",
"Q4 last year", "Q3 2022", "3rd quarter of 2022",
"Q2", "June 2022",
"1st quarter",
Expand Down

0 comments on commit 82be648

Please sign in to comment.