From c14762cf237f3142217d732e0b133f5ec0a5af37 Mon Sep 17 00:00:00 2001 From: cirex Date: Sun, 22 Sep 2024 19:25:53 -0400 Subject: [PATCH] refactor: initial full cleanup (#157) * refactor: initial full cleanup * fix: fix time merge typo * chore: minor cleanup * chore: minor code cleanup --- .gitignore | 3 +- jest.config.js | 1 + src/containers/locationBuilder.ts | 163 +++++-------- src/containers/specials/specialsBuilder.ts | 31 ++- src/containers/time/parsedTime.ts | 4 +- src/containers/time/parsedTimeBase.ts | 2 - src/containers/time/parsedTimeForDate.ts | 55 ++++- src/containers/time/parsedTimeForDay.ts | 32 ++- src/containers/timeBuilder.ts | 204 ++++++++-------- src/overwrites/locationOverwrites.ts | 4 +- src/parser/diningParser.ts | 257 +++------------------ src/server.ts | 2 +- src/types.ts | 65 ++++++ src/utils/assertions.ts | 3 - src/utils/parseTimeToken.ts | 88 +++++++ src/utils/requestUtils.ts | 5 +- src/utils/timeUtils.ts | 227 +++--------------- 17 files changed, 511 insertions(+), 635 deletions(-) create mode 100644 src/types.ts delete mode 100644 src/utils/assertions.ts create mode 100644 src/utils/parseTimeToken.ts diff --git a/.gitignore b/.gitignore index a6b568b..3743485 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ package-lock.json .venv *.log __pycache__ -dist/ \ No newline at end of file +dist/ +coverage/ \ No newline at end of file diff --git a/jest.config.js b/jest.config.js index 1efcc9e..026cd12 100644 --- a/jest.config.js +++ b/jest.config.js @@ -4,4 +4,5 @@ module.exports = { transform: { "^.+.tsx?$": ["ts-jest", { diagnostics: { warnOnly: true } }], }, + moduleDirectories: ['node_modules', 'src'] }; \ No newline at end of file diff --git a/src/containers/locationBuilder.ts b/src/containers/locationBuilder.ts index 1831eb7..2367948 100644 --- a/src/containers/locationBuilder.ts +++ b/src/containers/locationBuilder.ts @@ -1,35 +1,10 @@ -import { DayOfTheWeek } from "../utils/timeUtils"; -import { ISpecial } from "./specials/specialsBuilder"; +import { Element, load } from "cheerio"; +import { getHTMLResponse } from "utils/requestUtils"; +import { LocationOverwrites } from "overwrites/locationOverwrites"; +import { getTimeRangesFromString } from "./timeBuilder"; +import { ICoordinate, ILocation, ISpecial, ITimeRange } from "../types"; +import { sortAndMergeTimeRanges } from "utils/timeUtils"; -export interface ILocation { - conceptId: number; - name?: string; - shortDescription?: string; - description: string; - url: string; - menu?: string; - location: string; - coordinates?: ICoordinate; - acceptsOnlineOrders: boolean; - times: ITime[]; - todaysSpecials?: ISpecial[]; - todaysSoups?: ISpecial[]; -} - -interface IMomentTime { - day: DayOfTheWeek; - hour: number; - minute: number; -} - -export interface ITime { - start: IMomentTime; - end: IMomentTime; -} -export interface ICoordinate { - lat: number; - lng: number; -} /** * For building the location data structure */ @@ -37,7 +12,7 @@ export default class LocationBuilder { static readonly CONCEPT_BASE_LINK = "https://apps.studentaffairs.cmu.edu/dining/conceptinfo/Concept/"; - private conceptId: number; + private conceptId?: number; private name?: string; private shortDescription?: string; private description?: string; @@ -46,94 +21,84 @@ export default class LocationBuilder { private menu?: string; private coordinates?: ICoordinate; private acceptsOnlineOrders?: boolean; - private times?: ITime[]; + private times?: ITimeRange[]; private specials?: ISpecial[]; private soups?: ISpecial[]; - private valid: boolean = true; - - constructor(conceptId: number) { - this.conceptId = conceptId; - } - setName(name: string): LocationBuilder { - this.name = name; - return this; - } - - setShortDesc(shortDesc: string): LocationBuilder { - this.shortDescription = shortDesc; - return this; - } - - setDesc(desc: string): LocationBuilder { - this.description = desc; - return this; - } - - setCoordinates(coordinates: ICoordinate): LocationBuilder { - this.coordinates = coordinates; - return this; - } + constructor(card: Element) { + const link = load(card)("h3.name.detailsLink"); + this.name = link.text().trim(); - setLocation(location: string): LocationBuilder { - this.location = location; - return this; - } + const conceptId = link.attr("onclick")?.match(/Concept\/(\d+)/)?.[1]; + this.conceptId = conceptId !== undefined ? parseInt(conceptId) : undefined; - setAcceptsOnlineOrders(acceptsOnlineOrders: boolean) { - this.acceptsOnlineOrders = acceptsOnlineOrders; - return this; + this.shortDescription = load(card)("div.description").text().trim(); } - - setURL(url: string) { - this.url = url; - return this; - } - - setMenu(menuLink: string) { - this.menu = menuLink; - return this; + overwriteLocation(locationOverwrites: LocationOverwrites) { + if ( + this.name !== undefined && + locationOverwrites[this.name] !== undefined + ) { + this.coordinates = locationOverwrites[this.name]; + } } - - setTimes(times: ITime[]) { - this.times = times; - return this; + setSoup(soupList: Record) { + if (this.name && soupList[this.name] !== undefined) { + this.soups = soupList[this.name]; + } } - - setSpecials(specials: ISpecial[]) { - this.specials = specials; - return this; + setSpecials(specialList: Record) { + if (this.name && specialList[this.name] !== undefined) { + this.specials = specialList[this.name]; + } } + convertMapsLinkToCoordinates(link: string) { + const atIndex = link.indexOf("@"); + const locationUrl = link.slice(atIndex + 1, link.length); + const commaIndex = locationUrl.indexOf(","); + const latitude = locationUrl.slice(0, commaIndex); + const longitude = locationUrl.slice(commaIndex + 1, locationUrl.length); + return { lat: parseFloat(latitude), lng: parseFloat(longitude) }; + } + + async populateDetailedInfo() { + const conceptURL = this.getConceptLink(); + if (!conceptURL) return; + + const $ = load(await getHTMLResponse(conceptURL)); + this.url = conceptURL.toString(); + this.description = $("div.description p").text().trim(); + this.menu = $("div.navItems > a#getMenu").attr("href"); + this.location = $("div.location a").text().trim(); + this.acceptsOnlineOrders = + $("div.navItems.orderOnline").toArray().length > 0; + + const locationHref = $("div.location a").attr("href"); + if (locationHref !== undefined) { + this.coordinates = this.convertMapsLinkToCoordinates(locationHref); + } - setSoups(soups: ISpecial[]) { - this.soups = soups; - return this; + const nextSevenDays = $("ul.schedule").find("li").toArray(); + this.times = sortAndMergeTimeRanges( + nextSevenDays.flatMap((rowHTML) => getTimeRangesFromString(rowHTML)) + ); } - - getConceptLink(): string { - return LocationBuilder.CONCEPT_BASE_LINK + this.conceptId; + getConceptLink() { + if (this.conceptId === undefined) return undefined; + return new URL(LocationBuilder.CONCEPT_BASE_LINK + this.conceptId); } - getName(): string | undefined { - return this.name; - } - invalidate() { - this.valid = false; - } - isValid() { - return this.valid; - } build(): ILocation { - if (!this.valid) throw Error("Location has been invalidated!"); if ( this.times === undefined || this.acceptsOnlineOrders === undefined || this.description === undefined || this.url === undefined || - this.location === undefined + this.location === undefined || + this.conceptId === undefined ) { throw Error( - "Didn't finish configuring restaurant before building metadata!" + "Didn't finish configuring location before building metadata!" ); // All fetches were good - yet we have missing data. This is a problem. } diff --git a/src/containers/specials/specialsBuilder.ts b/src/containers/specials/specialsBuilder.ts index 3920f79..23e897d 100644 --- a/src/containers/specials/specialsBuilder.ts +++ b/src/containers/specials/specialsBuilder.ts @@ -1,7 +1,5 @@ -export interface ISpecial { - title: string; - description?: string; -} +import { load } from "cheerio"; +import { ISpecial } from "types"; /** * For building the specials/soups data structure @@ -24,3 +22,28 @@ export default class SpecialsBuilder { return this.specials; } } + +export async function retrieveSpecials(htmlContent: string) { + const $ = load(htmlContent); + const cards = $("div.card").toArray(); + + const locationSpecialMap: Record = {}; + + for (const card of cards) { + const name = load(card)("h3.name").text().trim(); + const specialsBuilder = new SpecialsBuilder(); + + const specialsText = load(card)("div.specialDetails").text().trim(); + const specialsArray = specialsText.split(/(?<=\n)\s*(?=\S)/); + + for (let i = 0; i < specialsArray.length; i += 2) { + const title = specialsArray[i].trim(); + const description = specialsArray[i + 1]?.trim() || ""; + specialsBuilder.addSpecial(title, description); + } + + locationSpecialMap[name] = specialsBuilder.build(); + } + + return locationSpecialMap; +} diff --git a/src/containers/time/parsedTime.ts b/src/containers/time/parsedTime.ts index 5524e5a..b368be8 100644 --- a/src/containers/time/parsedTime.ts +++ b/src/containers/time/parsedTime.ts @@ -5,7 +5,7 @@ interface Time { minute: number; } -export interface ParsedTimeRange { +export interface IParsedTimeRange { start: Time; end: Time; } @@ -15,7 +15,7 @@ export interface ParsedTimeRange { * structure */ export default class ParsedTime extends ParsedTimeBase { - declare value: ParsedTimeRange; + declare value: IParsedTimeRange; private parseTime(timeStr: string): Time { const normalizedStr = timeStr.trim().toLowerCase(); diff --git a/src/containers/time/parsedTimeBase.ts b/src/containers/time/parsedTimeBase.ts index 532afdf..59c20c6 100644 --- a/src/containers/time/parsedTimeBase.ts +++ b/src/containers/time/parsedTimeBase.ts @@ -1,5 +1,3 @@ -import type { TimeInfoType } from "../../utils/timeUtils"; - /** * Base class for parsing time from a string */ diff --git a/src/containers/time/parsedTimeForDate.ts b/src/containers/time/parsedTimeForDate.ts index 4373f6f..c686796 100644 --- a/src/containers/time/parsedTimeForDate.ts +++ b/src/containers/time/parsedTimeForDate.ts @@ -1,11 +1,8 @@ -import { - convertMonthStringToEnum, - isValidDate, - MonthOfTheYear, -} from "../../utils/timeUtils"; +import { MonthOfTheYear } from "types"; +import { isValidDate } from "utils/parseTimeToken"; import ParsedTimeBase from "./parsedTimeBase"; -export interface ParsedTimeDate { +export interface IParsedTimeDate { month: MonthOfTheYear; date: number; } @@ -14,7 +11,7 @@ export interface ParsedTimeDate { * For parsing a string representing a date to a date data structure */ export default class ParsedTimeForDate extends ParsedTimeBase { - declare value: ParsedTimeDate; + declare value: IParsedTimeDate; parse() { const tokens = this.input.trim().split(/\s/); @@ -39,3 +36,47 @@ export default class ParsedTimeForDate extends ParsedTimeBase { return this; } } + +export function convertMonthStringToEnum(monthStr: string): MonthOfTheYear { + const normalizedMonth = monthStr.trim().toLowerCase(); + switch (normalizedMonth) { + case "january": + case "jan": + return MonthOfTheYear.JANUARY; + case "february": + case "feb": + return MonthOfTheYear.FEBRUARY; + case "march": + case "mar": + return MonthOfTheYear.MARCH; + case "april": + case "apr": + return MonthOfTheYear.APRIL; + case "may": + return MonthOfTheYear.MAY; + case "june": + case "jun": + return MonthOfTheYear.JUNE; + case "july": + case "jul": + return MonthOfTheYear.JULY; + case "august": + case "aug": + return MonthOfTheYear.AUGUST; + case "september": + case "sept": + case "sep": + return MonthOfTheYear.SEPTEMBER; + case "october": + case "oct": + return MonthOfTheYear.OCTOBER; + case "november": + case "nov": + return MonthOfTheYear.NOVEMBER; + case "december": + case "dec": + return MonthOfTheYear.DECEMBER; + default: + throw new Error(`Invalid Month: ${monthStr}`); + } +} diff --git a/src/containers/time/parsedTimeForDay.ts b/src/containers/time/parsedTimeForDay.ts index 23e8e64..e729a1e 100644 --- a/src/containers/time/parsedTimeForDay.ts +++ b/src/containers/time/parsedTimeForDay.ts @@ -1,4 +1,4 @@ -import { convertDayStringToEnum, DayOfTheWeek } from "../../utils/timeUtils"; +import { DayOfTheWeek } from "types"; import ParsedTimeBase from "./parsedTimeBase"; /** @@ -12,3 +12,33 @@ export default class ParsedTimeForDay extends ParsedTimeBase { return this; } } + +export function convertDayStringToEnum(dayStr: string): DayOfTheWeek { + const normalizedDay = dayStr.trim().toLowerCase(); + switch (normalizedDay) { + case "sunday": + case "sun": + return DayOfTheWeek.SUNDAY; + case "monday": + case "mon": + return DayOfTheWeek.MONDAY; + case "tuesday": + case "tue": + return DayOfTheWeek.TUESDAY; + case "wednesday": + case "wed": + return DayOfTheWeek.WEDNESDAY; + case "thursday": + case "thu": + case "thurs": + return DayOfTheWeek.THURSDAY; + case "friday": + case "fri": + return DayOfTheWeek.FRIDAY; + case "saturday": + case "sat": + return DayOfTheWeek.SATURDAY; + default: + throw new Error(`Invalid Day: ${dayStr}`); + } +} diff --git a/src/containers/timeBuilder.ts b/src/containers/timeBuilder.ts index b6ebe69..4728bea 100644 --- a/src/containers/timeBuilder.ts +++ b/src/containers/timeBuilder.ts @@ -1,138 +1,136 @@ -import { - DayOfTheWeek, - determineTimeInfoType, - getNextDay, - TimeInfoType, -} from "../utils/timeUtils"; -import { ITime } from "./locationBuilder"; -import ParsedTime, { ParsedTimeRange } from "./time/parsedTime"; -import ParsedTimeForDate, { ParsedTimeDate } from "./time/parsedTimeForDate"; -import ParsedTimeForDay from "./time/parsedTimeForDay"; +import { Element, load } from "cheerio"; +import { getNextDay } from "../utils/timeUtils"; +import { IParsedTimeRange } from "./time/parsedTime"; +import { IParsedTimeDate } from "./time/parsedTimeForDate"; +import { DayOfTheWeek, ITimeRange, TimeInfoType } from "types"; +import { parseToken } from "utils/parseTimeToken"; -interface TimeBuilderSchema { +interface ITimeRowAttributes { day?: DayOfTheWeek; - date?: ParsedTimeDate; + date?: IParsedTimeDate; /** Multiple times in the same day (ex. https://apps.studentaffairs.cmu.edu/dining/conceptinfo/Concept/180) */ - times?: ParsedTimeRange[]; + times?: IParsedTimeRange[]; closed?: boolean; twentyFour?: boolean; } /** - * For building the location schedules/times data structure + * + * @param rowString ex. Monday, September 09, 7:30 AM - 10:00 AM, 11:00 AM - 2:00 PM, 4:30 PM - 8:30 PM */ -export default class TimeBuilder { - private times: TimeBuilderSchema[]; +export function getTimeRangesFromString(rowHTML: Element) { + let timeRowInfo: ITimeRowAttributes = getTimeAttributesFromRow(rowHTML); + timeRowInfo = resolveAttributeConflicts(timeRowInfo); + return getTimeRangesFromTimeRow(timeRowInfo); +} - constructor() { - this.times = []; - } +function getTimeAttributesFromRow(rowHTML: Element) { + const { day, date, timeSlots } = tokenizeTimeRow(rowHTML); + return getTimeInfoWithRawAttributes([day, date, ...timeSlots]); +} - private resolveConflicts(input: TimeBuilderSchema): TimeBuilderSchema { - if (input.closed) { - return { - day: input.day, - date: input.date, - closed: input.closed, - }; - } - if (input.times && input.times.length > 0) { - return { - day: input.day, - date: input.date, - times: input.times, - }; +function tokenizeTimeRow(rowHTML: Element) { + const $ = load(rowHTML); + let day = $("strong").text(); + const dataStr = $.text().replace(/\s\s+/g, " ").replace(day, "").trim(); + let [date, time] = dataStr.split(/,(.+)/); + + day = (day.charAt(0).toUpperCase() + day.slice(1).toLowerCase()).trim(); + date = (date.charAt(0).toUpperCase() + date.slice(1).toLowerCase()).trim(); + time = time.toUpperCase().trim(); + const timeSlots = time.split(/[,;]/).map((slot) => slot.trim()); + return { day, date, timeSlots }; +} + +function getTimeInfoWithRawAttributes(tokens: string[]) { + const timeInfo: ITimeRowAttributes = {}; + + for (const token of tokens) { + try { + const { type: timeInfoType, value } = parseToken(token); + switch (timeInfoType) { + case TimeInfoType.DAY: + timeInfo.day = value; + break; + case TimeInfoType.DATE: + timeInfo.date = value; + break; + case TimeInfoType.TIME: + if (timeInfo.times !== undefined) { + timeInfo.times.push(value); + } else { + timeInfo.times = [value]; + } + break; + case TimeInfoType.CLOSED: + timeInfo.closed = true; + break; + case TimeInfoType.TWENTYFOURHOURS: + timeInfo.twentyFour = true; + break; + } + } catch (err) { + console.error(err); + continue; } + } + return timeInfo; +} + +function resolveAttributeConflicts( + input: ITimeRowAttributes +): ITimeRowAttributes { + if (input.closed) { return { day: input.day, date: input.date, - times: [{ start: { hour: 0, minute: 0 }, end: { hour: 23, minute: 59 } }], + closed: input.closed, }; } + if (input.times && input.times.length > 0) { + return { + day: input.day, + date: input.date, + times: input.times, + }; + } + return { + day: input.day, + date: input.date, + times: [{ start: { hour: 0, minute: 0 }, end: { hour: 23, minute: 59 } }], + }; +} - addSchedule(timeArray: Array): TimeBuilder { - const timeFields: TimeBuilderSchema = {}; - for (const token of timeArray) { - const timeInfoType = determineTimeInfoType(token); - try { - switch (timeInfoType) { - case TimeInfoType.DAY: - timeFields.day = new ParsedTimeForDay(token).parse().value; - break; - case TimeInfoType.DATE: - timeFields.date = new ParsedTimeForDate(token).parse().value; - break; - case TimeInfoType.TIME: - const timeRange = new ParsedTime(token).parse().value; - if (Array.isArray(timeFields.times)) { - timeFields.times.push(timeRange); - } else { - timeFields.times = [timeRange]; - } - break; - case TimeInfoType.CLOSED: - timeFields.closed = true; - break; - case TimeInfoType.TWENTYFOURHOURS: - timeFields.twentyFour = true; - break; - } - } catch (err) { - console.error(err); - continue; - } - } - const normalizedSchedule = this.resolveConflicts(timeFields); - this.times.push(normalizedSchedule); - - return this; +function getTimeRangesFromTimeRow(time: ITimeRowAttributes) { + if (time.day === undefined) { + throw new Error("Cannot convert when day is not set"); } + const allRanges: ITimeRange[] = []; + for (const range of time.times ?? []) { + rollBack12AmEndTime(range); - private convertTimeRangeToTimeSchema( - time: TimeBuilderSchema, - range: ParsedTimeRange - ) { - if (time.day === undefined) { - throw new Error("Cannot convert when day is not set"); - } - const spillToNextDay = + const shouldSpillToNextDay = range.start.hour * 60 + range.start.minute > range.end.hour * 60 + range.end.minute; - return { + allRanges.push({ start: { day: time.day, hour: range.start.hour, minute: range.start.minute, }, end: { - day: spillToNextDay ? getNextDay(time.day) : time.day, + day: shouldSpillToNextDay ? getNextDay(time.day) : time.day, hour: range.end.hour, minute: range.end.minute, }, - }; - } - - build() { - const result: ITime[] = []; - for (const time of this.times) { - if (Array.isArray(time.times)) { - result.push( - ...time.times.map((current) => { - return this.convertTimeRangeToTimeSchema(time, current); - }) - ); - } - } - result.sort((timeA, timeB) => { - const startA = timeA.start; - const startB = timeB.start; - - if (startA.day !== startB.day) return startA.day - startB.day; - if (startA.hour !== startB.hour) return startA.hour - startB.hour; - return startA.minute - startB.minute; }); - - return result; + } + return allRanges; +} +function rollBack12AmEndTime(range: IParsedTimeRange) { + if (range.end.hour === 0 && range.end.minute === 0) { + range.end.hour = 23; + range.end.minute = 59; } } diff --git a/src/overwrites/locationOverwrites.ts b/src/overwrites/locationOverwrites.ts index dc743a7..41667c0 100644 --- a/src/overwrites/locationOverwrites.ts +++ b/src/overwrites/locationOverwrites.ts @@ -1,6 +1,6 @@ -import { ICoordinate } from "containers/locationBuilder"; +import { ICoordinate } from "types"; -type LocationOverwrites = { +export type LocationOverwrites = { [conceptName: string]: ICoordinate; }; diff --git a/src/parser/diningParser.ts b/src/parser/diningParser.ts index b84a52b..fe3ec16 100644 --- a/src/parser/diningParser.ts +++ b/src/parser/diningParser.ts @@ -1,12 +1,9 @@ import { getHTMLResponse } from "../utils/requestUtils"; -import { determineTimeInfoType, TimeInfoType } from "../utils/timeUtils"; -import { CheerioAPI, load } from "cheerio"; -import LocationBuilder, { ILocation } from "../containers/locationBuilder"; -import TimeBuilder from "../containers/timeBuilder"; -import SpecialsBuilder, { - ISpecial, -} from "../containers/specials/specialsBuilder"; -import locationOverwrites from "../overwrites/locationOverwrites"; +import { load } from "cheerio"; +import LocationBuilder from "../containers/locationBuilder"; +import { retrieveSpecials } from "../containers/specials/specialsBuilder"; +import locationOverwrites from "overwrites/locationOverwrites"; +import { ILocation, ISpecial } from "types"; /** * Retrieves the HTML from the CMU Dining website and parses the information @@ -19,232 +16,52 @@ export default class DiningParser { "https://apps.studentaffairs.cmu.edu/dining/conceptinfo/Specials"; static readonly DINING_SOUPS_URL = "https://apps.studentaffairs.cmu.edu/dining/conceptinfo/Soups"; - static readonly DINING_MENUS_BASE_URL = - "https://apps.studentaffairs.cmu.edu/dining/conceptinfo/"; - - private $?: CheerioAPI; constructor() {} - private async preprocess() { + async process(): Promise { + const locationBuilders = + await this.initializeLocationBuildersFromMainPage(); + + const [specials, soups] = await this.fetchSpecials(); + + for (const builder of locationBuilders) { + await builder.populateDetailedInfo(); + builder.setSoup(soups); + builder.setSpecials(specials); + builder.overwriteLocation(locationOverwrites); + } + + return locationBuilders.map((builder) => builder.build()); + } + + private async initializeLocationBuildersFromMainPage(): Promise< + LocationBuilder[] + > { const mainPageHTML = await getHTMLResponse( new URL(DiningParser.DINING_URL) ); - this.$ = load(mainPageHTML); - } - - private retrieveBasicLocationInfo(): LocationBuilder[] { - const mainContainer = this.$?.("div.conceptCards"); + const mainContainer = load(mainPageHTML)("div.conceptCards"); if (mainContainer === undefined) { throw new Error("Unable to load page"); } - const linkHeaders = mainContainer?.find("div.card"); + const linkHeaders = mainContainer.find("div.card"); if (linkHeaders === undefined) { return []; } - const info = Array.from(linkHeaders).map((card) => { - const link = load(card)("h3.name.detailsLink"); - const onClickAttr = link.attr("onclick"); - const conceptId = onClickAttr?.match(/Concept\/(\d+)/)?.[1]; - if (conceptId === undefined) { - return undefined; - } - const name = link.text().trim(); - const shortDesc = load(card)("div.description").text().trim(); - - const builder = new LocationBuilder(parseInt(conceptId)); - if (name !== undefined) { - builder.setName(name); - } - if (shortDesc !== undefined) { - builder.setShortDesc(shortDesc); - } - return builder; - }); - return info.filter((item): item is LocationBuilder => item !== undefined); - } - - private convertMapsLinkToCoordinates(link: string): [number, number] { - const atIndex = link.indexOf("@"); - const locationUrl = link.slice(atIndex + 1, link.length); - const commaIndex = locationUrl.indexOf(","); - const latitude = locationUrl.slice(0, commaIndex); - const longitude = locationUrl.slice(commaIndex + 1, locationUrl.length); - return [parseFloat(latitude), parseFloat(longitude)]; - } - - private async retrieveDetailedInfoForLocation(builder: LocationBuilder) { - const conceptLink = builder.getConceptLink(); - const conceptHTML = await getHTMLResponse(new URL(conceptLink)); - const $ = load(conceptHTML); - builder.setURL(conceptLink); - const description = $("div.description p").text().trim(); - builder.setDesc(description); - - const menuHref = $("div.navItems > a#getMenu").attr("href"); - if (menuHref) { - builder.setMenu(menuHref); - } - - builder.setLocation($("div.location a").text().trim()); - const locationHref = $("div.location a").attr("href"); - const name = builder.getName(); - - if (name !== undefined && locationOverwrites[name] !== undefined) { - builder.setCoordinates(locationOverwrites[name]); - } else if (locationHref !== undefined) { - const [lat, lng] = this.convertMapsLinkToCoordinates(locationHref); - builder.setCoordinates({ lat, lng }); - } - - const timeBuilder = new TimeBuilder(); - const nextSevenDays = $("ul.schedule").find("li").toArray(); - const addedSchedules = new Set(); - - for (const day of nextSevenDays) { - let dayStr = load(day)("strong").text(); - dayStr = dayStr.charAt(0).toUpperCase() + dayStr.slice(1).toLowerCase(); - - const dataStr = load(day) - .text() - .replace(/\s\s+/g, " ") - .replace(dayStr, "") - .trim(); - - let [dateStr, timeStr] = dataStr.split(/,(.+)/); - dateStr = - dateStr.charAt(0).toUpperCase() + dateStr.slice(1).toLowerCase(); - timeStr = timeStr.toUpperCase().trim(); - - const timeInfoType = determineTimeInfoType(timeStr); - - if ( - timeInfoType === TimeInfoType.CLOSED || - timeInfoType === TimeInfoType.TWENTYFOURHOURS - ) { - const scheduleString = `${dayStr.trim()}, ${timeStr}`; - addedSchedules.add(scheduleString); - timeBuilder.addSchedule([dayStr.trim(), dateStr.trim(), timeStr]); - } else if (timeInfoType === TimeInfoType.TIME) { - const timeSlots = timeStr.split(/[,;]/).map((slot) => slot.trim()); - - // Sort time slots based on opening time - timeSlots.sort((a, b) => { - const [aStart, aEnd] = a.split("-").map((time) => time.trim()); - const [bStart, bEnd] = b.split("-").map((time) => time.trim()); - const startComparison = aStart.localeCompare(bStart); - if (startComparison !== 0) { - return startComparison; - } - return bEnd.localeCompare(aEnd); // Reverse order for end times - }); - - // Merge overlapping, contained, and duplicate time slots - const mergedTimeSlots = []; - let prevSlot = null; - for (const timeSlot of timeSlots) { - const [start, end] = timeSlot.split("-").map((time) => time.trim()); - - if (prevSlot && start === prevSlot.start) { - // If the current time slot has the same opening time as the previous one - // Update the previous slot with the later closing time - if (end > prevSlot.end) { - prevSlot.end = end; - } - } else { - mergedTimeSlots.push({ start, end }); - prevSlot = { start, end }; - } - } - - // Format and add merged time slots - mergedTimeSlots.forEach((slot) => { - let { start, end } = slot; - - // Handle case where end time is 12:00 AM - if (/12:00 AM$/i.test(end)) { - end = end.replace(/12:00 AM$/i, "11:59 PM"); - } - - const scheduleString = `${dayStr.trim()}, ${start} - ${end}`; - if (!addedSchedules.has(scheduleString)) { - addedSchedules.add(scheduleString); - timeBuilder.addSchedule([ - dayStr.trim(), - dateStr.trim(), - `${start} - ${end}`, - ]); - } - }); - } - } - - builder.setTimes(timeBuilder.build()); - - const onlineDiv = $("div.navItems.orderOnline").toArray(); - builder.setAcceptsOnlineOrders(onlineDiv.length > 0); - } - - private async retrieveSpecials(url: URL): Promise> { - const specialsHTML = await getHTMLResponse(url); - const $ = load(specialsHTML); - const cards = $("div.card").toArray(); - - const locationSpecialMap = new Map(); - - for (const card of cards) { - const name = load(card)("h3.name").text().trim(); - const specialsBuilder = new SpecialsBuilder(); - - const specialsText = load(card)("div.specialDetails").text().trim(); - const specialsArray = specialsText.split(/(?<=\n)\s*(?=\S)/); - - for (let i = 0; i < specialsArray.length; i += 2) { - const title = specialsArray[i].trim(); - const description = specialsArray[i + 1]?.trim() || ""; - specialsBuilder.addSpecial(title, description); - } - - locationSpecialMap.set(name, specialsBuilder.build()); - } - - return locationSpecialMap; + return Array.from(linkHeaders).map((card) => new LocationBuilder(card)); } - async process(): Promise { - await this.preprocess(); - const locationInfo = this.retrieveBasicLocationInfo(); - - const [specials, soups] = await Promise.all([ - this.retrieveSpecials(new URL(DiningParser.DINING_SPECIALS_URL)), - this.retrieveSpecials(new URL(DiningParser.DINING_SOUPS_URL)), + private async fetchSpecials(): Promise< + [Record, Record] + > { + return await Promise.all([ + retrieveSpecials( + await getHTMLResponse(new URL(DiningParser.DINING_SPECIALS_URL)) + ), + retrieveSpecials( + await getHTMLResponse(new URL(DiningParser.DINING_SOUPS_URL)) + ), ]); - - for (const builder of locationInfo) { - const name = builder.getName(); - if (name !== undefined) { - const specialList = specials.get(name); - const soupList = soups.get(name); - - if (Array.isArray(specialList)) { - builder.setSpecials(specialList); - } - - if (Array.isArray(soupList)) { - builder.setSoups(soupList); - } - } - - try { - await this.retrieveDetailedInfoForLocation(builder); - } catch (error) { - console.error(`Failed to retrieve detailed info for ${name}:`, error); - builder.invalidate(); - } - } - - return locationInfo - .filter((builder) => builder.isValid()) - .map((builder) => builder.build()); } } diff --git a/src/server.ts b/src/server.ts index 8bc9c4b..04415c2 100644 --- a/src/server.ts +++ b/src/server.ts @@ -1,7 +1,7 @@ import { Elysia } from "elysia"; import { cors } from "@elysiajs/cors"; import DiningParser from "./parser/diningParser"; -import { ILocation } from "./containers/locationBuilder"; +import { ILocation } from "types"; const PORT = process.env.PORT ?? 5010; let cachedLocations: ILocation[]; diff --git a/src/types.ts b/src/types.ts new file mode 100644 index 0000000..57ffefe --- /dev/null +++ b/src/types.ts @@ -0,0 +1,65 @@ +export interface ILocation { + conceptId: number; + name?: string; + shortDescription?: string; + description: string; + url: string; + menu?: string; + location: string; + coordinates?: ICoordinate; + acceptsOnlineOrders: boolean; + times: ITimeRange[]; + todaysSpecials?: ISpecial[]; + todaysSoups?: ISpecial[]; +} +export interface ISpecial { + title: string; + description?: string; +} + +export interface ITimeMoment { + day: DayOfTheWeek; + hour: number; + minute: number; +} + +export interface ITimeRange { + start: ITimeMoment; + end: ITimeMoment; +} +export interface ICoordinate { + lat: number; + lng: number; +} +export enum DayOfTheWeek { + SUNDAY = 0, + MONDAY = 1, + TUESDAY = 2, + WEDNESDAY = 3, + THURSDAY = 4, + FRIDAY = 5, + SATURDAY = 6, +} + +export enum MonthOfTheYear { + JANUARY = 1, + FEBRUARY = 2, + MARCH = 3, + APRIL = 4, + MAY = 5, + JUNE = 6, + JULY = 7, + AUGUST = 8, + SEPTEMBER = 9, + OCTOBER = 10, + NOVEMBER = 11, + DECEMBER = 12, +} + +export enum TimeInfoType { + DAY = "DAY", + DATE = "DATE", + TIME = "TIME", + CLOSED = "CLOSED", + TWENTYFOURHOURS = "TWENTYFOURHOURS", +} diff --git a/src/utils/assertions.ts b/src/utils/assertions.ts deleted file mode 100644 index c823888..0000000 --- a/src/utils/assertions.ts +++ /dev/null @@ -1,3 +0,0 @@ -export function throwUnreachable(): never { - throw new Error("Unreachable code reached!"); -} diff --git a/src/utils/parseTimeToken.ts b/src/utils/parseTimeToken.ts new file mode 100644 index 0000000..0cd4ede --- /dev/null +++ b/src/utils/parseTimeToken.ts @@ -0,0 +1,88 @@ +import ParsedTime from "containers/time/parsedTime"; +import ParsedTimeForDate, { + convertMonthStringToEnum, +} from "containers/time/parsedTimeForDate"; +import ParsedTimeForDay, { + convertDayStringToEnum, +} from "containers/time/parsedTimeForDay"; +import { TimeInfoType, MonthOfTheYear } from "types"; + +export function parseToken(token: string) { + token = token.trim().toLowerCase(); + if (isDay(token)) { + return { + type: TimeInfoType.DAY, + value: new ParsedTimeForDay(token).parse().value, + } as const; + } + const testMonth = token.split(/\s/)[0]; + if (isMonth(testMonth)) { + return { + type: TimeInfoType.DATE, + value: new ParsedTimeForDate(token).parse().value, + } as const; + } + if ( + token === "24 hours" || + token === "24 hrs" || + token === "open 24 hrs" || + token === "open 24 hours" + ) { + return { type: TimeInfoType.TWENTYFOURHOURS } as const; + } + if (token === "closed") { + return { type: TimeInfoType.CLOSED } as const; + } + if ( + Array.isArray( + token.match(/\d\d?:\d\d\s?(?:am|pm)\s?-\s?\d\d?:\d\d\s?(?:am|pm)/) + ) + ) { + return { + type: TimeInfoType.TIME, + value: new ParsedTime(token).parse().value, + } as const; + } + throw new Error(`Could not determine time info type of string '${token}'`); +} + +export function isDay(input: string): boolean { + try { + convertDayStringToEnum(input); + return true; + } catch { + return false; + } +} + +export function isMonth(input: string): boolean { + try { + convertMonthStringToEnum(input); + return true; + } catch { + return false; + } +} + +export function isValidDate(month: MonthOfTheYear, date: number): boolean { + if (!Number.isInteger(date)) { + return false; + } + switch (month) { + case MonthOfTheYear.JANUARY: + case MonthOfTheYear.MARCH: + case MonthOfTheYear.MAY: + case MonthOfTheYear.JULY: + case MonthOfTheYear.AUGUST: + case MonthOfTheYear.OCTOBER: + case MonthOfTheYear.DECEMBER: + return date <= 31 && date >= 1; + case MonthOfTheYear.FEBRUARY: + return date <= 29 && date >= 1; + case MonthOfTheYear.APRIL: + case MonthOfTheYear.JUNE: + case MonthOfTheYear.SEPTEMBER: + case MonthOfTheYear.NOVEMBER: + return date <= 30 && date >= 1; + } +} diff --git a/src/utils/requestUtils.ts b/src/utils/requestUtils.ts index 6e06b0d..df87787 100644 --- a/src/utils/requestUtils.ts +++ b/src/utils/requestUtils.ts @@ -4,7 +4,10 @@ const wait = (ms: number) => { return new Promise((re) => setTimeout(re, ms)); }; -export async function getHTMLResponse(url: URL, retriesLeft = 4): Promise { +export async function getHTMLResponse( + url: URL, + retriesLeft = 4 +): Promise { try { const response = await axios.get(url.toString()); return response.data; diff --git a/src/utils/timeUtils.ts b/src/utils/timeUtils.ts index a5da2d5..67e630e 100644 --- a/src/utils/timeUtils.ts +++ b/src/utils/timeUtils.ts @@ -1,38 +1,4 @@ -import ParsedTimeBase from "../containers/time/parsedTimeBase"; -import ParsedTimeForDay from "../containers/time/parsedTimeForDay"; - -export enum DayOfTheWeek { - SUNDAY, - MONDAY, - TUESDAY, - WEDNESDAY, - THURSDAY, - FRIDAY, - SATURDAY, -} - -export enum MonthOfTheYear { - JANUARY = 1, - FEBRUARY = 2, - MARCH = 3, - APRIL = 4, - MAY = 5, - JUNE = 6, - JULY = 7, - AUGUST = 8, - SEPTEMBER = 9, - OCTOBER = 10, - NOVEMBER = 11, - DECEMBER = 12, -} - -export enum TimeInfoType { - DAY = "DAY", - DATE = "DATE", - TIME = "TIME", - CLOSED = "CLOSED", - TWENTYFOURHOURS = "TWENTYFOURHOURS", -} +import { DayOfTheWeek, ITimeMoment, ITimeRange } from "types"; export function getNextDay(day: DayOfTheWeek): DayOfTheWeek { const weekdays: DayOfTheWeek[] = [ @@ -47,160 +13,43 @@ export function getNextDay(day: DayOfTheWeek): DayOfTheWeek { return weekdays[(weekdays.indexOf(day) + 1) % 7]; } -export function convertDayStringToEnum(dayStr: string): DayOfTheWeek { - const normalizedDay = dayStr.trim().toLowerCase(); - switch (normalizedDay) { - case "sunday": - case "sun": - return DayOfTheWeek.SUNDAY; - case "monday": - case "mon": - return DayOfTheWeek.MONDAY; - case "tuesday": - case "tue": - return DayOfTheWeek.TUESDAY; - case "wednesday": - case "wed": - return DayOfTheWeek.WEDNESDAY; - case "thursday": - case "thu": - case "thurs": - return DayOfTheWeek.THURSDAY; - case "friday": - case "fri": - return DayOfTheWeek.FRIDAY; - case "saturday": - case "sat": - return DayOfTheWeek.SATURDAY; - default: - throw new Error(`Invalid Day: ${dayStr}`); - } -} - -export function isDay(input: string): boolean { - try { - convertDayStringToEnum(input); - return true; - } catch { - return false; - } -} - -export function convertMonthStringToEnum(monthStr: string): MonthOfTheYear { - const normalizedMonth = monthStr.trim().toLowerCase(); - switch (normalizedMonth) { - case "january": - case "jan": - return MonthOfTheYear.JANUARY; - case "february": - case "feb": - return MonthOfTheYear.FEBRUARY; - case "march": - case "mar": - return MonthOfTheYear.MARCH; - case "april": - case "apr": - return MonthOfTheYear.APRIL; - case "may": - return MonthOfTheYear.MAY; - case "june": - case "jun": - return MonthOfTheYear.JUNE; - case "july": - case "jul": - return MonthOfTheYear.JULY; - case "august": - case "aug": - return MonthOfTheYear.AUGUST; - case "september": - case "sept": - case "sep": - return MonthOfTheYear.SEPTEMBER; - case "october": - case "oct": - return MonthOfTheYear.OCTOBER; - case "november": - case "nov": - return MonthOfTheYear.NOVEMBER; - case "december": - case "dec": - return MonthOfTheYear.DECEMBER; - default: - throw new Error(`Invalid Month: ${monthStr}`); - } -} - -export function isMonth(input: string): boolean { - try { - convertMonthStringToEnum(input); - return true; - } catch { - return false; - } -} - -export function isValidDate(month: MonthOfTheYear, date: number): boolean { - if (!Number.isInteger(date)) { - return false; - } - switch (month) { - case MonthOfTheYear.JANUARY: - case MonthOfTheYear.MARCH: - case MonthOfTheYear.MAY: - case MonthOfTheYear.JULY: - case MonthOfTheYear.AUGUST: - case MonthOfTheYear.OCTOBER: - case MonthOfTheYear.DECEMBER: - return date <= 31 && date >= 1; - case MonthOfTheYear.FEBRUARY: - return date <= 29 && date >= 1; - case MonthOfTheYear.APRIL: - case MonthOfTheYear.JUNE: - case MonthOfTheYear.SEPTEMBER: - case MonthOfTheYear.NOVEMBER: - return date <= 30 && date >= 1; - } -} - -export function assertHourIsValid(hour: number, twentyFourHours: boolean) { - if (twentyFourHours) { - if (hour > 23 || hour < 0) { - throw new Error(`Invalid Hour: ${hour} (24-hour format)`); - } - } else { - if (hour > 12 || hour < 1) { - throw new Error(`Invalid Hour: ${hour} (12-hour format)`); +export function getMinutesSinceStartOfSunday(timeMoment: ITimeMoment) { + return timeMoment.day * (24 * 60) + timeMoment.hour * 60 + timeMoment.minute; +} +/** + * + * @param moment1 + * @param moment2 + * @returns Delta in minutes of moment1 - moment2 + */ +export function compareTimeMoments(moment1: ITimeMoment, moment2: ITimeMoment) { + return ( + getMinutesSinceStartOfSunday(moment1) - + getMinutesSinceStartOfSunday(moment2) + ); +} + +export function sortAndMergeTimeRanges(timeRanges: ITimeRange[]) { + timeRanges.sort((range1, range2) => + compareTimeMoments(range1.start, range2.start) + ); + const mergedRanges: ITimeRange[] = []; + + for (const timeRange of timeRanges) { + const lastTimeRange = mergedRanges.length + ? mergedRanges[mergedRanges.length - 1] + : undefined; + if ( + lastTimeRange && + compareTimeMoments(lastTimeRange.end, timeRange.start) >= 0 + ) { + if (compareTimeMoments(timeRange.end, lastTimeRange.end) > 0) { + lastTimeRange.end = timeRange.end; // join current range with last range + } + } else { + mergedRanges.push(timeRange); } } -} - -export function assertMinuteIsValid(minute: number) { - if (minute > 59 || minute < 0) { - throw new Error(`Invalid Minute: ${minute}`); - } -} - -export function determineTimeInfoType(input: string): TimeInfoType { - input = input.trim().toLowerCase(); - if (isDay(input)) { - return TimeInfoType.DAY; - } - const testMonth = input.split(/\s/)[0]; - if (isMonth(testMonth)) { - return TimeInfoType.DATE; - } - if (input === "24 hours" || input === "24 hrs" || input === "open 24 hrs" || input === "open 24 hours") { - return TimeInfoType.TWENTYFOURHOURS; - } - if (input === "closed") { - return TimeInfoType.CLOSED; - } - if ( - Array.isArray( - input.match(/\d\d?:\d\d\s?(?:am|pm)\s?-\s?\d\d?:\d\d\s?(?:am|pm)/) - ) - ) { - return TimeInfoType.TIME; - } - throw new Error("Could not determine time info type"); + console.log(timeRanges, mergedRanges); + return mergedRanges; }