Commit b426323: bash 175

cirex-web committed Feb 17, 2025
1 parent ed41eda
Showing 2 changed files with 66 additions and 20 deletions.
src/server.ts (42 changes: 36 additions & 6 deletions)

@@ -3,15 +3,43 @@ import { cors } from "@elysiajs/cors";
 import DiningParser from "./parser/diningParser";
 import Scraper from "./utils/requestUtils";
 import { ILocation } from "types";
+import { load } from "cheerio";
+import { getTimeRangesFromString } from "containers/timeBuilder";
+import { sortAndMergeTimeRanges } from "utils/timeUtils";

 const PORT = process.env.PORT ?? 5010;
 let cachedLocations: ILocation[];

+async function bash() {
+  let fails = 0;
+  const scraper = new Scraper();
+  await scraper.initialize();
+
+  for (let i = 0; true; i++) {
+    const html = await scraper.getHTML(
+      new URL(
+        "https://apps.studentaffairs.cmu.edu/dining/conceptinfo/Concept/175"
+      )
+    );
+    const $ = load(html);
+    const nextSevenDays = $("ul.schedule").find("li").toArray();
+
+    const times = sortAndMergeTimeRanges(
+      nextSevenDays.flatMap((rowHTML) => getTimeRangesFromString(rowHTML))
+    );
+    if (times.length != 5) {
+      fails++;
+    }
+    console.log(fails / (i + 1), fails, i + 1);
+    // console.log(new Date(), times.length, JSON.stringify(times));
+  }
+}
 async function reload(): Promise<void> {
   const now = new Date();
   console.log(`Reloading Dining API: ${now}`);
   const scraper = new Scraper();
   await scraper.initialize();

   const parser = new DiningParser(scraper);
   const locations = await parser.process();
   await scraper.close();
@@ -76,10 +104,12 @@ setInterval(() => {
 }, interval);

 // Initial load and start the server
-reload().then(() => {
-  app.listen(PORT);
+bash().then(() =>
+  reload().then(() => {
+    app.listen(PORT);

-  console.log(
-    `Dining API is running at ${app.server?.hostname}:${app.server?.port}`
-  );
-});
+    console.log(
+      `Dining API is running at ${app.server?.hostname}:${app.server?.port}`
+    );
+  })
+);
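
Note: the new bash() function is a reliability probe. It scrapes the concept 175 schedule page in an endless loop and logs the running failure rate, counting an iteration as a failure whenever parsing does not yield exactly five merged time ranges, and the server only starts once the probe's promise chain begins. The pattern generalizes; below is a minimal sketch assuming only an async predicate is available, where probeFailureRate and check are illustrative names, not part of this repository.

// Minimal sketch of the failure-rate probe in bash(), factored into a
// reusable helper. `probeFailureRate` and `check` are illustrative names.
async function probeFailureRate(check: () => Promise<boolean>): Promise<never> {
  let fails = 0;
  for (let i = 0; ; i++) {
    if (!(await check())) fails++;
    // Same log shape as bash(): running rate, failure count, total attempts.
    console.log(fails / (i + 1), fails, i + 1);
  }
}

With such a helper, bash() would reduce to a single call whose predicate scrapes the page and checks times.length === 5.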
src/utils/requestUtils.ts (44 changes: 30 additions & 14 deletions)

@@ -1,6 +1,6 @@
 import puppeteer, { Browser, Page } from "puppeteer";
 import { AXIOS_RETRY_INTERVAL_MS, IS_TESTING } from "../config";
-
+import axios from "axios";
 const wait = (ms: number) => {
   return new Promise((re) => setTimeout(re, ms));
 };
@@ -21,23 +21,39 @@ export default class Scraper {
   }

   async getHTML(url: URL, retriesLeft = 4): Promise<string> {
-    if (!this.initialized) {
+    if (!this.initialized || !this.page) {
       throw new Error("Scraper not initialized");
     }
     try {
-      console.log(`Scraping ${url}`);
-      await this.page!.goto(url.toString());
-      if (IS_TESTING || process.env.DEV) {
-        await wait(1000);
-      } else {
-        await wait(10000);
-      }
-      const response = await this.page!.content();
-      console.log({
-        message: `Scraped ${url}`,
-        html: response,
-        url: url.toString(),
+      // console.log(`Scraping ${url}`);
+      // await this.page.setViewport({ width: 1280, height: 720 });
+
+      await this.page!.goto(url.toString(), {
+        waitUntil: ["domcontentloaded", "networkidle2"],
       });
+      // const res = (await axios.get(url.toString())).data;
+      // await wait(1000);
+      // return res;
+      // if (IS_TESTING || process.env.DEV) {
+      //   await wait(1000);
+      // } else {
+      //   await wait(10000);
+      // }
+      // console.log({
+      //   message: `Scraped ${url}`,
+      //   html: response,
+      //   url: url.toString(),
+      // });
+      const now = new Date();
+      // await this.page.screenshot({
+      //   path: "screens/screenshot" + new Date() + ".jpg",
+      // });
+      // await wait(20000);
+      // await this.page.screenshot({
+      //   path: "screens/screenshot" + new Date() + "_wait.jpg",
+      // });
+      const response = await this.page!.content();
+
       return response;
     } catch (err) {
       if (!IS_TESTING) console.error(err);
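
Note: the substantive change to getHTML is the wait strategy. The old code navigated with a bare page.goto() and then slept a fixed 1 second (test/dev) or 10 seconds; the new code passes waitUntil: ["domcontentloaded", "networkidle2"], so navigation resolves once the DOM is parsed and the network has had at most two in-flight connections for 500 ms. (The new axios import is only referenced from commented-out experiments.) Below is a standalone sketch of that strategy, assuming nothing beyond a stock Puppeteer install; fetchHTML is an illustrative name, not part of this repository.

import puppeteer from "puppeteer";

// Minimal sketch of the commit's wait strategy, outside the Scraper class.
async function fetchHTML(url: string): Promise<string> {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    // Resolve when the DOM is parsed AND the network has been near-idle
    // (<= 2 connections for 500 ms), instead of sleeping a fixed 10 seconds.
    await page.goto(url, { waitUntil: ["domcontentloaded", "networkidle2"] });
    return await page.content();
  } finally {
    await browser.close();
  }
}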
