Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: skip failed locations without failing entire parse #173

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions src/parser/diningParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,23 @@ export default class DiningParser {
await this.initializeLocationBuildersFromMainPage();

const [specials, soups] = await this.fetchSpecials();

const finalLocationData: ILocation[] = [];
for (const builder of locationBuilders) {
await builder.populateDetailedInfo();
builder.setSoup(soups);
builder.setSpecials(specials);
builder.overwriteLocation(locationOverwrites);
try {
await builder.populateDetailedInfo();
builder.setSoup(soups);
builder.setSpecials(specials);
builder.overwriteLocation(locationOverwrites);
finalLocationData.push(builder.build());
} catch (e) {
console.error(
`Parsing location ${builder.getConceptLink()} failed with error`,
e
);
}
}

return locationBuilders.map((builder) => builder.build());
return finalLocationData;
}

private async initializeLocationBuildersFromMainPage(): Promise<
Expand Down
132 changes: 132 additions & 0 deletions tests/html/concepts/113-broken.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@


<!DOCTYPE html>
<html lang="en">
<head><script src="//archive.org/includes/athena.js" type="text/javascript"></script>
<script type="text/javascript">window.addEventListener('DOMContentLoaded',function(){var v=archive_analytics.values;v.service='wb';v.server_name='wwwb-app221.us.archive.org';v.server_ms=580;archive_analytics.send_pageview({});});</script>
<script type="text/javascript" src="https://web-static.archive.org/_static/js/bundle-playback.js?v=2mqEsuX0" charset="utf-8"></script>
<script type="text/javascript" src="https://web-static.archive.org/_static/js/wombat.js?v=txqj7nKC" charset="utf-8"></script>
<script>window.RufflePlayer=window.RufflePlayer||{};window.RufflePlayer.config={"autoplay":"on","unmuteOverlay":"hidden"};</script>
<script type="text/javascript" src="https://web-static.archive.org/_static/js/ruffle/ruffle.js"></script>
<script type="text/javascript">
__wm.init("https://web.archive.org/web");
__wm.wombat("https://apps.studentaffairs.cmu.edu/dining/conceptinfo/Concept/113","20230806004812","https://web.archive.org/","web","https://web-static.archive.org/_static/",
"1691282892");
</script>
<link rel="stylesheet" type="text/css" href="https://web-static.archive.org/_static/css/banner-styles.css?v=S1zqJCYt" />
<link rel="stylesheet" type="text/css" href="https://web-static.archive.org/_static/css/iconochive.css?v=qtvMKcIJ" />
<!-- End Wayback Rewrite JS Include -->

<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<base href="/web/20230806004812/https://apps.studentaffairs.cmu.edu/dining/conceptinfo/"/>
<link rel="stylesheet" href="/web/20230806004812cs_/https://apps.studentaffairs.cmu.edu/dining/conceptinfo/css/bootstrap/bootstrap.min.css"/>
<link href="/web/20230806004812cs_/https://apps.studentaffairs.cmu.edu/dining/conceptinfo/css/site.css" rel="stylesheet"/>
<link href="/web/20230806004812cs_/https://apps.studentaffairs.cmu.edu/dining/conceptinfo/css/screen.css" rel="stylesheet"/>
<link href="/web/20230806004812cs_/https://apps.studentaffairs.cmu.edu/dining/conceptinfo/Dining_Web.Public.styles.css" rel="stylesheet"/>
<link href="https://web.archive.org/web/20230806004812cs_/https://fonts.googleapis.com/icon?family=Material+Icons+Outlined" rel="stylesheet">
<!--Blazor:{"sequence":0,"type":"server","prerenderId":"daa81457ce7c4197bf2206dc5af61575","descriptor":"CfDJ8FosNORZvMxNjRRSEt\u002B/9pwjYdd9Eghf0IyvWgRWWAvcfsW573ZwHw7lb8xkplrH52UHRKaIGCy1pK3xO9ZFzZoAuWuBjbvVI3YI17/5ehVdncPvpaQEsSngqaJRoyf8xeFhfaC7dcZSnDxlWGfShC1tj2kdZk7zFYIjTIJr1iMKy5yKdl6B\u002B8K/1i3quyKjiJRebtaeNL2zZhyVIh79f5XdnBCrgq2/a/Pv1IsfhfmaO5efqEWcikKYpD28BDbQb29gg7Z76PmlzMT7oBnsUy\u002By6GHwj1P1dFl59ApJk3YUUFwDtlZ3lYZP14RaixwnHmnVxV4eURnMxF96z6kIpPE86MWNh\u002BCe4mvtQm435Z5lfasnaxoc\u002BHlPWpjPAqYOl2jrzdZnkvnOe/DakjZV4c3Ed3yNSho9KCAyJfwbbByx"}--><title>Concept</title><!--Blazor:{"prerenderId":"daa81457ce7c4197bf2206dc5af61575"}-->
</head>
<body>

<!--Blazor:{"sequence":1,"type":"server","prerenderId":"75002c52576e400196070466a5970cc4","descriptor":"CfDJ8FosNORZvMxNjRRSEt\u002B/9pwG57EF57tvP6RXub9uK1IaGICD7EHavLdla1RFEoyfBd05\u002BV2itijkBGrxyiQspDoaig4kZT0pICy5yJuZUamDVW0TA1nWC9e0rjwLddBp/R\u002B78sAFVTG97ZyWaJfSda04fWaB2M7cgpJWq/kiz3b\u002BoWOXMZUEsU23Iq/DlZ\u002BnKPhrOc/oGYol9L4QDcZtRQoMD5u1lw0oaOp1IWczz4yx5Fg3Xzva1k5dcN\u002ByVeRw9LL8R3G\u002B9jFj2CoPmlnNRPvcPgjInl7Q\u002BEDJtag9JP0WM3U5643U7/VeiFLav4DHLkyl7Mh29eebTZoF2ReaRpuDjgN2J0B/820nneNpDfamTDv5Eo7RFcyEwcoQ7XWBVg=="}-->

<div><section class="row header headerImage" style="display:none;" b-w03wipcpbl><div b-w03wipcpbl><div class="screen bg" b-w03wipcpbl><div class="column" b-w03wipcpbl><div id="headerContent" b-w03wipcpbl><div id="logo" b-w03wipcpbl><a href="https://web.archive.org/web/20230806004812/https://www.cmu.edu/" b-w03wipcpbl><img src="/web/20230806004812im_/https://apps.studentaffairs.cmu.edu/dining/conceptinfo/images/cmu-wordmark.png" b-w03wipcpbl></a></div>
<div id="tagLine" b-w03wipcpbl><a href="https://web.archive.org/web/20230806004812/http://www.cmu.edu/dining/" b-w03wipcpbl>CMU Dining Services</a></div>
<div id="hamburger" b-w03wipcpbl><img src="/web/20230806004812im_/https://apps.studentaffairs.cmu.edu/dining/conceptinfo/images/hamburger.jpg" b-w03wipcpbl/></div></div>

<nav class="topNav" b-w03wipcpbl><a href b-w03wipcpbl>Locations</a>
<a class="jsHitCount" id="campusMap" href="https://web.archive.org/web/20230806004812/https://apps.studentaffairs.cmu.edu/dining/dashboard_images/Production/map/CampusMap (1).pdf" target="_blank" b-w03wipcpbl>Map</a>
<a href="https://web.archive.org/web/20230806004812/https://get.cbord.com/cmu/full/prelogin.php" target="_blank" b-w03wipcpbl>Get Funds</a>
<a href="https://web.archive.org/web/20230806004812/https://www.cmu.edu/dining" target="_blank" b-w03wipcpbl>Dining Services</a></nav>

<div class="collapse-important nav-scrollable" b-1xk6bg11sg><nav class="flex-column" b-1xk6bg11sg><div class="nav-item px-3" b-1xk6bg11sg><a class="nav-link" href b-1xk6bg11sg>
Locations
</a></div>
<div class="nav-item px-3" b-1xk6bg11sg><a class="nav-link" href="images/CampusMap.pdf" target="_blank" b-1xk6bg11sg>
Map
</a></div>
<div class="nav-item px-3" b-1xk6bg11sg><a class="nav-link" href="https://web.archive.org/web/20230806004812/https://get.cbord.com/cmu/full/prelogin.php" target="_blank" b-1xk6bg11sg>
Get Funds
</a></div>
<div class="nav-item px-3" b-1xk6bg11sg><a class="nav-link" href="https://web.archive.org/web/20230806004812/https://www.cmu.edu/dining" target="_blank" b-1xk6bg11sg>
Dining Services
</a></div></nav></div></div></div></div></section>

<main><article class="content"><div style>

<section b-a0g72i9rrk><div class="btn-container" style="background-color:transparent;" b-qmxam8zpit><div class="btn-inner-container" style="color:#224477;" b-qmxam8zpit><div class="left" b-qmxam8zpit><span class="material-icons-outlined line-height-adjust" b-qmxam8zpit>arrow_back</span></div>
<div class="right" b-qmxam8zpit>Back to Locations</div></div></div></section>

<section b-a0g72i9rrk><div class="conceptDetail" b-a0g72i9rrk><div class="columnLeft" b-a0g72i9rrk><h1 class="title" b-a0g72i9rrk>AU BON PAIN AT SKIBO CAF&#xC9;</h1>


<div class="location" b-a0g72i9rrk>
Location: <a href="https://web.archive.org/web/20230806004812/https://www.google.com/maps/place/40.440000000000000000,-79.940000000000000000/@40.440000000000000000,-79.940000000000000000,19z" target="_blank" b-a0g72i9rrk>Cohon Center, Second floor</a></div>

<div class="description" b-a0g72i9rrk><p>At Au Bon Pain café bakery, each signature recipe is uniquely crafted. You can enjoy delicious hot or iced coffee and teas, espresso drinks, a variety of cold beverages, soup, a customized made-to-order breakfast or lunch sandwich or salad, or you can grab a pre-made salad, sandwich, wrap, yogurt parfait, fresh fruit or snack. There is always something new to try ... healthy choices, comfort food, indulgent treats … try them all! For nutritional information about Au Bon Pain's menu items, please click here &lt;a&gt;https://www.aubonpain.com/nutrition&lt;/a&gt; To place an Au Bon Pain catering order, please contact 1-800-765-4227 or visit http://aubonpain.com/cateringFor on-campus assistance, call <strong>412-621-1934.</strong></p></div>

<h2 b-a0g72i9rrk>Hours</h2>

<div class="nextSevenSchedule" b-a0g72i9rrk><ul class="schedule" b-a0g72i9rrk><li><div><strong>Saturday</strong> August 05</div></li><li><div><strong>Sunday</strong> August 06,&nbsp;
4:30 PM - 8:00 PM</div></li><li><div><strong>Monday</strong> August 07,&nbsp;
8:00 AM - 8:00 PM</div></li><li><div><strong>Tuesday</strong> August 08,&nbsp;
8:00 AM - 8:00 PM</div></li><li><div><strong>Wednesday</strong> August 09,&nbsp;
8:00 AM - 8:00 PM</div></li><li><div><strong>Thursday</strong> August 10,&nbsp;
8:00 AM - 8:00 PM</div></li><li><div><strong>Friday</strong> August 11,&nbsp;
8:00 AM - 12:00 AM</div></li></ul></div></div>


<div class="columnRight" b-a0g72i9rrk><div class="logo" b-a0g72i9rrk><img class="conceptLogo visible-xs" src="https://web.archive.org/web/20230806004812im_/https://apps.studentaffairs.cmu.edu/dining/dashboard_images/Production/logos/113/ABP_Logo_wTagline_onwht.jpg" b-a0g72i9rrk/></div>
<div class="conceptImage" b-a0g72i9rrk><img src="https://web.archive.org/web/20230806004812im_/https://apps.studentaffairs.cmu.edu/dining/dashboard_images/Production/pics/113/abpweb1.jpg" style="width:100%;" b-a0g72i9rrk/></div>

<div class="navItems" b-a0g72i9rrk><a href="https://web.archive.org/web/20230806004812/https://apps.studentaffairs.cmu.edu/dining/dashboard_images/Production/menus/113/abp-menu6.pdf" id="getMenu" b-a0g72i9rrk>View Menu<img src="/web/20230806004812im_/https://apps.studentaffairs.cmu.edu/dining/conceptinfo/images/tinyTwill.png" b-a0g72i9rrk></a><a href="https://web.archive.org/web/20230806004812/https://www.google.com/maps/place/40.440000000000000000,-79.940000000000000000/@40.440000000000000000,-79.940000000000000000,19z" target="_blank" b-a0g72i9rrk>Map It<img src="/web/20230806004812im_/https://apps.studentaffairs.cmu.edu/dining/conceptinfo/images/tinyTwill.png" b-a0g72i9rrk></a>

<a href="https://web.archive.org/web/20230806004812/mailto:dining@andrew.cmu.edu?Subject=Dining Locations and Hours: AU BON PAIN AT SKIBO CAFÉ" b-a0g72i9rrk>Question or Comment?<img src="/web/20230806004812im_/https://apps.studentaffairs.cmu.edu/dining/conceptinfo/images/tinyTwill.png" b-a0g72i9rrk></a><a href="https://web.archive.org/web/20230806004812/tel:412-621-1934" id="phoneTxt" target b-a0g72i9rrk><img class="phoneImg" src="/web/20230806004812im_/https://apps.studentaffairs.cmu.edu/dining/conceptinfo/images/phone_icon.png" b-a0g72i9rrk>412-621-1934</a></div><div class="navItems orderOnline" b-a0g72i9rrk><a href="https://web.archive.org/web/20230806004812/https://www.grubhub.com/about/campus" target="_blank" b-a0g72i9rrk>Order Online<img src="/web/20230806004812im_/https://apps.studentaffairs.cmu.edu/dining/conceptinfo/images/tinyTwill.png" b-a0g72i9rrk></a></div></div></div></section></div></article></main>

<section class="row footer"><div class="column footer"><div class="address">
Residence on Fifth<br>
4700 Fifth Avenue<br>
Pittsburgh, PA 15213<br>
(412) 268-3782<br>
<a href="https://web.archive.org/web/20230806004812/mailto:dining@andrew.cmu.edu">dining@andrew.cmu.edu</a> <br>
<a href="https://web.archive.org/web/20230806004812/https://www.cmu.edu/dining/">cmu.edu/dining</a> <br><br><br>
<a href="https://web.archive.org/web/20230806004812/https://www.cmu.edu/legal/">Legal Info</a><br>
<a href="https://web.archive.org/web/20230806004812/https://www.cmu.edu/">www.cmu.edu</a>
<footer>&copy; 2016 Carnegie Mellon University</footer></div>
<div class="socialMedia"><div><a href="https://web.archive.org/web/20230806004812/https://www.facebook.com/cmudining" target="_blank"><img src="/web/20230806004812im_/https://apps.studentaffairs.cmu.edu/dining/conceptinfo/images/facebook.png"></a></div>
<div><a href="https://web.archive.org/web/20230806004812/https://www.instagram.com/lifeatcmu/" target="_blank"><img src="/web/20230806004812im_/https://apps.studentaffairs.cmu.edu/dining/conceptinfo/images/instagram.png"></a></div></div></div></section></div>
<!--Blazor:{"prerenderId":"75002c52576e400196070466a5970cc4"}-->

<div id="blazor-error-ui">

An error has occurred. This application may no longer respond until reloaded.


<a href="" class="reload">Reload</a>
<a class="dismiss">🗙</a>
</div>

<script src="/web/20230806004812js_/https://apps.studentaffairs.cmu.edu/dining/conceptinfo/_framework/blazor.server.js"></script>
</body>
</html>
<!--
FILE ARCHIVED ON 00:48:12 Aug 06, 2023 AND RETRIEVED FROM THE
INTERNET ARCHIVE ON 21:23:48 Sep 08, 2024.
JAVASCRIPT APPENDED BY WAYBACK MACHINE, COPYRIGHT INTERNET ARCHIVE.

ALL OTHER CONTENT MAY ALSO BE PROTECTED BY COPYRIGHT (17 U.S.C.
SECTION 108(a)(3)).
-->
<!--
playback timings (ms):
captures_list: 0.474
exclusion.robots: 0.023
exclusion.robots.policy: 0.014
esindex: 0.008
cdx.remote: 6.484
LoadShardBlock: 175.851 (3)
PetaboxLoader3.datanode: 147.857 (4)
load_resource: 369.268
PetaboxLoader3.resolve: 333.995
-->
11 changes: 11 additions & 0 deletions tests/integration.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,18 @@ test("specials for The Exchange", async () => {
])
);
});
// Serves the "113-broken" fixture for concept 113 (and a blank page for any
// other concept id), then expects process() to resolve with an empty array.
test("113 broken should return empty data", async () => {
  const resolveConceptFixture = (conceptId: string) => {
    if (conceptId === "113") {
      return `html/concepts/113-broken.html`;
    }
    return "html/blank.html";
  };

  mockAxiosGETMethodWithFilePaths({
    conceptListFilePath: "html/listconcepts-just-113.html",
    specialsFilePath: "html/specials-for-92.html",
    soupsFilePath: "html/soups.html",
    getConceptFilePath: resolveConceptFixture,
  });

  const diningParser = new DiningParser();
  const parsedLocations = await diningParser.process();

  expect(parsedLocations).toStrictEqual([]);
});
test(
"parser throws on repeated axios error",
async () => {
Expand Down