Skip to content

Commit 91f5389

Browse files
committed
Add Proms prices, some nice logging
1 parent 3eb619b commit 91f5389

File tree

3 files changed

+74
-21
lines changed

3 files changed

+74
-21
lines changed

rust/src/core.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
use chrono::{DateTime, Utc};
1+
use chrono::{DateTime, Utc, TimeZone};
2+
use chrono_tz::Europe::London;
23
use serde::{Deserialize, Serialize};
34
use ts_rs::TS;
45

@@ -34,3 +35,8 @@ pub struct Concert {
3435
pub is_wigmore_u35: bool,
3536
pub is_prom: bool,
3637
}
38+
39+
pub fn report_concert(c: &Concert) -> () {
40+
let london_datetime = c.datetime.with_timezone(&London);
41+
eprintln!("Found concert on {}: {}", london_datetime, c.title);
42+
}

rust/src/proms.rs

+58-16
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
11
use crate::core;
22
use chrono::{NaiveDate, TimeZone, Utc};
33
use chrono_tz::Europe::London;
4+
use regex::Regex;
45
use scraper::{ElementRef, Html, Selector};
6+
use std::cmp::min;
57

68
// Scrapes concerts from BBC Proms website
79
pub async fn scrape(url: &str, client: &reqwest::Client) -> Vec<core::Concert> {
10+
println!("----------------------------------------");
11+
println!("Scraping BBC Proms from URL: {}", url);
12+
println!("----------------------------------------");
13+
814
let html: String = client
915
.get(url)
1016
// it 500's with default user-agent
@@ -47,6 +53,8 @@ struct PromsConcertMetadata {
4753
url: String,
4854
pieces: Vec<core::Piece>,
4955
performers: Vec<core::Performer>,
56+
min_price: Option<u32>,
57+
max_price: Option<u32>,
5058
}
5159

5260
/// Scrapes a single date's worth of concerts from the BBC Proms website
@@ -62,7 +70,6 @@ async fn scrape_one_date(date_fragment: ElementRef<'_>) -> (NaiveDate, Vec<Proms
6270
.trim();
6371
// BBC's website reports dates as e.g. "Fri 23 Aug 2024"
6472
let date = NaiveDate::parse_from_str(date_str, "%a %e %b %Y").unwrap();
65-
println!("parsed {date_str} into date: {:?}", date);
6673

6774
// Get the concerts themselves
6875
let mut intermediate_concerts: Vec<PromsConcertMetadata> = vec![];
@@ -113,7 +120,31 @@ fn parse_single_concert(elem: ElementRef<'_>) -> PromsConcertMetadata {
113120
.map(|performer_elem| parse_performer(performer_elem))
114121
.collect();
115122

116-
let concert = PromsConcertMetadata {
123+
let price_selector =
124+
Selector::parse("div.ev-event-calendar__ticket-link-subtitle--desktop").unwrap();
125+
let price_text = elem
126+
.select(&price_selector)
127+
.next()
128+
.unwrap()
129+
.text()
130+
.next()
131+
.unwrap()
132+
.trim();
133+
// Regexes are hacky, but it works fine for now ... otherwise the website text is very
134+
// inconsistent and hard to parse.
135+
let price_re = Regex::new(r"£(\d+)").unwrap();
136+
let prices: Vec<u32> = price_re
137+
.captures_iter(price_text)
138+
.map(|cap| cap.get(1).unwrap().as_str().parse().unwrap())
139+
.collect();
140+
let (min_price, max_price) = match prices[..] {
141+
[] => (None, None),
142+
[price] => (Some(price * 100), Some(price * 100)),
143+
[min_price, max_price] => (Some(min_price * 100), Some(max_price * 100)),
144+
_ => panic!("couldn't parse prices from {:?}", price_text),
145+
};
146+
147+
PromsConcertMetadata {
117148
london_time: parsed_time,
118149
title: elem
119150
.select(&Selector::parse("div.ev-event-calendar__name").unwrap())
@@ -149,11 +180,9 @@ fn parse_single_concert(elem: ElementRef<'_>) -> PromsConcertMetadata {
149180
.to_string(),
150181
pieces,
151182
performers,
152-
};
153-
154-
println!("found concert: {:?}", concert);
155-
156-
concert
183+
min_price,
184+
max_price,
185+
}
157186
}
158187

159188
/// Combines the date and the concert metadata to form a full core::Concert
@@ -163,7 +192,11 @@ fn make_full_concert(date: NaiveDate, metadata: PromsConcertMetadata) -> core::C
163192
.unwrap();
164193
let tz_datetime = London.from_local_datetime(&naive_datetime).unwrap();
165194

166-
core::Concert {
195+
let is_rah_prom = metadata.venue == "Royal Albert Hall"
196+
&& (metadata.title.starts_with("Prom") || metadata.title.starts_with("First Night"));
197+
let promming_price = 800;
198+
199+
let concert = core::Concert {
167200
datetime: tz_datetime.with_timezone(&Utc),
168201
url: metadata.url,
169202
venue: metadata.venue,
@@ -172,22 +205,34 @@ fn make_full_concert(date: NaiveDate, metadata: PromsConcertMetadata) -> core::C
172205
pieces: metadata.pieces,
173206
performers: metadata.performers,
174207

208+
// Proms concerts don't have subtitles or programme PDFs available
175209
subtitle: None,
176210
programme_pdf_url: None,
177-
min_price: Some(800),
178-
max_price: Some(800),
179211

212+
// Day promming prices aren't shown on the website so we add them in here
213+
min_price: if is_rah_prom {
214+
match metadata.min_price {
215+
Some(price) => Some(min(price, promming_price)),
216+
None => Some(promming_price),
217+
}
218+
} else {
219+
metadata.min_price
220+
},
221+
max_price: metadata.max_price,
222+
223+
// By definition
180224
is_wigmore_u35: false,
181225
is_prom: true,
182-
}
226+
};
227+
228+
core::report_concert(&concert);
229+
concert
183230
}
184231

185232
/// Helper function to parse a piece from a concert
186233
fn parse_piece(piece_elem: ElementRef<'_>) -> Option<core::Piece> {
187234
// This is kind of hacky but it works
188235
let all_texts = piece_elem.text().collect::<Vec<&str>>();
189-
println!("all_texts: {:?}", all_texts);
190-
191236
match all_texts[..] {
192237
["interval"] => None,
193238
_ => Some(core::Piece {
@@ -212,9 +257,6 @@ fn parse_performer(performer_elem: ElementRef<'_>) -> core::Performer {
212257
.unwrap()
213258
.text()
214259
.collect::<Vec<&str>>();
215-
216-
println!("found performer: {} ({:?})", name, role_texts);
217-
218260
core::Performer {
219261
name: name.to_string(),
220262
instrument: match &role_texts[..] {

rust/src/wigmore.rs

+9-4
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ use serde::{Deserialize, Serialize};
99

1010
/// Fetch full data for all Wigmore concerts
1111
pub async fn get_concerts(client: &reqwest::Client) -> Vec<core::Concert> {
12+
println!("----------------------------------------");
13+
println!("Scraping Wigmore Hall concerts");
14+
println!("----------------------------------------");
15+
1216
let wigmore_intermediate_concerts = get_api(&client).await;
1317

1418
let mut wigmore_concerts = stream::iter(&wigmore_intermediate_concerts[..40])
@@ -102,8 +106,6 @@ async fn get_full_concert(
102106
fp_entry: &WigmoreFrontPageConcert,
103107
client: &reqwest::Client,
104108
) -> Option<core::Concert> {
105-
eprintln!("Scraping concert at {}", fp_entry.url);
106-
107109
// Wigmore's website actually seems to give us all the data in JSON format, but curiously, it's
108110
// in a script tag in the HTML. Not complaining though as it is still so much easier than
109111
// parsing the HTML itself.
@@ -227,7 +229,7 @@ fn parse_concert_json(
227229
cleaned
228230
}
229231

230-
core::Concert {
232+
let concert = core::Concert {
231233
datetime: fp_entry.datetime,
232234
url: fp_entry.url.clone(),
233235
title: fp_entry.title.clone(),
@@ -245,5 +247,8 @@ fn parse_concert_json(
245247
min_price,
246248
max_price,
247249
is_prom: false,
248-
}
250+
};
251+
252+
core::report_concert(&concert);
253+
concert
249254
}

0 commit comments

Comments
 (0)