Skip to content

Commit 6cc1098

Browse files
authored
Merge branch 'main' into delimiter-char
2 parents 138d568 + 218f408 commit 6cc1098

12 files changed

+590
-237
lines changed

Cargo.toml

+2-3
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,8 @@ repository = "https://github.com/denoland/rust-urlpattern"
88
license = "MIT"
99

1010
[dependencies]
11-
derive_more = "0.99.16"
12-
url = "2.2.2"
13-
regex = "1.4.3"
11+
url = "2.4.1"
12+
regex = "1.10.5"
1413
serde = { version = "1.0.127", features = ["derive"] }
1514
unic-ucd-ident = { version = "0.9.0", features = ["id"] }
1615

src/canonicalize_and_process.rs

+22
Original file line numberDiff line numberDiff line change
@@ -270,3 +270,25 @@ pub fn special_scheme_default_port(scheme: &str) -> Option<&'static str> {
270270
_ => None,
271271
}
272272
}
273+
274+
// Ref: https://urlpattern.spec.whatwg.org/#process-a-base-url-string
275+
pub fn process_base_url(input: &str, kind: &ProcessType) -> String {
276+
if kind != &ProcessType::Pattern {
277+
input.to_string()
278+
} else {
279+
escape_pattern_string(input)
280+
}
281+
}
282+
283+
// Ref: https://wicg.github.io/urlpattern/#escape-a-pattern-string
284+
/// Escapes every pattern syntax character in `input` with a backslash.
///
/// Each of the characters `+ * ? : { } ( ) \` is emitted with a leading `\`;
/// all other characters are copied through unchanged.
///
/// # Panics
///
/// Panics if `input` contains any non-ASCII character.
pub fn escape_pattern_string(input: &str) -> String {
  assert!(input.is_ascii());
  // The output is never shorter than the input, so reserving the input
  // length up front avoids the early reallocations of an empty `String`.
  let mut result = String::with_capacity(input.len());
  for c in input.chars() {
    if matches!(c, '+' | '*' | '?' | ':' | '{' | '}' | '(' | ')' | '\\') {
      result.push('\\');
    }
    result.push(c);
  }
  result
}

src/component.rs

+7-15
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license.
22

3+
use crate::canonicalize_and_process::escape_pattern_string;
34
use crate::matcher::InnerMatcher;
45
use crate::matcher::Matcher;
56
use crate::parser::Options;
@@ -18,6 +19,7 @@ pub(crate) struct Component<R: RegExp> {
1819
pub regexp: Result<R, Error>,
1920
pub group_name_list: Vec<String>,
2021
pub matcher: Matcher<R>,
22+
pub has_regexp_group: bool,
2123
}
2224

2325
impl<R: RegExp> Component<R> {
@@ -46,6 +48,9 @@ impl<R: RegExp> Component<R> {
4648
regexp,
4749
group_name_list: name_list,
4850
matcher,
51+
has_regexp_group: part_list
52+
.iter()
53+
.any(|part| part.kind == PartType::Regexp),
4954
})
5055
}
5156

@@ -67,13 +72,13 @@ impl<R: RegExp> Component<R> {
6772
/// Builds the per-component match result from the raw matcher output.
///
/// Each name in `self.group_name_list` is paired, in order, with the entry of
/// `exec_result` at the same position; the pairs are collected into `groups`
/// and returned together with `input`. `zip` stops at the shorter of the two
/// sequences, so surplus names or surplus captures are dropped.
///
/// NOTE(review): this diff rendering shows both the removed `Vec<&str>` lines
/// and the added `Vec<Option<&str>>` lines for the parameter and the `zip`
/// call; in the added lines each capture is optional and is converted to an
/// owned value with `Option::map(str::to_owned)`.
pub(crate) fn create_match_result(
6873
&self,
6974
input: String,
70-
exec_result: Vec<&str>,
75+
exec_result: Vec<Option<&str>>,
7176
) -> crate::UrlPatternComponentResult {
7277
let groups = self
7378
.group_name_list
7479
.clone()
7580
.into_iter()
76-
.zip(exec_result.into_iter().map(str::to_owned))
81+
.zip(exec_result.into_iter().map(|s| s.map(str::to_owned)))
7782
.collect();
7883
crate::UrlPatternComponentResult { input, groups }
7984
}
@@ -258,19 +263,6 @@ fn generate_pattern_string(part_list: &[&Part], options: &Options) -> String {
258263
result
259264
}
260265

261-
// Ref: https://wicg.github.io/urlpattern/#escape-a-pattern-string
262-
fn escape_pattern_string(input: &str) -> String {
263-
assert!(input.is_ascii());
264-
let mut result = String::new();
265-
for char in input.chars() {
266-
if matches!(char, '+' | '*' | '?' | ':' | '{' | '}' | '(' | ')' | '\\') {
267-
result.push('\\');
268-
}
269-
result.push(char);
270-
}
271-
result
272-
}
273-
274266
/// This function generates a matcher for a given parts list.
275267
fn generate_matcher<R: RegExp>(
276268
mut part_list: &[&Part],

src/constructor_parser.rs

+71-16
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,11 @@ impl<'a> ConstructorStringParser<'a> {
127127
}
128128

129129
// Ref: https://wicg.github.io/urlpattern/#change-state
130-
fn change_state(&mut self, state: ConstructorStringParserState, skip: usize) {
130+
fn change_state(
131+
&mut self,
132+
new_state: ConstructorStringParserState,
133+
skip: usize,
134+
) {
131135
match self.state {
132136
ConstructorStringParserState::Protocol => {
133137
self.result.protocol = Some(self.make_component_string())
@@ -153,10 +157,69 @@ impl<'a> ConstructorStringParser<'a> {
153157
ConstructorStringParserState::Hash => {
154158
self.result.hash = Some(self.make_component_string())
155159
}
156-
_ => {}
160+
ConstructorStringParserState::Init
161+
| ConstructorStringParserState::Authority
162+
| ConstructorStringParserState::Done => {}
157163
}
158164

159-
self.state = state;
165+
if self.state != ConstructorStringParserState::Init
166+
&& new_state != ConstructorStringParserState::Done
167+
{
168+
if matches!(
169+
self.state,
170+
ConstructorStringParserState::Protocol
171+
| ConstructorStringParserState::Authority
172+
| ConstructorStringParserState::Username
173+
| ConstructorStringParserState::Password
174+
) && matches!(
175+
new_state,
176+
ConstructorStringParserState::Port
177+
| ConstructorStringParserState::Pathname
178+
| ConstructorStringParserState::Search
179+
| ConstructorStringParserState::Hash
180+
) && self.result.hostname.is_none()
181+
{
182+
self.result.hostname = Some(String::new());
183+
}
184+
185+
if matches!(
186+
self.state,
187+
ConstructorStringParserState::Protocol
188+
| ConstructorStringParserState::Authority
189+
| ConstructorStringParserState::Username
190+
| ConstructorStringParserState::Password
191+
| ConstructorStringParserState::Hostname
192+
| ConstructorStringParserState::Port
193+
) && matches!(
194+
new_state,
195+
ConstructorStringParserState::Search
196+
| ConstructorStringParserState::Hash
197+
) && self.result.pathname.is_none()
198+
{
199+
if self.protocol_matches_special_scheme {
200+
self.result.pathname = Some(String::from("/"));
201+
} else {
202+
self.result.pathname = Some(String::new());
203+
}
204+
}
205+
206+
if matches!(
207+
self.state,
208+
ConstructorStringParserState::Protocol
209+
| ConstructorStringParserState::Authority
210+
| ConstructorStringParserState::Username
211+
| ConstructorStringParserState::Password
212+
| ConstructorStringParserState::Hostname
213+
| ConstructorStringParserState::Port
214+
| ConstructorStringParserState::Pathname
215+
) && new_state == ConstructorStringParserState::Hash
216+
&& self.result.search.is_none()
217+
{
218+
self.result.search = Some(String::new());
219+
}
220+
}
221+
222+
self.state = new_state;
160223
self.token_index += skip;
161224
self.component_start = self.token_index;
162225
self.token_increment = 0;
@@ -273,11 +336,8 @@ pub(crate) fn parse_constructor_string<R: RegExp>(
273336
parser.change_state(ConstructorStringParserState::Hash, 1);
274337
} else if parser.is_search_prefix() {
275338
parser.change_state(ConstructorStringParserState::Search, 1);
276-
parser.result.hash = Some(String::new());
277339
} else {
278340
parser.change_state(ConstructorStringParserState::Pathname, 0);
279-
parser.result.search = Some(String::new());
280-
parser.result.hash = Some(String::new());
281341
}
282342
parser.token_index += parser.token_increment;
283343
continue;
@@ -306,22 +366,12 @@ pub(crate) fn parse_constructor_string<R: RegExp>(
306366
match parser.state {
307367
ConstructorStringParserState::Init => {
308368
if parser.is_protocol_suffix() {
309-
parser.result.username = Some(String::new());
310-
parser.result.password = Some(String::new());
311-
parser.result.hostname = Some(String::new());
312-
parser.result.port = Some(String::new());
313-
parser.result.pathname = Some(String::new());
314-
parser.result.search = Some(String::new());
315-
parser.result.hash = Some(String::new());
316369
parser.rewind_and_set_state(ConstructorStringParserState::Protocol);
317370
}
318371
}
319372
ConstructorStringParserState::Protocol => {
320373
if parser.is_protocol_suffix() {
321374
parser.compute_protocol_matches_special_scheme::<R>()?;
322-
if parser.protocol_matches_special_scheme {
323-
parser.result.pathname = Some(String::from("/"));
324-
}
325375
let mut next_state = ConstructorStringParserState::Pathname;
326376
let mut skip = 1;
327377
if parser.next_is_authority_slashes() {
@@ -398,5 +448,10 @@ pub(crate) fn parse_constructor_string<R: RegExp>(
398448
}
399449
parser.token_index += parser.token_increment;
400450
}
451+
452+
if parser.result.hostname.is_some() && parser.result.port.is_none() {
453+
parser.result.port = Some(String::new());
454+
}
455+
401456
Ok(parser.result)
402457
}

src/error.rs

+54-27
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,80 @@
1-
use derive_more::Display;
1+
use std::fmt;
22

33
use crate::tokenizer::TokenType;
44

55
/// An error occurring during URL pattern construction or matching.
6-
#[derive(Display)]
6+
#[derive(Debug)]
77
pub enum Error {
8-
#[display(fmt = "a relative input without a base URL is not valid")]
98
BaseUrlRequired,
10-
11-
#[display(
12-
fmt = "specifying both an init object, and a separate base URL is not valid"
13-
)]
149
BaseUrlWithInit,
15-
16-
#[display(fmt = "tokenizer error: {_0} (at char {_1})")]
1710
Tokenizer(TokenizerError, usize),
18-
19-
#[display(fmt = "parser error: {_0}")]
2011
Parser(ParserError),
21-
2212
Url(url::ParseError),
23-
24-
#[display(fmt = "regexp error")]
2513
RegExp(()),
2614
}
2715

28-
impl std::error::Error for Error {}
29-
30-
impl std::fmt::Debug for Error {
31-
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32-
std::fmt::Display::fmt(self, f)
16+
impl fmt::Display for Error {
17+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
18+
match self {
19+
Error::BaseUrlRequired => {
20+
f.write_str("a relative input without a base URL is not valid")
21+
}
22+
Error::BaseUrlWithInit => f.write_str(
23+
"specifying both an init object, and a separate base URL is not valid",
24+
),
25+
Error::Tokenizer(err, pos) => {
26+
write!(f, "tokenizer error: {err} (at char {pos})")
27+
}
28+
Error::Parser(err) => write!(f, "parser error: {err}"),
29+
Error::Url(err) => err.fmt(f),
30+
Error::RegExp(_) => f.write_str("regexp error"),
31+
}
3332
}
3433
}
3534

36-
#[derive(Debug, Display)]
35+
// Empty impl: every `std::error::Error` method keeps its default
// implementation; no `source()` override is provided here.
impl std::error::Error for Error {}
36+
37+
#[derive(Debug)]
3738
pub enum TokenizerError {
38-
#[display(fmt = "incomplete escape code")]
3939
IncompleteEscapeCode,
40-
#[display(fmt = "invalid name; must be at least length 1")]
4140
InvalidName,
42-
#[display(fmt = "invalid regex: {_0}")]
4341
InvalidRegex(&'static str),
4442
}
4543

46-
#[derive(Debug, Display)]
44+
impl fmt::Display for TokenizerError {
45+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
46+
match self {
47+
Self::IncompleteEscapeCode => f.write_str("incomplete escape code"),
48+
Self::InvalidName => {
49+
f.write_str("invalid name; must be at least length 1")
50+
}
51+
Self::InvalidRegex(err) => write!(f, "invalid regex: {err}"),
52+
}
53+
}
54+
}
55+
56+
// Empty impl: every `std::error::Error` method keeps its default
// implementation.
impl std::error::Error for TokenizerError {}
57+
58+
#[derive(Debug)]
4759
pub enum ParserError {
48-
#[display(fmt = "expected token {_0}, found '{_2}' of type {_1}")]
4960
ExpectedToken(TokenType, TokenType, String),
50-
51-
#[display(fmt = "pattern contains duplicate name {_0}")]
5261
DuplicateName(String),
5362
}
63+
64+
impl fmt::Display for ParserError {
65+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
66+
match self {
67+
Self::ExpectedToken(expected_ty, found_ty, found_val) => {
68+
write!(
69+
f,
70+
"expected token {expected_ty:?}, found '{found_val}' of type {found_ty:?}"
71+
)
72+
}
73+
Self::DuplicateName(name) => {
74+
write!(f, "pattern contains duplicate name {name}")
75+
}
76+
}
77+
}
78+
}
79+
80+
// Empty impl: every `std::error::Error` method keeps its default
// implementation.
impl std::error::Error for ParserError {}

0 commit comments

Comments
 (0)