Skip to content

Commit 218f408

Browse files
authored
feat: use "inherit left, wildcard right" behavior (#44)
1 parent 8be8cf5 commit 218f408

5 files changed

+457
-177
lines changed

src/canonicalize_and_process.rs

+22
Original file line numberDiff line numberDiff line change
@@ -270,3 +270,25 @@ pub fn special_scheme_default_port(scheme: &str) -> Option<&'static str> {
270270
_ => None,
271271
}
272272
}
273+
274+
// Ref: https://urlpattern.spec.whatwg.org/#process-a-base-url-string
275+
pub fn process_base_url(input: &str, kind: &ProcessType) -> String {
276+
if kind != &ProcessType::Pattern {
277+
input.to_string()
278+
} else {
279+
escape_pattern_string(input)
280+
}
281+
}
282+
283+
/// Returns a copy of `input` in which each of the characters
/// `+ * ? : { } ( ) \` is preceded by a backslash. All other characters are
/// copied through unchanged.
///
/// # Panics
///
/// Panics if `input` contains any non-ASCII character.
///
/// Ref: https://urlpattern.spec.whatwg.org/#escape-a-pattern-string
pub fn escape_pattern_string(input: &str) -> String {
  // Characters that receive a leading backslash in the output.
  const SPECIAL: &str = "+*?:{}()\\";

  assert!(input.is_ascii());
  input.chars().fold(String::new(), |mut escaped, c| {
    if SPECIAL.contains(c) {
      escaped.push('\\');
    }
    escaped.push(c);
    escaped
  })
}

src/component.rs

+1-13
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license.
22

3+
use crate::canonicalize_and_process::escape_pattern_string;
34
use crate::matcher::InnerMatcher;
45
use crate::matcher::Matcher;
56
use crate::parser::Options;
@@ -262,19 +263,6 @@ fn generate_pattern_string(part_list: &[&Part], options: &Options) -> String {
262263
result
263264
}
264265

265-
// Inserts a backslash in front of every `+ * ? : { } ( ) \` found in `input`
// and returns the resulting string. Panics when `input` is not pure ASCII.
//
// Ref: https://wicg.github.io/urlpattern/#escape-a-pattern-string
fn escape_pattern_string(input: &str) -> String {
  assert!(input.is_ascii());
  let mut escaped = String::new();
  for c in input.chars() {
    match c {
      '+' | '*' | '?' | ':' | '{' | '}' | '(' | ')' | '\\' => {
        escaped.push('\\');
        escaped.push(c);
      }
      _ => escaped.push(c),
    }
  }
  escaped
}
277-
278266
/// This function generates a matcher for a given parts list.
279267
fn generate_matcher<R: RegExp>(
280268
mut part_list: &[&Part],

src/constructor_parser.rs

+71-16
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,11 @@ impl<'a> ConstructorStringParser<'a> {
127127
}
128128

129129
// Ref: https://wicg.github.io/urlpattern/#change-state
130-
fn change_state(&mut self, state: ConstructorStringParserState, skip: usize) {
130+
fn change_state(
131+
&mut self,
132+
new_state: ConstructorStringParserState,
133+
skip: usize,
134+
) {
131135
match self.state {
132136
ConstructorStringParserState::Protocol => {
133137
self.result.protocol = Some(self.make_component_string())
@@ -153,10 +157,69 @@ impl<'a> ConstructorStringParser<'a> {
153157
ConstructorStringParserState::Hash => {
154158
self.result.hash = Some(self.make_component_string())
155159
}
156-
_ => {}
160+
ConstructorStringParserState::Init
161+
| ConstructorStringParserState::Authority
162+
| ConstructorStringParserState::Done => {}
157163
}
158164

159-
self.state = state;
165+
if self.state != ConstructorStringParserState::Init
166+
&& new_state != ConstructorStringParserState::Done
167+
{
168+
if matches!(
169+
self.state,
170+
ConstructorStringParserState::Protocol
171+
| ConstructorStringParserState::Authority
172+
| ConstructorStringParserState::Username
173+
| ConstructorStringParserState::Password
174+
) && matches!(
175+
new_state,
176+
ConstructorStringParserState::Port
177+
| ConstructorStringParserState::Pathname
178+
| ConstructorStringParserState::Search
179+
| ConstructorStringParserState::Hash
180+
) && self.result.hostname.is_none()
181+
{
182+
self.result.hostname = Some(String::new());
183+
}
184+
185+
if matches!(
186+
self.state,
187+
ConstructorStringParserState::Protocol
188+
| ConstructorStringParserState::Authority
189+
| ConstructorStringParserState::Username
190+
| ConstructorStringParserState::Password
191+
| ConstructorStringParserState::Hostname
192+
| ConstructorStringParserState::Port
193+
) && matches!(
194+
new_state,
195+
ConstructorStringParserState::Search
196+
| ConstructorStringParserState::Hash
197+
) && self.result.pathname.is_none()
198+
{
199+
if self.protocol_matches_special_scheme {
200+
self.result.pathname = Some(String::from("/"));
201+
} else {
202+
self.result.pathname = Some(String::new());
203+
}
204+
}
205+
206+
if matches!(
207+
self.state,
208+
ConstructorStringParserState::Protocol
209+
| ConstructorStringParserState::Authority
210+
| ConstructorStringParserState::Username
211+
| ConstructorStringParserState::Password
212+
| ConstructorStringParserState::Hostname
213+
| ConstructorStringParserState::Port
214+
| ConstructorStringParserState::Pathname
215+
) && new_state == ConstructorStringParserState::Hash
216+
&& self.result.search.is_none()
217+
{
218+
self.result.search = Some(String::new());
219+
}
220+
}
221+
222+
self.state = new_state;
160223
self.token_index += skip;
161224
self.component_start = self.token_index;
162225
self.token_increment = 0;
@@ -273,11 +336,8 @@ pub(crate) fn parse_constructor_string<R: RegExp>(
273336
parser.change_state(ConstructorStringParserState::Hash, 1);
274337
} else if parser.is_search_prefix() {
275338
parser.change_state(ConstructorStringParserState::Search, 1);
276-
parser.result.hash = Some(String::new());
277339
} else {
278340
parser.change_state(ConstructorStringParserState::Pathname, 0);
279-
parser.result.search = Some(String::new());
280-
parser.result.hash = Some(String::new());
281341
}
282342
parser.token_index += parser.token_increment;
283343
continue;
@@ -306,22 +366,12 @@ pub(crate) fn parse_constructor_string<R: RegExp>(
306366
match parser.state {
307367
ConstructorStringParserState::Init => {
308368
if parser.is_protocol_suffix() {
309-
parser.result.username = Some(String::new());
310-
parser.result.password = Some(String::new());
311-
parser.result.hostname = Some(String::new());
312-
parser.result.port = Some(String::new());
313-
parser.result.pathname = Some(String::new());
314-
parser.result.search = Some(String::new());
315-
parser.result.hash = Some(String::new());
316369
parser.rewind_and_set_state(ConstructorStringParserState::Protocol);
317370
}
318371
}
319372
ConstructorStringParserState::Protocol => {
320373
if parser.is_protocol_suffix() {
321374
parser.compute_protocol_matches_special_scheme::<R>()?;
322-
if parser.protocol_matches_special_scheme {
323-
parser.result.pathname = Some(String::from("/"));
324-
}
325375
let mut next_state = ConstructorStringParserState::Pathname;
326376
let mut skip = 1;
327377
if parser.next_is_authority_slashes() {
@@ -398,5 +448,10 @@ pub(crate) fn parse_constructor_string<R: RegExp>(
398448
}
399449
parser.token_index += parser.token_increment;
400450
}
451+
452+
if parser.result.hostname.is_some() && parser.result.port.is_none() {
453+
parser.result.port = Some(String::new());
454+
}
455+
401456
Ok(parser.result)
402457
}

src/lib.rs

+129-21
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ mod tokenizer;
1818
pub use error::Error;
1919
use url::Url;
2020

21-
use crate::canonicalize_and_process::is_special_scheme;
2221
use crate::canonicalize_and_process::special_scheme_default_port;
22+
use crate::canonicalize_and_process::ProcessType;
23+
use crate::canonicalize_and_process::{is_special_scheme, process_base_url};
2324
use crate::component::Component;
2425
use crate::regexp::RegExp;
2526

@@ -55,7 +56,7 @@ impl UrlPatternInit {
5556
#[allow(clippy::too_many_arguments)]
5657
fn process(
5758
&self,
58-
kind: canonicalize_and_process::ProcessType,
59+
kind: ProcessType,
5960
protocol: Option<String>,
6061
username: Option<String>,
6162
password: Option<String>,
@@ -78,18 +79,84 @@ impl UrlPatternInit {
7879
};
7980

8081
let base_url = if let Some(parsed_base_url) = &self.base_url {
81-
// TODO: check if these are correct
82-
result.protocol = Some(parsed_base_url.scheme().to_string());
83-
result.username = Some(parsed_base_url.username().to_string());
84-
result.password =
85-
Some(parsed_base_url.password().unwrap_or_default().to_string());
86-
result.hostname =
87-
Some(parsed_base_url.host_str().unwrap_or_default().to_string());
88-
result.port = Some(url::quirks::port(parsed_base_url).to_string());
89-
result.pathname =
90-
Some(url::quirks::pathname(parsed_base_url).to_string());
91-
result.search = Some(parsed_base_url.query().unwrap_or("").to_string());
92-
result.hash = Some(parsed_base_url.fragment().unwrap_or("").to_string());
82+
if self.protocol.is_none() {
83+
result.protocol =
84+
Some(process_base_url(parsed_base_url.scheme(), &kind));
85+
}
86+
87+
if kind != ProcessType::Pattern
88+
&& (self.protocol.is_none()
89+
&& self.hostname.is_none()
90+
&& self.port.is_none()
91+
&& self.username.is_none())
92+
{
93+
result.username =
94+
Some(process_base_url(parsed_base_url.username(), &kind));
95+
}
96+
97+
if kind != ProcessType::Pattern
98+
&& (self.protocol.is_none()
99+
&& self.hostname.is_none()
100+
&& self.port.is_none()
101+
&& self.username.is_none()
102+
&& self.password.is_none())
103+
{
104+
result.password = Some(process_base_url(
105+
parsed_base_url.password().unwrap_or_default(),
106+
&kind,
107+
));
108+
}
109+
110+
if self.protocol.is_none() && self.hostname.is_none() {
111+
result.hostname = Some(process_base_url(
112+
parsed_base_url.host_str().unwrap_or_default(),
113+
&kind,
114+
));
115+
}
116+
117+
if self.protocol.is_none()
118+
&& self.hostname.is_none()
119+
&& self.port.is_none()
120+
{
121+
result.port =
122+
Some(process_base_url(url::quirks::port(parsed_base_url), &kind));
123+
}
124+
125+
if self.protocol.is_none()
126+
&& self.hostname.is_none()
127+
&& self.port.is_none()
128+
&& self.pathname.is_none()
129+
{
130+
result.pathname = Some(process_base_url(
131+
url::quirks::pathname(parsed_base_url),
132+
&kind,
133+
));
134+
}
135+
136+
if self.protocol.is_none()
137+
&& self.hostname.is_none()
138+
&& self.port.is_none()
139+
&& self.pathname.is_none()
140+
&& self.search.is_none()
141+
{
142+
result.search = Some(process_base_url(
143+
parsed_base_url.query().unwrap_or_default(),
144+
&kind,
145+
));
146+
}
147+
148+
if self.protocol.is_none()
149+
&& self.hostname.is_none()
150+
&& self.port.is_none()
151+
&& self.pathname.is_none()
152+
&& self.search.is_none()
153+
&& self.hash.is_none()
154+
{
155+
result.hash = Some(process_base_url(
156+
parsed_base_url.fragment().unwrap_or_default(),
157+
&kind,
158+
));
159+
}
93160

94161
Some(parsed_base_url)
95162
} else {
@@ -235,7 +302,7 @@ impl<R: RegExp> UrlPattern<R> {
235302
report_regex_errors: bool,
236303
) -> Result<Self, Error> {
237304
let mut processed_init = init.process(
238-
canonicalize_and_process::ProcessType::Pattern,
305+
ProcessType::Pattern,
239306
None,
240307
None,
241308
None,
@@ -413,7 +480,7 @@ impl<R: RegExp> UrlPattern<R> {
413480
&self,
414481
input: UrlPatternMatchInput,
415482
) -> Result<Option<UrlPatternResult>, Error> {
416-
let input = match crate::quirks::parse_match_input(input) {
483+
let input = match quirks::parse_match_input(input) {
417484
Some(input) => input,
418485
None => return Ok(None),
419486
};
@@ -591,9 +658,9 @@ mod tests {
591658

592659
fn test_case(case: TestCase) {
593660
let input = case.pattern.first().cloned();
594-
let mut base_url = case.pattern.get(1).map(|input| match input {
595-
StringOrInit::String(str) => str.clone(),
596-
StringOrInit::Init(_) => unreachable!(),
661+
let mut base_url = case.pattern.get(1).and_then(|input| match input {
662+
StringOrInit::String(str) => Some(str.clone()),
663+
StringOrInit::Init(_) => None,
597664
});
598665

599666
println!("\n=====");
@@ -664,7 +731,48 @@ mod tests {
664731
}) = &input
665732
{
666733
expected = Some($field.to_owned())
667-
} else if let Some(base_url) = &base_url {
734+
} else if {
735+
if let StringOrInit::Init(init) = &input {
736+
match stringify!($field) {
737+
"protocol" => false,
738+
"hostname" => init.protocol.is_some(),
739+
"port" => init.protocol.is_some() || init.hostname.is_some(),
740+
"username" => false,
741+
"password" => false,
742+
"pathname" => {
743+
init.protocol.is_some()
744+
|| init.hostname.is_some()
745+
|| init.port.is_some()
746+
}
747+
"search" => {
748+
init.protocol.is_some()
749+
|| init.hostname.is_some()
750+
|| init.port.is_some()
751+
|| init.pathname.is_some()
752+
}
753+
"hash" => {
754+
init.protocol.is_some()
755+
|| init.hostname.is_some()
756+
|| init.port.is_some()
757+
|| init.pathname.is_some()
758+
|| init.search.is_some()
759+
}
760+
_ => unreachable!(),
761+
}
762+
} else {
763+
false
764+
}
765+
} {
766+
expected = Some("*".to_owned())
767+
} else if let Some(base_url) =
768+
base_url.as_ref().and_then(|base_url| {
769+
if !matches!(stringify!($field), "username" | "password") {
770+
Some(base_url)
771+
} else {
772+
None
773+
}
774+
})
775+
{
668776
let base_url = Url::parse(base_url).unwrap();
669777
let field = url::quirks::$field(&base_url);
670778
let field: String = match stringify!($field) {
@@ -684,8 +792,8 @@ mod tests {
684792
let pattern = &pattern.$field.pattern_string;
685793

686794
assert_eq!(
687-
pattern,
688795
&expected,
796+
pattern,
689797
"pattern for {} does not match",
690798
stringify!($field)
691799
);

0 commit comments

Comments
 (0)