From fa7a2a51c40ed1ae2539271e0e1f3e2d863f6fa6 Mon Sep 17 00:00:00 2001 From: Tom Schuster Date: Fri, 14 Feb 2025 22:28:57 +0100 Subject: [PATCH] Switch identifier crate from unic-ucd-ident to unicode-id-start --- Cargo.toml | 2 +- src/lib.rs | 13 +++++++++++++ src/tokenizer.rs | 4 ++-- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8295022..9e1d87f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ license = "MIT" url = "2.4.1" regex = "1.10.5" serde = { version = "1.0.127", features = ["derive"] } -unic-ucd-ident = { version = "0.9.0", features = ["id"] } +unicode-id-start = "1.0" [dev-dependencies] serde_json = "1.0.66" diff --git a/src/lib.rs b/src/lib.rs index 7a73226..18f76c6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1047,4 +1047,17 @@ mod tests { .unwrap(); assert!(pattern.has_regexp_groups()); } + + #[test] + fn unicode_middle_dot() { + let pattern = <UrlPattern>::parse( + UrlPatternInit { + pathname: Some("/:a\u{30FB}b.".to_owned()), + ..Default::default() + }, + Default::default(), + ) + .unwrap(); + assert_eq!(pattern.pathname.group_name_list, vec!["a\u{30FB}b"]); + } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index dd25cfb..c15c22e 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -319,9 +319,9 @@ pub fn tokenize( #[inline] pub(crate) fn is_valid_name_codepoint(code_point: char, first: bool) -> bool { if first { - unic_ucd_ident::is_id_start(code_point) || matches!(code_point, '$' | '_') + unicode_id_start::is_id_start(code_point) || matches!(code_point, '$' | '_') } else { - unic_ucd_ident::is_id_continue(code_point) + unicode_id_start::is_id_continue(code_point) || matches!(code_point, '$' | '\u{200C}' | '\u{200D}') } }