From 23f0d0158f1793e413d66d7402d5f30b8ab22cee Mon Sep 17 00:00:00 2001 From: Andrzej Sulkowski Date: Thu, 6 Mar 2025 09:47:57 +0100 Subject: [PATCH] feat(client): Add system proxy support for macOS #3850 --- Cargo.toml | 29 ++- src/client/proxy/builder.rs | 315 ++++++++++++++++++++++++++++ src/client/proxy/matcher.rs | 390 +++++------------------------------ src/client/proxy/mod.rs | 3 + src/client/proxy/no_proxy.rs | 208 +++++++++++++++++++ src/client/proxy/utils.rs | 86 ++++++++ 6 files changed, 691 insertions(+), 340 deletions(-) create mode 100644 src/client/proxy/builder.rs create mode 100644 src/client/proxy/no_proxy.rs create mode 100644 src/client/proxy/utils.rs diff --git a/Cargo.toml b/Cargo.toml index 4232af1..fa49040 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,11 @@ repository = "https://github.com/hyperium/hyper-util" license = "MIT" authors = ["Sean McArthur "] keywords = ["http", "hyper", "hyperium"] -categories = ["network-programming", "web-programming::http-client", "web-programming::http-server"] +categories = [ + "network-programming", + "web-programming::http-client", + "web-programming::http-server", +] edition = "2021" rust-version = "1.63" @@ -29,10 +33,19 @@ ipnet = { version = "2.9", optional = true } percent-encoding = { version = "2.3", optional = true } pin-project-lite = "0.2.4" socket2 = { version = "0.5", optional = true, features = ["all"] } -tracing = { version = "0.1", default-features = false, features = ["std"], optional = true } -tokio = { version = "1", optional = true, default-features = false } +tracing = { version = "0.1", default-features = false, features = [ + "std", +], optional = true } +tokio = { version = "1", optional = true, default-features = false } tower-service = { version = "0.3", optional = true } +# Conditional dependencies for system proxy support +[target.'cfg(target_os = "macos")'.dependencies] +system-configuration = { version = "0.6.1", optional = true } + +[target.'cfg(target_os = 
"windows")'.dependencies] +winreg = { version = "0.55.0", optional = true } + [dev-dependencies] hyper = { version = "1.4.0", features = ["full"] } bytes = "1" @@ -58,9 +71,15 @@ full = [ "http1", "http2", "tokio", + "system-proxies" ] -client = ["hyper/client", "dep:tracing", "dep:futures-channel", "dep:tower-service"] +client = [ + "hyper/client", + "dep:tracing", + "dep:futures-channel", + "dep:tower-service", +] client-legacy = ["client", "dep:socket2", "tokio/sync"] client-proxy-env = ["client", "dep:base64", "dep:ipnet", "dep:percent-encoding"] @@ -75,6 +94,8 @@ http2 = ["hyper/http2"] tokio = ["dep:tokio", "tokio/net", "tokio/rt", "tokio/time"] +system-proxies = ["system-configuration", "winreg"] + # internal features used in CI __internal_happy_eyeballs_tests = [] diff --git a/src/client/proxy/builder.rs b/src/client/proxy/builder.rs new file mode 100644 index 0000000..2a5f0d6 --- /dev/null +++ b/src/client/proxy/builder.rs @@ -0,0 +1,315 @@ +use super::no_proxy::NoProxy; +use super::utils::{get_first_env, parse_env_uri}; +use super::Matcher; + +#[derive(Default)] +pub struct Builder { + pub(crate) is_cgi: bool, + pub(crate) all: String, + pub(crate) http: String, + pub(crate) https: String, + pub(crate) no: String, +} + +// ===== impl Builder ===== +impl Builder { + pub(crate) fn from_env() -> Self { + Builder { + is_cgi: std::env::var_os("REQUEST_METHOD").is_some(), + all: get_first_env(&["ALL_PROXY", "all_proxy"]), + http: get_first_env(&["HTTP_PROXY", "http_proxy"]), + https: get_first_env(&["HTTPS_PROXY", "https_proxy"]), + no: get_first_env(&["NO_PROXY", "no_proxy"]), + } + } + + /// Set a proxy for all schemes (ALL_PROXY equivalent). + pub fn all_proxy(mut self, proxy: impl Into) -> Self { + self.all = proxy.into(); + self + } + + /// Set a proxy for HTTP schemes (HTTP_PROXY equivalent). + pub fn http_proxy(mut self, proxy: impl Into) -> Self { + self.http = proxy.into(); + self + } + + /// Set a proxy for HTTPS schemes (HTTPS_PROXY equivalent). 
+    pub fn https_proxy(mut self, proxy: impl Into<String>) -> Self {
+        self.https = proxy.into();
+        self
+    }
+
+    /// Set no-proxy rules (NO_PROXY equivalent).
+    pub fn no_proxy(mut self, no_proxy: impl Into<String>) -> Self {
+        self.no = no_proxy.into();
+        self
+    }
+
+    /// Parse the collected settings into a [`Matcher`].
+    ///
+    /// When `is_cgi` is set, no proxy is configured at all. Otherwise the `all` proxy is the
+    /// fallback for `http` and `https` whenever their own setting yields no usable proxy.
+    pub(crate) fn build(self) -> Matcher {
+        if self.is_cgi {
+            return Matcher {
+                http: None,
+                https: None,
+                no: NoProxy::empty(),
+            };
+        }
+
+        let all = parse_env_uri(&self.all);
+
+        Matcher {
+            http: parse_env_uri(&self.http).or_else(|| all.clone()),
+            https: parse_env_uri(&self.https).or(all),
+            no: NoProxy::from_string(&self.no),
+        }
+    }
+}
+
+// ===== MacOS Builder System Proxies =====
+#[cfg(feature = "system-proxies")]
+#[cfg(target_os = "macos")]
+mod macos_proxies {
+    use super::*;
+
+    use system_configuration::core_foundation::array::CFArray;
+    use system_configuration::core_foundation::base::{CFType, TCFType, TCFTypeRef};
+    use system_configuration::core_foundation::dictionary::CFDictionary;
+    use system_configuration::core_foundation::number::CFNumber;
+    use system_configuration::core_foundation::string::{CFString, CFStringRef};
+    use system_configuration::dynamic_store::{SCDynamicStore, SCDynamicStoreBuilder};
+
+    /// The proxies dictionary: string keys mapped to dynamically typed values.
+    pub(crate) type ProxyDict = CFDictionary<CFString, CFType>;
+
+    impl Builder {
+        /// Returns `true` only when the `<prefix>Enable` entry exists and equals 1.
+        fn is_proxy_enabled(&self, prefix: &str, proxies: &ProxyDict) -> bool {
+            self.get_int(&format!("{}Enable", prefix), proxies) == Some(1)
+        }
+
+        /// Returns the entry stored under `key` when it is a `CFString`.
+        fn get_string(&self, key: &str, proxies: &ProxyDict) -> Option<String> {
+            proxies
+                .find(CFString::new(key))
+                // Checked downcast: an entry of any other type yields `None` instead of
+                // being reinterpreted as a string.
+                .and_then(|val| val.downcast::<CFString>())
+                .map(|s| s.to_string())
+        }
+
+        /// Returns the entry stored under `key` when it is a `CFNumber` that fits an `i32`.
+        fn get_int(&self, key: &str, proxies: &ProxyDict) -> Option<i32> {
+            proxies
+                .find(CFString::new(key))
+                .and_then(|val| val.downcast::<CFNumber>())
+                .and_then(|num| num.to_i32())
+        }
+
+        /// Load the proxy settings of the operating system.
+        ///
+        /// Every setting found (`http`, `https`, `no`) replaces the value already held by the
+        /// builder; settings the system does not provide are left untouched.
+        pub fn from_system_proxy(mut self) -> Self {
+            let store = SCDynamicStoreBuilder::new("proxy-fetcher").build();
+
+            if let Some(proxies) = store.get_proxies() {
+                let (http, https, no) = self.extract_system_proxy(proxies);
+
+                if let Some(http_proxy) = http {
+                    self.http = http_proxy;
+                }
+                if let Some(https_proxy) = https {
+                    self.https = https_proxy;
+                }
+                if let Some(no_proxy) = no {
+                    self.no = no_proxy;
+                }
+            }
+
+            self
+        }
+
+        /// Convert a proxies dictionary into `(http, https, no_proxy)` settings.
+        ///
+        /// A proxy URL is produced only when its `...Enable` entry is 1 and a host entry
+        /// exists; the port is appended when present. The exceptions are joined with
+        /// commas.
+        pub(crate) fn extract_system_proxy(
+            &self,
+            proxies: ProxyDict,
+        ) -> (Option<String>, Option<String>, Option<String>) {
+            let mut http: Option<String> = None;
+            let mut https: Option<String> = None;
+            let mut no: Option<String> = None;
+
+            // Process HTTP proxy
+            if self.is_proxy_enabled("HTTP", &proxies) {
+                if let Some(host) = self.get_string("HTTPProxy", &proxies) {
+                    let port = self.get_int("HTTPPort", &proxies);
+                    http = match port {
+                        Some(p) => Some(format!("http://{}:{}", host, p)),
+                        None => Some(format!("http://{}", host)),
+                    };
+                }
+            }
+
+            // Process HTTPS proxy
+            if self.is_proxy_enabled("HTTPS", &proxies) {
+                if let Some(host) = self.get_string("HTTPSProxy", &proxies) {
+                    let port = self.get_int("HTTPSPort", &proxies);
+                    https = match port {
+                        Some(p) => Some(format!("https://{}:{}", host, p)),
+                        None => Some(format!("https://{}", host)),
+                    };
+                }
+            }
+
+            // Process exceptions (NO_PROXY)
+            if let Some(exceptions_ref) = proxies.find(CFString::new("ExceptionsList")) {
+                if let Some(arr) = exceptions_ref.downcast::<CFArray>() {
+                    let exceptions: Vec<String> = arr
+                        .iter()
+                        .filter_map(|item| {
+                            let ptr = item.as_void_ptr();
+                            if ptr.is_null() {
+                                return None;
+                            }
+                            // SAFETY: `ptr` is non-null and assumed to be a Core Foundation
+                            // object owned by `arr`; the get rule retains it for `value`.
+                            let value = unsafe { CFType::wrap_under_get_rule(ptr as *const _) };
+                            // Entries that are not strings are skipped.
+                            value.downcast::<CFString>().map(|s| s.to_string())
+                        })
+                        .collect();
+                    no = Some(exceptions.join(","));
+                }
+            }
+
+            (http, https, no)
+        }
+    }
+
+    #[cfg(test)]
+    mod tests {
+        use super::*;
+        use crate::client::proxy::Matcher;
+        use system_configuration::core_foundation::array::CFArray;
+        use std::{net::IpAddr, str::FromStr};
+
+        /// Test double that serves a fixed proxies dictionary.
+        struct MockSCDynamicStore {
+            pairs: Vec<(CFString, CFType)>,
+        }
+
+        impl MockSCDynamicStore {
+            fn new() -> Self {
+                let mut keys = Vec::new();
+                let mut values = Vec::new();
+
+                // HTTP proxy enabled
+                keys.push(CFString::new("HTTPEnable"));
+                values.push(CFNumber::from(1).as_CFType());
+
+                // HTTP proxy host and port
+                keys.push(CFString::new("HTTPProxy"));
+                values.push(CFString::new("test-proxy.example.com").as_CFType());
+                keys.push(CFString::new("HTTPPort"));
+                values.push(CFNumber::from(8080).as_CFType());
+
+                // HTTPS proxy enabled
+                keys.push(CFString::new("HTTPSEnable"));
+                values.push(CFNumber::from(1).as_CFType());
+                // HTTPS proxy host and port
+                keys.push(CFString::new("HTTPSProxy"));
+                values.push(CFString::new("secure-proxy.example.com").as_CFType());
+                keys.push(CFString::new("HTTPSPort"));
+                values.push(CFNumber::from(8443).as_CFType());
+
+                // Exception list
+                keys.push(CFString::new("ExceptionsList"));
+                let exceptions = vec![
+                    CFString::new("localhost").as_CFType(),
+                    CFString::new("127.0.0.1").as_CFType(),
+                    CFString::new("*.local").as_CFType(),
+                ];
+                values.push(CFArray::from_CFTypes(&exceptions).as_CFType());
+
+                let pairs = keys
+                    .iter()
+                    .map(|k| k.clone())
+                    .zip(values.iter().map(|v| v.as_CFType()))
+                    .collect::<Vec<_>>();
+
+                MockSCDynamicStore { pairs }
+            }
+
+            fn get_proxies(&self) -> Option<ProxyDict> {
+                let proxies = CFDictionary::from_CFType_pairs(&self.pairs);
+                Some(proxies)
+            }
+        }
+
+        #[test]
+        fn test_mac_os_proxy_mocked() {
+            let mock_store = MockSCDynamicStore::new();
+            let proxies = mock_store.get_proxies().unwrap();
+            let (http, https, ns) = Matcher::builder().extract_system_proxy(proxies);
+
+            assert_eq!(http.as_deref(), Some("http://test-proxy.example.com:8080"));
+            assert_eq!(https.as_deref(), Some("https://secure-proxy.example.com:8443"));
+            assert_eq!(ns.as_deref(), Some("localhost,127.0.0.1,*.local"));
+        }
+
+        // Ignored by default: the assertions below depend on the proxy settings of the
+        // machine running the test.
+        #[ignore]
+        #[test]
+        fn test_mac_os_proxy() {
+            let matcher = Matcher::builder().from_system_proxy().build();
+            assert!(matcher
+                .http
+                .unwrap()
+                .uri
+                .eq("http://proxy.example.com:8080"));
+            assert!(matcher
+                .https
+                .unwrap()
+                .uri
+                .eq("https://proxy.example.com:8080"));
+
+            assert!(matcher.no.domains.contains("ebay.com"));
+            assert!(matcher.no.domains.contains("amazon.com"));
+
+            let ip = IpAddr::from_str("54.239.28.85").unwrap();
+            assert!(matcher.no.ips.contains(ip));
+        }
+    }
+}
+
+// ===== Windows Builder System Proxies =====
+#[cfg(feature = "system-proxies")]
+#[cfg(target_os = "windows")]
+mod win_proxies {
+    use super::*;
+
+    impl Builder {
+        /// Load the proxy settings of the operating system.
+        ///
+        /// # Panics
+        ///
+        /// Always panics: loading the Windows settings is not implemented yet.
+        pub fn from_system_proxy(self) -> Self {
+            todo!("Load Win system proxy settings");
+        }
+    }
+
+    #[cfg(test)]
+    mod tests {
+        use super::*;
+    }
+}
diff --git a/src/client/proxy/matcher.rs b/src/client/proxy/matcher.rs
index c7e96e3..808698d 100644
--- a/src/client/proxy/matcher.rs
+++ b/src/client/proxy/matcher.rs
@@ -1,46 +1,25 @@
 use std::fmt; -use std::net::IpAddr; use http::header::HeaderValue; -use ipnet::IpNet; -use percent_encoding::percent_decode_str; +use super::builder::Builder; +use super::no_proxy::NoProxy; + /// A proxy matcher built using standard environment variables.
+#[derive(Debug)] pub struct Matcher { - http: Option, - https: Option, - no: NoProxy, + pub (crate) http: Option, + pub (crate) https: Option, + pub (crate) no: NoProxy, } #[derive(Clone)] pub struct Intercept { - uri: http::Uri, - basic_auth: Option, - raw_auth: Option<(String, String)>, -} - -#[derive(Default)] -struct Builder { - is_cgi: bool, - all: String, - http: String, - https: String, - no: String, + pub (crate) uri: http::Uri, + pub (crate) basic_auth: Option, + pub (crate) raw_auth: Option<(String, String)>, } -struct NoProxy { - ips: IpMatcher, - domains: DomainMatcher, -} - -struct DomainMatcher(Vec); - -struct IpMatcher(Vec); - -enum Ip { - Address(IpAddr), - Network(IpNet), -} // ===== impl Matcher ===== @@ -50,11 +29,10 @@ impl Matcher { Builder::from_env().build() } - /* + /// Create a builder to configure a Matcher programmatically. pub fn builder() -> Builder { - Builder::from_env().build() + Builder::default() } - */ /// Check if the destination should be intercepted by a proxy. 
/// @@ -98,244 +76,52 @@ impl fmt::Debug for Intercept { } } -// ===== impl Builder ===== - -impl Builder { - fn from_env() -> Self { - Builder { - is_cgi: std::env::var_os("REQUEST_METHOD").is_some(), - all: get_first_env(&["ALL_PROXY", "all_proxy"]), - http: get_first_env(&["HTTP_PROXY", "http_proxy"]), - https: get_first_env(&["HTTPS_PROXY", "https_proxy"]), - no: get_first_env(&["NO_PROXY", "no_proxy"]), - } - } - - fn build(self) -> Matcher { - if self.is_cgi { - return Matcher { - http: None, - https: None, - no: NoProxy::empty(), - }; - } - - let all = parse_env_uri(&self.all); - - Matcher { - http: parse_env_uri(&self.http).or_else(|| all.clone()), - https: parse_env_uri(&self.https).or(all), - no: NoProxy::from_string(&self.no), - } - } -} - -fn get_first_env(names: &[&str]) -> String { - for name in names { - if let Ok(val) = std::env::var(name) { - return val; - } - } - - String::new() -} - -fn parse_env_uri(val: &str) -> Option { - let uri = val.parse::().ok()?; - let mut builder = http::Uri::builder(); - let mut is_httpish = false; - let mut basic_auth = None; - let mut raw_auth = None; - - builder = builder.scheme(match uri.scheme() { - Some(s) => { - if s == &http::uri::Scheme::HTTP || s == &http::uri::Scheme::HTTPS { - is_httpish = true; - s.clone() - } else if s.as_str() == "socks5" || s.as_str() == "socks5h" { - s.clone() - } else { - // can't use this proxy scheme - return None; - } - } - // if no scheme provided, assume they meant 'http' - None => { - is_httpish = true; - http::uri::Scheme::HTTP - }, - }); - - let authority = uri.authority()?; - - if let Some((userinfo, host_port)) = authority.as_str().split_once('@') { - let (user, pass) = userinfo.split_once(':')?; - let user = percent_decode_str(user).decode_utf8_lossy(); - let pass = percent_decode_str(pass).decode_utf8_lossy(); - if is_httpish { - basic_auth = Some(encode_basic_auth(&user, Some(&pass))); - } else { - raw_auth = Some((user.into(), pass.into())); - } - builder = 
builder.authority(host_port); - } else { - builder = builder.authority(authority.clone()); - } - - // removing any path, but we MUST specify one or the builder errors - builder = builder.path_and_query("/"); - - let dst = builder.build().ok()?; - - Some(Intercept { - uri: dst, - basic_auth, - raw_auth, - }) -} - -fn encode_basic_auth(user: &str, pass: Option<&str>) -> HeaderValue { - use base64::prelude::BASE64_STANDARD; - use base64::write::EncoderWriter; - use std::io::Write; - - let mut buf = b"Basic ".to_vec(); - { - let mut encoder = EncoderWriter::new(&mut buf, &BASE64_STANDARD); - let _ = write!(encoder, "{user}:"); - if let Some(password) = pass { - let _ = write!(encoder, "{password}"); - } - } - let mut header = HeaderValue::from_bytes(&buf).expect("base64 is always valid HeaderValue"); - header.set_sensitive(true); - header -} - -impl NoProxy { - /* - fn from_env() -> NoProxy { - let raw = std::env::var("NO_PROXY") - .or_else(|_| std::env::var("no_proxy")) - .unwrap_or_default(); +#[cfg(test)] +mod tests { + use crate::client::proxy::no_proxy::DomainMatcher; + use super::*; - Self::from_string(&raw) - } - */ + #[test] + fn test_manual_configuration() { + let matcher = Matcher::builder() + .http_proxy("http://proxy.example.com:8080") + .no_proxy("localhost, 127.0.0.1") + .build(); + + // HTTP URL should use the proxy + let intercept = matcher.intercept(&"http://example.com".parse().unwrap()); + assert!(intercept.is_some()); + assert_eq!( + intercept.unwrap().uri().to_string(), + "http://proxy.example.com:8080/" + ); - fn empty() -> NoProxy { - NoProxy { - ips: IpMatcher(Vec::new()), - domains: DomainMatcher(Vec::new()), - } - } + // No-proxy hosts should bypass the proxy + let intercept = matcher.intercept(&"http://localhost".parse().unwrap()); + assert!(intercept.is_none()); - /// Returns a new no-proxy configuration based on a `no_proxy` string (or `None` if no variables - /// are set) - /// The rules are as follows: - /// * The environment variable 
`NO_PROXY` is checked, if it is not set, `no_proxy` is checked - /// * If neither environment variable is set, `None` is returned - /// * Entries are expected to be comma-separated (whitespace between entries is ignored) - /// * IP addresses (both IPv4 and IPv6) are allowed, as are optional subnet masks (by adding /size, - /// for example "`192.168.1.0/24`"). - /// * An entry "`*`" matches all hostnames (this is the only wildcard allowed) - /// * Any other entry is considered a domain name (and may contain a leading dot, for example `google.com` - /// and `.google.com` are equivalent) and would match both that domain AND all subdomains. - /// - /// For example, if `"NO_PROXY=google.com, 192.168.1.0/24"` was set, all of the following would match - /// (and therefore would bypass the proxy): - /// * `http://google.com/` - /// * `http://www.google.com/` - /// * `http://192.168.1.42/` - /// - /// The URL `http://notgoogle.com/` would not match. - fn from_string(no_proxy_list: &str) -> Self { - let mut ips = Vec::new(); - let mut domains = Vec::new(); - let parts = no_proxy_list.split(',').map(str::trim); - for part in parts { - match part.parse::() { - // If we can parse an IP net or address, then use it, otherwise, assume it is a domain - Ok(ip) => ips.push(Ip::Network(ip)), - Err(_) => match part.parse::() { - Ok(addr) => ips.push(Ip::Address(addr)), - Err(_) => domains.push(part.to_owned()), - }, - } - } - NoProxy { - ips: IpMatcher(ips), - domains: DomainMatcher(domains), - } + let intercept = matcher.intercept(&"http://127.0.0.1".parse().unwrap()); + assert!(intercept.is_none()); } - fn contains(&self, host: &str) -> bool { - // According to RFC3986, raw IPv6 hosts will be wrapped in []. 
So we need to strip those off - // the end in order to parse correctly - let host = if host.starts_with('[') { - let x: &[_] = &['[', ']']; - host.trim_matches(x) - } else { - host - }; - match host.parse::() { - // If we can parse an IP addr, then use it, otherwise, assume it is a domain - Ok(ip) => self.ips.contains(ip), - Err(_) => self.domains.contains(host), - } - } -} + #[test] + fn test_all_proxy_manual() { + let matcher = Matcher::builder() + .all_proxy("http://all.proxy.com:9999") + .build(); -impl IpMatcher { - fn contains(&self, addr: IpAddr) -> bool { - for ip in &self.0 { - match ip { - Ip::Address(address) => { - if &addr == address { - return true; - } - } - Ip::Network(net) => { - if net.contains(&addr) { - return true; - } - } - } - } - false - } -} + let intercept = matcher.intercept(&"http://example.com".parse().unwrap()); + assert_eq!( + intercept.unwrap().uri().to_string(), + "http://all.proxy.com:9999/" + ); -impl DomainMatcher { - // The following links may be useful to understand the origin of these rules: - // * https://curl.se/libcurl/c/CURLOPT_NOPROXY.html - // * https://github.com/curl/curl/issues/1208 - fn contains(&self, domain: &str) -> bool { - let domain_len = domain.len(); - for d in &self.0 { - if d == domain || d.strip_prefix('.') == Some(domain) { - return true; - } else if domain.ends_with(d) { - if d.starts_with('.') { - // If the first character of d is a dot, that means the first character of domain - // must also be a dot, so we are looking at a subdomain of d and that matches - return true; - } else if domain.as_bytes().get(domain_len - d.len() - 1) == Some(&b'.') { - // Given that d is a prefix of domain, if the prior character in domain is a dot - // then that means we must be matching a subdomain of d, and that matches - return true; - } - } else if d == "*" { - return true; - } - } - false + let intercept = matcher.intercept(&"https://example.com".parse().unwrap()); + assert_eq!( + intercept.unwrap().uri().to_string(), 
+ "http://all.proxy.com:9999/" + ); } -} - -#[cfg(test)] -mod tests { - use super::*; #[test] fn test_domain_matcher() { @@ -357,62 +143,6 @@ mod tests { assert!(!matcher.contains("notbar.foo")); } - #[test] - fn test_no_proxy_wildcard() { - let no_proxy = NoProxy::from_string("*"); - assert!(no_proxy.contains("any.where")); - } - - #[test] - fn test_no_proxy_ip_ranges() { - let no_proxy = - NoProxy::from_string(".foo.bar, bar.baz,10.42.1.1/24,::1,10.124.7.8,2001::/17"); - - let should_not_match = [ - // random url, not in no_proxy - "hyper.rs", - // make sure that random non-subdomain string prefixes don't match - "notfoo.bar", - // make sure that random non-subdomain string prefixes don't match - "notbar.baz", - // ipv4 address out of range - "10.43.1.1", - // ipv4 address out of range - "10.124.7.7", - // ipv6 address out of range - "[ffff:db8:a0b:12f0::1]", - // ipv6 address out of range - "[2005:db8:a0b:12f0::1]", - ]; - - for host in &should_not_match { - assert!(!no_proxy.contains(host), "should not contain {:?}", host); - } - - let should_match = [ - // make sure subdomains (with leading .) match - "hello.foo.bar", - // make sure exact matches (without leading .) match (also makes sure spaces between entries work) - "bar.baz", - // make sure subdomains (without leading . in no_proxy) match - "foo.bar.baz", - // make sure subdomains (without leading . in no_proxy) match - this differs from cURL - "foo.bar", - // ipv4 address match within range - "10.42.1.100", - // ipv6 address exact match - "[::1]", - // ipv6 address match within range - "[2001:db8:a0b:12f0::1]", - // ipv4 address exact match - "10.124.7.8", - ]; - - for host in &should_match { - assert!(no_proxy.contains(host), "should contain {:?}", host); - } - } - macro_rules! 
p { ($($n:ident = $v:expr,)*) => ({Builder { $($n: $v.into(),)* @@ -430,15 +160,9 @@ mod tests { all = "http://om.nom", }; - assert_eq!( - "http://om.nom", - intercept(&p, "http://example.com").uri() - ); + assert_eq!("http://om.nom", intercept(&p, "http://example.com").uri()); - assert_eq!( - "http://om.nom", - intercept(&p, "https://example.com").uri() - ); + assert_eq!("http://om.nom", intercept(&p, "https://example.com").uri()); } #[test] @@ -448,15 +172,9 @@ mod tests { http = "http://y.ep", }; - assert_eq!( - "http://no.pe", - intercept(&p, "https://example.com").uri() - ); + assert_eq!("http://no.pe", intercept(&p, "https://example.com").uri()); // the http rule is "more specific" than the all rule - assert_eq!( - "http://y.ep", - intercept(&p, "http://example.com").uri() - ); + assert_eq!("http://y.ep", intercept(&p, "http://example.com").uri()); } } diff --git a/src/client/proxy/mod.rs b/src/client/proxy/mod.rs index 492b788..c0f6c59 100644 --- a/src/client/proxy/mod.rs +++ b/src/client/proxy/mod.rs @@ -1,5 +1,8 @@ //! 
Proxy utilities mod matcher; +mod builder; +mod no_proxy; +mod utils; pub use self::matcher::Matcher;
diff --git a/src/client/proxy/no_proxy.rs b/src/client/proxy/no_proxy.rs
new file mode 100644
index 0000000..7e40452
--- /dev/null
+++ b/src/client/proxy/no_proxy.rs
@@ -0,0 +1,208 @@
+use std::net::IpAddr;
+use ipnet::IpNet;
+
+/// Domain-name rules of a no-proxy list.
+#[derive(Debug)]
+pub struct DomainMatcher(pub(crate) Vec<String>);
+
+/// IP address and network rules of a no-proxy list.
+#[derive(Debug)]
+pub struct IpMatcher(pub(crate) Vec<Ip>);
+
+/// A single IP rule: one exact address or a whole network.
+#[derive(Debug)]
+pub enum Ip {
+    Address(IpAddr),
+    Network(IpNet),
+}
+
+/// The set of hosts that must bypass the proxy.
+#[derive(Debug)]
+pub struct NoProxy {
+    pub(crate) ips: IpMatcher,
+    pub(crate) domains: DomainMatcher,
+}
+
+// ===== impl NoProxy =====
+
+impl NoProxy {
+    /// Returns a configuration without any rule, i.e. one that matches no host.
+    pub fn empty() -> NoProxy {
+        NoProxy {
+            ips: IpMatcher(Vec::new()),
+            domains: DomainMatcher(Vec::new()),
+        }
+    }
+
+    /// Returns a new no-proxy configuration based on a `no_proxy` string (an empty string
+    /// yields a configuration that matches no host).
+    /// The rules are as follows:
+    /// * The string uses the format of the `NO_PROXY` / `no_proxy` environment variables
+    /// * Empty entries (an empty string, a trailing comma, ...) are ignored
+    /// * Entries are expected to be comma-separated (whitespace between entries is ignored)
+    /// * IP addresses (both IPv4 and IPv6) are allowed, as are optional subnet masks (by adding /size,
+    /// for example "`192.168.1.0/24`").
+    /// * An entry "`*`" matches all hostnames (this is the only wildcard allowed)
+    /// * Any other entry is considered a domain name (and may contain a leading dot, for example `google.com`
+    /// and `.google.com` are equivalent) and would match both that domain AND all subdomains.
+    ///
+    /// For example, if `"NO_PROXY=google.com, 192.168.1.0/24"` was set, all of the following would match
+    /// (and therefore would bypass the proxy):
+    /// * `http://google.com/`
+    /// * `http://www.google.com/`
+    /// * `http://192.168.1.42/`
+    ///
+    /// The URL `http://notgoogle.com/` would not match.
+    pub fn from_string(no_proxy_list: &str) -> Self {
+        let mut ips = Vec::new();
+        let mut domains = Vec::new();
+        // Drop empty entries: an empty domain rule is a suffix of every host name and would
+        // make `DomainMatcher::contains` accept any host that ends in '.'.
+        let parts = no_proxy_list.split(',').map(str::trim).filter(|part| !part.is_empty());
+        for part in parts {
+            match part.parse::<IpNet>() {
+                // If we can parse an IP net or address, then use it, otherwise, assume it is a domain
+                Ok(ip) => ips.push(Ip::Network(ip)),
+                Err(_) => match part.parse::<IpAddr>() {
+                    Ok(addr) => ips.push(Ip::Address(addr)),
+                    Err(_) => domains.push(part.to_owned()),
+                },
+            }
+        }
+        NoProxy {
+            ips: IpMatcher(ips),
+            domains: DomainMatcher(domains),
+        }
+    }
+
+    /// Returns `true` if `host` (an IP address or a domain name) matches one of the rules.
+    pub fn contains(&self, host: &str) -> bool {
+        // According to RFC3986, raw IPv6 hosts will be wrapped in []. So we need to strip those off
+        // the end in order to parse correctly
+        let host = if host.starts_with('[') {
+            let x: &[_] = &['[', ']'];
+            host.trim_matches(x)
+        } else {
+            host
+        };
+        match host.parse::<IpAddr>() {
+            // If we can parse an IP addr, then use it, otherwise, assume it is a domain
+            Ok(ip) => self.ips.contains(ip),
+            Err(_) => self.domains.contains(host),
+        }
+    }
+}
+
+// ===== impl IpMatcher =====
+
+impl IpMatcher {
+    /// Returns `true` if `addr` equals a listed address or lies inside a listed network.
+    pub fn contains(&self, addr: IpAddr) -> bool {
+        for ip in &self.0 {
+            match ip {
+                Ip::Address(address) => {
+                    if &addr == address {
+                        return true;
+                    }
+                }
+                Ip::Network(net) => {
+                    if net.contains(&addr) {
+                        return true;
+                    }
+                }
+            }
+        }
+        false
+    }
+}
+
+// ===== impl DomainMatcher =====
+
+impl DomainMatcher {
+    // The following links may be useful to understand the origin of these rules:
+    // * https://curl.se/libcurl/c/CURLOPT_NOPROXY.html
+    // * https://github.com/curl/curl/issues/1208
+    /// Returns `true` if `domain` equals a rule, is a subdomain of one, or a `*` rule exists.
+    pub fn contains(&self, domain: &str) -> bool {
+        let domain_len = domain.len();
+        for d in &self.0 {
+            if d == domain || d.strip_prefix('.') == Some(domain) {
+                return true;
+            } else if domain.ends_with(d) {
+                if d.starts_with('.') {
+                    // If the first character of d is a dot, that means the first character of domain
+                    // must also be a dot, so we are looking at a subdomain of d and that matches
+                    return true;
+                } else if domain.as_bytes().get(domain_len - d.len() - 1) == Some(&b'.') {
+                    // Given that d is a prefix of domain, if the prior character in domain is a dot
+                    // then that means we must be matching a subdomain of d, and that matches
+                    return true;
+                }
+            } else if d == "*" {
+                return true;
+            }
+        }
+        false
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_no_proxy_wildcard() {
+        let no_proxy = NoProxy::from_string("*");
+        assert!(no_proxy.contains("any.where"));
+    }
+
+    #[test]
+    fn test_no_proxy_ip_ranges() {
+        let no_proxy =
+            NoProxy::from_string(".foo.bar, bar.baz,10.42.1.1/24,::1,10.124.7.8,2001::/17");
+
+        let should_not_match = [
+            // random url, not in no_proxy
+            "hyper.rs",
+            // make sure that random non-subdomain string prefixes don't match
+            "notfoo.bar",
+            // make sure that random non-subdomain string prefixes don't match
+            "notbar.baz",
+            // ipv4 address out of range
+            "10.43.1.1",
+            // ipv4 address out of range
+            "10.124.7.7",
+            // ipv6 address out of range
+            "[ffff:db8:a0b:12f0::1]",
+            // ipv6 address out of range
+            "[2005:db8:a0b:12f0::1]",
+        ];
+
+        for host in &should_not_match {
+            assert!(!no_proxy.contains(host), "should not contain {:?}", host);
+        }
+
+        let should_match = [
+            // make sure subdomains (with leading .) match
+            "hello.foo.bar",
+            // make sure exact matches (without leading .) match (also makes sure spaces between entries work)
+            "bar.baz",
+            // make sure subdomains (without leading . in no_proxy) match
+            "foo.bar.baz",
+            // make sure subdomains (without leading . in no_proxy) match - this differs from cURL
+            "foo.bar",
+            // ipv4 address match within range
+            "10.42.1.100",
+            // ipv6 address exact match
+            "[::1]",
+            // ipv6 address match within range
+            "[2001:db8:a0b:12f0::1]",
+            // ipv4 address exact match
+            "10.124.7.8",
+        ];
+
+        for host in &should_match {
+            assert!(no_proxy.contains(host), "should contain {:?}", host);
+        }
+    }
+}
diff --git a/src/client/proxy/utils.rs b/src/client/proxy/utils.rs
new file mode 100644
index 0000000..cbb8f3c
--- /dev/null
+++ b/src/client/proxy/utils.rs
@@ -0,0 +1,86 @@
+use http::HeaderValue;
+use percent_encoding::percent_decode_str;
+use super::matcher::Intercept;
+
+/// Returns the value of the first variable in `names` that can be read, or an empty string.
+pub fn get_first_env(names: &[&str]) -> String {
+    for name in names {
+        if let Ok(val) = std::env::var(name) {
+            return val;
+        }
+    }
+
+    String::new()
+}
+
+/// Parses a proxy setting into an [`Intercept`]; `None` if the URI or its scheme is unusable.
+pub fn parse_env_uri(val: &str) -> Option<Intercept> {
+    let uri = val.parse::<http::Uri>().ok()?;
+    let mut builder = http::Uri::builder();
+    let mut is_httpish = false;
+    let mut basic_auth = None;
+    let mut raw_auth = None;
+
+    builder = builder.scheme(match uri.scheme() {
+        Some(s) => {
+            if s == &http::uri::Scheme::HTTP || s == &http::uri::Scheme::HTTPS {
+                is_httpish = true;
+                s.clone()
+            } else if s.as_str() == "socks5" || s.as_str() == "socks5h" {
+                s.clone()
+            } else {
+                // can't use this proxy scheme
+                return None;
+            }
+        }
+        // if no scheme provided, assume they meant 'http'
+        None => {
+            is_httpish = true;
+            http::uri::Scheme::HTTP
+        }
+    });
+
+    let authority = uri.authority()?;
+
+    if let Some((userinfo, host_port)) = authority.as_str().split_once('@') {
+        let (user, pass) = userinfo.split_once(':')?;
+        let user = percent_decode_str(user).decode_utf8_lossy();
+        let pass = percent_decode_str(pass).decode_utf8_lossy();
+        if is_httpish {
+            basic_auth = Some(encode_basic_auth(&user, Some(&pass)));
+        } else {
+            raw_auth = Some((user.into(), pass.into()));
+        }
+        builder = builder.authority(host_port);
+    } else {
+        builder = builder.authority(authority.clone());
+    }
+
+    // removing any path, but we MUST specify one or the builder errors
+    builder = builder.path_and_query("/");
+    let dst = builder.build().ok()?;
+    Some(Intercept {
+        uri: dst,
+        basic_auth,
+        raw_auth,
+    })
+}
+
+/// Builds a sensitive `Basic` authorization header value from `user` and an optional `pass`.
+pub fn encode_basic_auth(user: &str, pass: Option<&str>) -> HeaderValue {
+    use base64::prelude::BASE64_STANDARD;
+    use base64::write::EncoderWriter;
+    use std::io::Write;
+
+    let mut buf = b"Basic ".to_vec();
+    {
+        let mut encoder = EncoderWriter::new(&mut buf, &BASE64_STANDARD);
+        let _ = write!(encoder, "{user}:");
+        if let Some(password) = pass {
+            let _ = write!(encoder, "{password}");
+        }
+    }
+    let mut header = HeaderValue::from_bytes(&buf).expect("base64 is always valid HeaderValue");
+    header.set_sensitive(true);
+    header
+}