use std::{ borrow::{Borrow, Cow}, fmt::Display, hash::{Hash, Hasher}, str::FromStr, }; use ahash::{AHashMap, AHasher}; use percent_encoding::{percent_decode_str, AsciiSet, CONTROLS}; use serde_json::Value; use url::Url; use crate::CompileError; // -- #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub(crate) struct JsonPointer(pub(crate) String); impl JsonPointer { pub(crate) fn escape(token: &str) -> Cow<'_, str> { const SPECIAL: [char; 2] = ['~', '/']; if token.contains(SPECIAL) { token.replace('~', "~0").replace('/', "~1").into() } else { token.into() } } pub(crate) fn unescape(mut tok: &str) -> Result, ()> { let Some(mut tilde) = tok.find('~') else { return Ok(Cow::Borrowed(tok)); }; let mut s = String::with_capacity(tok.len()); loop { s.push_str(&tok[..tilde]); tok = &tok[tilde + 1..]; match tok.chars().next() { Some('1') => s.push('/'), Some('0') => s.push('~'), _ => return Err(()), } tok = &tok[1..]; let Some(i) = tok.find('~') else { s.push_str(tok); break; }; tilde = i; } Ok(Cow::Owned(s)) } pub(crate) fn lookup<'a>( &self, mut v: &'a Value, v_url: &Url, ) -> Result<&'a Value, CompileError> { for tok in self.0.split('/').skip(1) { let Ok(tok) = Self::unescape(tok) else { let loc = UrlFrag::format(v_url, self.as_str()); return Err(CompileError::InvalidJsonPointer(loc)); }; match v { Value::Object(obj) => { if let Some(pvalue) = obj.get(tok.as_ref()) { v = pvalue; continue; } } Value::Array(arr) => { if let Ok(i) = usize::from_str(tok.as_ref()) { if let Some(item) = arr.get(i) { v = item; continue; } }; } _ => {} } let loc = UrlFrag::format(v_url, self.as_str()); return Err(CompileError::JsonPointerNotFound(loc)); } Ok(v) } pub(crate) fn as_str(&self) -> &str { &self.0 } pub(crate) fn is_empty(&self) -> bool { self.0.is_empty() } pub(crate) fn concat(&self, next: &Self) -> Self { JsonPointer(format!("{}{}", self.0, next.0)) } pub(crate) fn append(&self, tok: &str) -> Self { Self(format!("{}/{}", self, Self::escape(tok))) } pub(crate) fn append2(&self, tok1: &str, tok2: &str) -> Self { Self(format!( "{}/{}/{}", self, Self::escape(tok1), Self::escape(tok2) )) } } impl Display for JsonPointer { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.0.fmt(f) } } impl Borrow for JsonPointer { fn borrow(&self) -> &str { &self.0 } } impl From<&str> for JsonPointer { fn from(value: &str) -> Self { Self(value.into()) } } // -- #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub(crate) struct Anchor(pub(crate) String); impl Display for Anchor { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.0.fmt(f) } } impl Borrow for Anchor { fn borrow(&self) -> &str { &self.0 } } impl From<&str> for Anchor { fn from(value: &str) -> Self { Self(value.into()) } } // -- #[derive(Debug, Clone, Eq, PartialEq)] pub(crate) enum Fragment { Anchor(Anchor), JsonPointer(JsonPointer), } impl Fragment { pub(crate) fn split(s: &str) -> Result<(&str, Fragment), CompileError> { let (u, frag) = split(s); let frag = percent_decode_str(frag) .decode_utf8() .map_err(|src| CompileError::ParseUrlError { url: s.to_string(), src: src.into(), })? .to_string(); let frag = if frag.is_empty() || frag.starts_with('/') { Fragment::JsonPointer(JsonPointer(frag)) } else { Fragment::Anchor(Anchor(frag)) }; Ok((u, frag)) } pub(crate) fn encode(frag: &str) -> String { // https://url.spec.whatwg.org/#fragment-percent-encode-set const FRAGMENT: &AsciiSet = &CONTROLS .add(b'%') .add(b' ') .add(b'"') .add(b'<') .add(b'>') .add(b'`'); percent_encoding::utf8_percent_encode(frag, FRAGMENT).to_string() } pub(crate) fn as_str(&self) -> &str { match self { Fragment::Anchor(s) => &s.0, Fragment::JsonPointer(s) => &s.0, } } } // -- #[derive(Clone)] pub(crate) struct UrlFrag { pub(crate) url: Url, pub(crate) frag: Fragment, } impl UrlFrag { pub(crate) fn absolute(input: &str) -> Result { let (u, frag) = Fragment::split(input)?; // note: windows drive letter is treated as url scheme by url parser #[cfg(not(target_arch = "wasm32"))] if std::env::consts::OS == "windows" && starts_with_windows_drive(u) { let url = Url::from_file_path(u) .map_err(|_| CompileError::Bug(format!("failed to convert {u} into url").into()))?; return Ok(UrlFrag { url, frag }); } match Url::parse(u) { Ok(url) => Ok(UrlFrag { url, frag }), #[cfg(not(target_arch = "wasm32"))] Err(url::ParseError::RelativeUrlWithoutBase) => { let p = std::path::absolute(u).map_err(|e| CompileError::ParseUrlError { url: u.to_owned(), src: e.into(), })?; let url = Url::from_file_path(p).map_err(|_| { CompileError::Bug(format!("failed to convert {u} into url").into()) })?; Ok(UrlFrag { url, frag }) } Err(e) => Err(CompileError::ParseUrlError { url: u.to_owned(), src: e.into(), }), } } pub(crate) fn join(url: &Url, input: &str) -> Result { let (input, frag) = Fragment::split(input)?; if input.is_empty() { return Ok(UrlFrag { url: url.clone(), frag, }); } let url = url.join(input).map_err(|e| CompileError::ParseUrlError { url: input.to_string(), src: e.into(), })?; Ok(UrlFrag { url, frag }) } pub(crate) fn format(url: &Url, frag: &str) -> String { if frag.is_empty() { url.to_string() } else { format!("{}#{}", url, Fragment::encode(frag)) } } } impl Display for UrlFrag { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}#{}", self.url, Fragment::encode(self.frag.as_str())) } } // -- #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub(crate) struct UrlPtr { pub(crate) url: Url, pub(crate) ptr: JsonPointer, } impl UrlPtr { pub(crate) fn lookup<'a>(&self, doc: &'a Value) -> Result<&'a Value, CompileError> { self.ptr.lookup(doc, &self.url) } pub(crate) fn format(&self, tok: &str) -> String { format!( "{}#{}/{}", self.url, Fragment::encode(self.ptr.as_str()), Fragment::encode(JsonPointer::escape(tok).as_ref()), ) } } impl Display for UrlPtr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}#{}", self.url, Fragment::encode(self.ptr.as_str())) } } // -- pub(crate) fn is_integer(v: &Value) -> bool { match v { Value::Number(n) => { n.is_i64() || n.is_u64() || n.as_f64().filter(|n| n.fract() == 0.0).is_some() } _ => false, } } #[cfg(not(target_arch = "wasm32"))] fn starts_with_windows_drive(p: &str) -> bool { p.chars().next().filter(char::is_ascii_uppercase).is_some() && p[1..].starts_with(":\\") } /// returns single-quoted string pub(crate) fn quote(s: &T) -> String where T: AsRef + std::fmt::Debug + ?Sized, { let s = format!("{s:?}").replace(r#"\""#, "\"").replace('\'', r"\'"); format!("'{}'", &s[1..s.len() - 1]) } pub(crate) fn join_iter(iterable: T, sep: &str) -> String where T: IntoIterator, T::Item: Display, { iterable .into_iter() .map(|e| e.to_string()) .collect::>() .join(sep) } pub(crate) fn escape(token: &str) -> Cow<'_, str> { JsonPointer::escape(token) } pub(crate) fn split(url: &str) -> (&str, &str) { if let Some(i) = url.find('#') { (&url[..i], &url[i + 1..]) } else { (url, "") } } /// serde_json treats 0 and 0.0 not equal. so we cannot simply use v1==v2 pub(crate) fn equals(v1: &Value, v2: &Value) -> bool { match (v1, v2) { (Value::Null, Value::Null) => true, (Value::Bool(b1), Value::Bool(b2)) => b1 == b2, (Value::Number(n1), Value::Number(n2)) => { if let (Some(n1), Some(n2)) = (n1.as_u64(), n2.as_u64()) { return n1 == n2; } if let (Some(n1), Some(n2)) = (n1.as_i64(), n2.as_i64()) { return n1 == n2; } if let (Some(n1), Some(n2)) = (n1.as_f64(), n2.as_f64()) { return n1 == n2; } false } (Value::String(s1), Value::String(s2)) => s1 == s2, (Value::Array(arr1), Value::Array(arr2)) => { if arr1.len() != arr2.len() { return false; } arr1.iter().zip(arr2).all(|(e1, e2)| equals(e1, e2)) } (Value::Object(obj1), Value::Object(obj2)) => { if obj1.len() != obj2.len() { return false; } for (k1, v1) in obj1 { if let Some(v2) = obj2.get(k1) { if !equals(v1, v2) { return false; } } else { return false; } } true } _ => false, } } pub(crate) fn duplicates(arr: &Vec) -> Option<(usize, usize)> { match arr.as_slice() { [e0, e1] => { if equals(e0, e1) { return Some((0, 1)); } } [e0, e1, e2] => { if equals(e0, e1) { return Some((0, 1)); } else if equals(e0, e2) { return Some((0, 2)); } else if equals(e1, e2) { return Some((1, 2)); } } _ => { let len = arr.len(); if len <= 20 { for i in 0..len - 1 { for j in i + 1..len { if equals(&arr[i], &arr[j]) { return Some((i, j)); } } } } else { let mut seen = AHashMap::with_capacity(len); for (i, item) in arr.iter().enumerate() { if let Some(j) = seen.insert(HashedValue(item), i) { return Some((j, i)); } } } } } None } // HashedValue -- // Based on implementation proposed by Sven Marnach: // https://stackoverflow.com/questions/60882381/what-is-the-fastest-correct-way-to-detect-that-there-are-no-duplicates-in-a-json pub(crate) struct HashedValue<'a>(pub(crate) &'a Value); impl PartialEq for HashedValue<'_> { fn eq(&self, other: &Self) -> bool { equals(self.0, other.0) } } impl Eq for HashedValue<'_> {} impl Hash for HashedValue<'_> { fn hash(&self, state: &mut H) { match self.0 { Value::Null => state.write_u32(3_221_225_473), // chosen randomly Value::Bool(ref b) => b.hash(state), Value::Number(ref num) => { if let Some(num) = num.as_f64() { num.to_bits().hash(state); } else if let Some(num) = num.as_u64() { num.hash(state); } else if let Some(num) = num.as_i64() { num.hash(state); } } Value::String(ref str) => str.hash(state), Value::Array(ref arr) => { for item in arr { HashedValue(item).hash(state); } } Value::Object(ref obj) => { let mut hash = 0; for (pname, pvalue) in obj { // We have no way of building a new hasher of type `H`, so we // hardcode using the default hasher of a hash map. let mut hasher = AHasher::default(); pname.hash(&mut hasher); HashedValue(pvalue).hash(&mut hasher); hash ^= hasher.finish(); } state.write_u64(hash); } } } } #[cfg(test)] mod tests { use ahash::AHashMap; use serde_json::json; use super::*; #[test] fn test_quote() { assert_eq!(quote(r#"abc"def'ghi"#), r#"'abc"def\'ghi'"#); } #[test] fn test_fragment_split() { let tests = [ ("#", Fragment::JsonPointer("".into())), ("#/a/b", Fragment::JsonPointer("/a/b".into())), ("#abcd", Fragment::Anchor("abcd".into())), ("#%61%62%63%64", Fragment::Anchor("abcd".into())), ( "#%2F%61%62%63%64%2fef", Fragment::JsonPointer("/abcd/ef".into()), ), // '/' is encoded ("#abcd+ef", Fragment::Anchor("abcd+ef".into())), // '+' should not traslate to space ]; for test in tests { let (_, got) = Fragment::split(test.0).unwrap(); assert_eq!(got, test.1, "Fragment::split({:?})", test.0); } } #[test] fn test_unescape() { let tests = [ ("bar~0", Some("bar~")), ("bar~1", Some("bar/")), ("bar~01", Some("bar~1")), ("bar~", None), ("bar~~", None), ]; for (tok, want) in tests { let res = JsonPointer::unescape(tok).ok(); let got = res.as_ref().map(|c| c.as_ref()); assert_eq!(got, want, "unescape({:?})", tok) } } #[test] fn test_equals() { let tests = [["1.0", "1"], ["-1.0", "-1"]]; for [a, b] in tests { let a = serde_json::from_str(a).unwrap(); let b = serde_json::from_str(b).unwrap(); assert!(equals(&a, &b)); } } #[test] fn test_hashed_value() { let mut seen = AHashMap::with_capacity(10); let (v1, v2) = (json!(2), json!(2.0)); assert!(equals(&v1, &v2)); assert!(seen.insert(HashedValue(&v1), 1).is_none()); assert!(seen.insert(HashedValue(&v2), 1).is_some()); } }