Files
jspg/validator/src/util.rs

546 lines
13 KiB
Rust

use std::{
borrow::{Borrow, Cow},
fmt::Display,
hash::{Hash, Hasher},
str::FromStr,
};
use ahash::{AHashMap, AHasher};
use percent_encoding::{percent_decode_str, AsciiSet, CONTROLS};
use serde_json::Value;
use url::Url;
use crate::CompileError;
// --
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub(crate) struct JsonPointer(pub(crate) String);
impl JsonPointer {
pub(crate) fn escape(token: &str) -> Cow<'_, str> {
const SPECIAL: [char; 2] = ['~', '/'];
if token.contains(SPECIAL) {
token.replace('~', "~0").replace('/', "~1").into()
} else {
token.into()
}
}
pub(crate) fn unescape(mut tok: &str) -> Result<Cow<'_, str>, ()> {
let Some(mut tilde) = tok.find('~') else {
return Ok(Cow::Borrowed(tok));
};
let mut s = String::with_capacity(tok.len());
loop {
s.push_str(&tok[..tilde]);
tok = &tok[tilde + 1..];
match tok.chars().next() {
Some('1') => s.push('/'),
Some('0') => s.push('~'),
_ => return Err(()),
}
tok = &tok[1..];
let Some(i) = tok.find('~') else {
s.push_str(tok);
break;
};
tilde = i;
}
Ok(Cow::Owned(s))
}
pub(crate) fn lookup<'a>(
&self,
mut v: &'a Value,
v_url: &Url,
) -> Result<&'a Value, CompileError> {
for tok in self.0.split('/').skip(1) {
let Ok(tok) = Self::unescape(tok) else {
let loc = UrlFrag::format(v_url, self.as_str());
return Err(CompileError::InvalidJsonPointer(loc));
};
match v {
Value::Object(obj) => {
if let Some(pvalue) = obj.get(tok.as_ref()) {
v = pvalue;
continue;
}
}
Value::Array(arr) => {
if let Ok(i) = usize::from_str(tok.as_ref()) {
if let Some(item) = arr.get(i) {
v = item;
continue;
}
};
}
_ => {}
}
let loc = UrlFrag::format(v_url, self.as_str());
return Err(CompileError::JsonPointerNotFound(loc));
}
Ok(v)
}
pub(crate) fn as_str(&self) -> &str {
&self.0
}
pub(crate) fn is_empty(&self) -> bool {
self.0.is_empty()
}
pub(crate) fn concat(&self, next: &Self) -> Self {
JsonPointer(format!("{}{}", self.0, next.0))
}
pub(crate) fn append(&self, tok: &str) -> Self {
Self(format!("{}/{}", self, Self::escape(tok)))
}
pub(crate) fn append2(&self, tok1: &str, tok2: &str) -> Self {
Self(format!(
"{}/{}/{}",
self,
Self::escape(tok1),
Self::escape(tok2)
))
}
}
impl Display for JsonPointer {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
impl Borrow<str> for JsonPointer {
fn borrow(&self) -> &str {
&self.0
}
}
impl From<&str> for JsonPointer {
fn from(value: &str) -> Self {
Self(value.into())
}
}
// --
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub(crate) struct Anchor(pub(crate) String);
impl Display for Anchor {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
impl Borrow<str> for Anchor {
fn borrow(&self) -> &str {
&self.0
}
}
impl From<&str> for Anchor {
fn from(value: &str) -> Self {
Self(value.into())
}
}
// --
#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) enum Fragment {
Anchor(Anchor),
JsonPointer(JsonPointer),
}
impl Fragment {
pub(crate) fn split(s: &str) -> Result<(&str, Fragment), CompileError> {
let (u, frag) = split(s);
let frag = percent_decode_str(frag)
.decode_utf8()
.map_err(|src| CompileError::ParseUrlError {
url: s.to_string(),
src: src.into(),
})?
.to_string();
let frag = if frag.is_empty() || frag.starts_with('/') {
Fragment::JsonPointer(JsonPointer(frag))
} else {
Fragment::Anchor(Anchor(frag))
};
Ok((u, frag))
}
pub(crate) fn encode(frag: &str) -> String {
// https://url.spec.whatwg.org/#fragment-percent-encode-set
const FRAGMENT: &AsciiSet = &CONTROLS
.add(b'%')
.add(b' ')
.add(b'"')
.add(b'<')
.add(b'>')
.add(b'`');
percent_encoding::utf8_percent_encode(frag, FRAGMENT).to_string()
}
pub(crate) fn as_str(&self) -> &str {
match self {
Fragment::Anchor(s) => &s.0,
Fragment::JsonPointer(s) => &s.0,
}
}
}
// --
#[derive(Clone)]
pub(crate) struct UrlFrag {
pub(crate) url: Url,
pub(crate) frag: Fragment,
}
impl UrlFrag {
pub(crate) fn absolute(input: &str) -> Result<UrlFrag, CompileError> {
let (u, frag) = Fragment::split(input)?;
// note: windows drive letter is treated as url scheme by url parser
#[cfg(not(target_arch = "wasm32"))]
if std::env::consts::OS == "windows" && starts_with_windows_drive(u) {
let url = Url::from_file_path(u)
.map_err(|_| CompileError::Bug(format!("failed to convert {u} into url").into()))?;
return Ok(UrlFrag { url, frag });
}
match Url::parse(u) {
Ok(url) => Ok(UrlFrag { url, frag }),
#[cfg(not(target_arch = "wasm32"))]
Err(url::ParseError::RelativeUrlWithoutBase) => {
let p = std::path::absolute(u).map_err(|e| CompileError::ParseUrlError {
url: u.to_owned(),
src: e.into(),
})?;
let url = Url::from_file_path(p).map_err(|_| {
CompileError::Bug(format!("failed to convert {u} into url").into())
})?;
Ok(UrlFrag { url, frag })
}
Err(e) => Err(CompileError::ParseUrlError {
url: u.to_owned(),
src: e.into(),
}),
}
}
pub(crate) fn join(url: &Url, input: &str) -> Result<UrlFrag, CompileError> {
let (input, frag) = Fragment::split(input)?;
if input.is_empty() {
return Ok(UrlFrag {
url: url.clone(),
frag,
});
}
let url = url.join(input).map_err(|e| CompileError::ParseUrlError {
url: input.to_string(),
src: e.into(),
})?;
Ok(UrlFrag { url, frag })
}
pub(crate) fn format(url: &Url, frag: &str) -> String {
if frag.is_empty() {
url.to_string()
} else {
format!("{}#{}", url, Fragment::encode(frag))
}
}
}
impl Display for UrlFrag {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}#{}", self.url, Fragment::encode(self.frag.as_str()))
}
}
// --
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub(crate) struct UrlPtr {
pub(crate) url: Url,
pub(crate) ptr: JsonPointer,
}
impl UrlPtr {
pub(crate) fn lookup<'a>(&self, doc: &'a Value) -> Result<&'a Value, CompileError> {
self.ptr.lookup(doc, &self.url)
}
pub(crate) fn format(&self, tok: &str) -> String {
format!(
"{}#{}/{}",
self.url,
Fragment::encode(self.ptr.as_str()),
Fragment::encode(JsonPointer::escape(tok).as_ref()),
)
}
}
impl Display for UrlPtr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}#{}", self.url, Fragment::encode(self.ptr.as_str()))
}
}
// --
pub(crate) fn is_integer(v: &Value) -> bool {
match v {
Value::Number(n) => {
n.is_i64() || n.is_u64() || n.as_f64().filter(|n| n.fract() == 0.0).is_some()
}
_ => false,
}
}
#[cfg(not(target_arch = "wasm32"))]
fn starts_with_windows_drive(p: &str) -> bool {
p.chars().next().filter(char::is_ascii_uppercase).is_some() && p[1..].starts_with(":\\")
}
/// returns single-quoted string
pub(crate) fn quote<T>(s: &T) -> String
where
T: AsRef<str> + std::fmt::Debug + ?Sized,
{
let s = format!("{s:?}").replace(r#"\""#, "\"").replace('\'', r"\'");
format!("'{}'", &s[1..s.len() - 1])
}
pub(crate) fn join_iter<T>(iterable: T, sep: &str) -> String
where
T: IntoIterator,
T::Item: Display,
{
iterable
.into_iter()
.map(|e| e.to_string())
.collect::<Vec<_>>()
.join(sep)
}
pub(crate) fn escape(token: &str) -> Cow<'_, str> {
JsonPointer::escape(token)
}
pub(crate) fn split(url: &str) -> (&str, &str) {
if let Some(i) = url.find('#') {
(&url[..i], &url[i + 1..])
} else {
(url, "")
}
}
/// serde_json treats 0 and 0.0 not equal. so we cannot simply use v1==v2
pub(crate) fn equals(v1: &Value, v2: &Value) -> bool {
match (v1, v2) {
(Value::Null, Value::Null) => true,
(Value::Bool(b1), Value::Bool(b2)) => b1 == b2,
(Value::Number(n1), Value::Number(n2)) => {
if let (Some(n1), Some(n2)) = (n1.as_u64(), n2.as_u64()) {
return n1 == n2;
}
if let (Some(n1), Some(n2)) = (n1.as_i64(), n2.as_i64()) {
return n1 == n2;
}
if let (Some(n1), Some(n2)) = (n1.as_f64(), n2.as_f64()) {
return n1 == n2;
}
false
}
(Value::String(s1), Value::String(s2)) => s1 == s2,
(Value::Array(arr1), Value::Array(arr2)) => {
if arr1.len() != arr2.len() {
return false;
}
arr1.iter().zip(arr2).all(|(e1, e2)| equals(e1, e2))
}
(Value::Object(obj1), Value::Object(obj2)) => {
if obj1.len() != obj2.len() {
return false;
}
for (k1, v1) in obj1 {
if let Some(v2) = obj2.get(k1) {
if !equals(v1, v2) {
return false;
}
} else {
return false;
}
}
true
}
_ => false,
}
}
pub(crate) fn duplicates(arr: &Vec<Value>) -> Option<(usize, usize)> {
match arr.as_slice() {
[e0, e1] => {
if equals(e0, e1) {
return Some((0, 1));
}
}
[e0, e1, e2] => {
if equals(e0, e1) {
return Some((0, 1));
} else if equals(e0, e2) {
return Some((0, 2));
} else if equals(e1, e2) {
return Some((1, 2));
}
}
_ => {
let len = arr.len();
if len <= 20 {
for i in 0..len - 1 {
for j in i + 1..len {
if equals(&arr[i], &arr[j]) {
return Some((i, j));
}
}
}
} else {
let mut seen = AHashMap::with_capacity(len);
for (i, item) in arr.iter().enumerate() {
if let Some(j) = seen.insert(HashedValue(item), i) {
return Some((j, i));
}
}
}
}
}
None
}
// HashedValue --
// Based on implementation proposed by Sven Marnach:
// https://stackoverflow.com/questions/60882381/what-is-the-fastest-correct-way-to-detect-that-there-are-no-duplicates-in-a-json
pub(crate) struct HashedValue<'a>(pub(crate) &'a Value);
impl PartialEq for HashedValue<'_> {
fn eq(&self, other: &Self) -> bool {
equals(self.0, other.0)
}
}
impl Eq for HashedValue<'_> {}
impl Hash for HashedValue<'_> {
fn hash<H: Hasher>(&self, state: &mut H) {
match self.0 {
Value::Null => state.write_u32(3_221_225_473), // chosen randomly
Value::Bool(ref b) => b.hash(state),
Value::Number(ref num) => {
if let Some(num) = num.as_f64() {
num.to_bits().hash(state);
} else if let Some(num) = num.as_u64() {
num.hash(state);
} else if let Some(num) = num.as_i64() {
num.hash(state);
}
}
Value::String(ref str) => str.hash(state),
Value::Array(ref arr) => {
for item in arr {
HashedValue(item).hash(state);
}
}
Value::Object(ref obj) => {
let mut hash = 0;
for (pname, pvalue) in obj {
// We have no way of building a new hasher of type `H`, so we
// hardcode using the default hasher of a hash map.
let mut hasher = AHasher::default();
pname.hash(&mut hasher);
HashedValue(pvalue).hash(&mut hasher);
hash ^= hasher.finish();
}
state.write_u64(hash);
}
}
}
}
#[cfg(test)]
mod tests {
use ahash::AHashMap;
use serde_json::json;
use super::*;
#[test]
fn test_quote() {
assert_eq!(quote(r#"abc"def'ghi"#), r#"'abc"def\'ghi'"#);
}
#[test]
fn test_fragment_split() {
let tests = [
("#", Fragment::JsonPointer("".into())),
("#/a/b", Fragment::JsonPointer("/a/b".into())),
("#abcd", Fragment::Anchor("abcd".into())),
("#%61%62%63%64", Fragment::Anchor("abcd".into())),
(
"#%2F%61%62%63%64%2fef",
Fragment::JsonPointer("/abcd/ef".into()),
), // '/' is encoded
("#abcd+ef", Fragment::Anchor("abcd+ef".into())), // '+' should not traslate to space
];
for test in tests {
let (_, got) = Fragment::split(test.0).unwrap();
assert_eq!(got, test.1, "Fragment::split({:?})", test.0);
}
}
#[test]
fn test_unescape() {
let tests = [
("bar~0", Some("bar~")),
("bar~1", Some("bar/")),
("bar~01", Some("bar~1")),
("bar~", None),
("bar~~", None),
];
for (tok, want) in tests {
let res = JsonPointer::unescape(tok).ok();
let got = res.as_ref().map(|c| c.as_ref());
assert_eq!(got, want, "unescape({:?})", tok)
}
}
#[test]
fn test_equals() {
let tests = [["1.0", "1"], ["-1.0", "-1"]];
for [a, b] in tests {
let a = serde_json::from_str(a).unwrap();
let b = serde_json::from_str(b).unwrap();
assert!(equals(&a, &b));
}
}
#[test]
fn test_hashed_value() {
let mut seen = AHashMap::with_capacity(10);
let (v1, v2) = (json!(2), json!(2.0));
assert!(equals(&v1, &v2));
assert!(seen.insert(HashedValue(&v1), 1).is_none());
assert!(seen.insert(HashedValue(&v2), 1).is_some());
}
}