boon now included

This commit is contained in:
2025-09-30 01:10:58 -04:00
parent c9b1245a57
commit cc04f38c14
77 changed files with 12905 additions and 52 deletions

985
validator/src/compiler.rs Normal file
View File

@ -0,0 +1,985 @@
use std::{cmp::Ordering, collections::HashMap, error::Error, fmt::Display};
use regex::Regex;
use serde_json::{Map, Value};
use url::Url;
use crate::{content::*, draft::*, ecma, formats::*, root::*, roots::*, util::*, *};
/// JSON Schema draft versions this crate can compile.
///
/// Variants are declared from oldest to newest, so the derived ordering
/// compares drafts chronologically. The enum is `#[non_exhaustive]`:
/// further drafts may be added later.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[non_exhaustive]
pub enum Draft {
    /// Draft-04, metaschema `http://json-schema.org/draft-04/schema`
    V4,
    /// Draft-06, metaschema `http://json-schema.org/draft-06/schema`
    V6,
    /// Draft-07, metaschema `http://json-schema.org/draft-07/schema`
    V7,
    /// Draft 2019-09, metaschema `https://json-schema.org/draft/2019-09/schema`
    V2019_09,
    /// Draft 2020-12, metaschema `https://json-schema.org/draft/2020-12/schema`
    V2020_12,
}
impl Draft {
    /**
    Looks up the [`Draft`] identified by the metaschema `url`.

    # Arguments

    * `url` - metaschema url; both the `http` and `https` schemes are accepted
      and a trailing empty fragment (`#`) is tolerated

    Returns `None` when the url does not name a supported draft.

    # Examples

    ```
    # use boon::*;
    assert_eq!(Draft::from_url("https://json-schema.org/draft/2020-12/schema"), Some(Draft::V2020_12));
    assert_eq!(Draft::from_url("http://json-schema.org/draft-07/schema#"), Some(Draft::V7));
    ```
    */
    pub fn from_url(url: &str) -> Option<Draft> {
        // delegate url matching to the internal draft table, then map its
        // numeric version back onto the public enum
        let internal = crate::draft::Draft::from_url(url)?;
        match internal.version {
            4 => Some(Draft::V4),
            6 => Some(Draft::V6),
            7 => Some(Draft::V7),
            2019 => Some(Draft::V2019_09),
            2020 => Some(Draft::V2020_12),
            _ => None,
        }
    }

    /// Returns the crate-internal draft definition backing this variant.
    pub(crate) fn internal(&self) -> &'static crate::draft::Draft {
        match *self {
            Self::V4 => &DRAFT4,
            Self::V6 => &DRAFT6,
            Self::V7 => &DRAFT7,
            Self::V2019_09 => &DRAFT2019,
            Self::V2020_12 => &DRAFT2020,
        }
    }
}
/// Returns latest draft supported
impl Default for Draft {
fn default() -> Self {
Draft::V2020_12
}
}
/// JsonSchema compiler.
///
/// Holds the schema roots together with the user-configurable options:
/// format/content assertion switches and custom formats, decoders and
/// media types.
#[derive(Default)]
pub struct Compiler {
// schema roots; also carries the default draft and the resource loader
roots: Roots,
// when true, `format` is always asserted (see `enable_format_assertions`)
assert_format: bool,
// when true, content keywords are asserted (see `enable_content_assertions`)
assert_content: bool,
// custom formats keyed by format name (see `register_format`)
formats: HashMap<&'static str, Format>,
// custom `contentEncoding` decoders keyed by encoding name
decoders: HashMap<&'static str, Decoder>,
// custom `contentMediaType` handlers keyed by media-type name
media_types: HashMap<&'static str, MediaType>,
}
impl Compiler {
pub fn new() -> Self {
Self::default()
}
/**
Overrides the draft used to compile schemas without
explicit `$schema` field.
By default this library uses latest draft supported.
The use of this option is HIGHLY encouraged to ensure
continued correct operation of your schema. The current
default value will not stay the same over time.
*/
pub fn set_default_draft(&mut self, d: Draft) {
self.roots.default_draft = d.internal()
}
/**
Always enable format assertions.
# Default Behavior
- for draft-07 and earlier: enabled
- for draft/2019-09: disabled, unless
metaschema says `format` vocabulary is required
- for draft/2020-12: disabled, unless
metaschema says `format-assertion` vocabulary is required
*/
pub fn enable_format_assertions(&mut self) {
self.assert_format = true;
}
/**
Always enable content assertions.
content assertions include keywords:
- contentEncoding
- contentMediaType
- contentSchema
Default Behavior is always disabled.
*/
pub fn enable_content_assertions(&mut self) {
self.assert_content = true;
}
/// Overrides default [`UrlLoader`] used to load schema resources
pub fn use_loader(&mut self, url_loader: Box<dyn UrlLoader>) {
self.roots.loader.use_loader(url_loader);
}
/**
Registers custom `format`
# Note
- `regex` format cannot be overridden
- format assertions are disabled for draft >= 2019-09.
see [`Compiler::enable_format_assertions`]
*/
pub fn register_format(&mut self, format: Format) {
if format.name != "regex" {
self.formats.insert(format.name, format);
}
}
/**
Registers custom `contentEncoding`
Note that content assertions are disabled by default.
see [`Compiler::enable_content_assertions`]
*/
pub fn register_content_encoding(&mut self, decoder: Decoder) {
self.decoders.insert(decoder.name, decoder);
}
/**
Registers custom `contentMediaType`
Note that content assertions are disabled by default.
see [`Compiler::enable_content_assertions`]
*/
pub fn register_content_media_type(&mut self, media_type: MediaType) {
self.media_types.insert(media_type.name, media_type);
}
/**
Adds schema resource which used later in reference resoltion
If you do not know which schema resources required, then use [`UrlLoader`].
The argument `loc` can be file path or url. any fragment in `loc` is ignored.
# Errors
returns [`CompileError`] if url parsing failed.
*/
pub fn add_resource(&mut self, loc: &str, json: Value) -> Result<(), CompileError> {
let uf = UrlFrag::absolute(loc)?;
self.roots.loader.add_doc(uf.url, json);
Ok(())
}
/**
Compile given `loc` into `target` and return an identifier to the compiled
schema.
the argument `loc` can be file path or url with optional fragment.
examples: `http://example.com/schema.json#/defs/address`,
`samples/schema_file.json#defs/address`
if `loc` is already compiled, it simply returns the same [`SchemaIndex`]
*/
pub fn compile(
&mut self,
loc: &str,
target: &mut Schemas,
) -> Result<SchemaIndex, CompileError> {
let uf = UrlFrag::absolute(loc)?;
// resolve anchor
let up = self.roots.resolve_fragment(uf)?;
let result = self.do_compile(up, target);
if let Err(bug @ CompileError::Bug(_)) = &result {
debug_assert!(false, "{bug}");
}
result
}
fn do_compile(
&mut self,
up: UrlPtr,
target: &mut Schemas,
) -> Result<SchemaIndex, CompileError> {
let mut queue = Queue::new();
let mut compiled = Vec::new();
let index = queue.enqueue_schema(target, up);
if queue.schemas.is_empty() {
// already got compiled
return Ok(index);
}
while queue.schemas.len() > compiled.len() {
let up = &queue.schemas[compiled.len()];
self.roots.ensure_subschema(up)?;
let Some(root) = self.roots.get(&up.url) else {
return Err(CompileError::Bug("or_load didn't add".into()));
};
let doc = self.roots.loader.load(&root.url)?;
let v = up.lookup(doc)?;
let sch = self.compile_value(target, v, &up.clone(), root, &mut queue)?;
compiled.push(sch);
self.roots.insert(&mut queue.roots);
}
target.insert(queue.schemas, compiled);
Ok(index)
}
fn compile_value(
&self,
schemas: &Schemas,
v: &Value,
up: &UrlPtr,
root: &Root,
queue: &mut Queue,
) -> Result<Schema, CompileError> {
let mut s = Schema::new(up.to_string());
s.draft_version = root.draft.version;
// we know it is already in queue, we just want to get its index
let len = queue.schemas.len();
s.idx = queue.enqueue_schema(schemas, up.to_owned());
debug_assert_eq!(queue.schemas.len(), len, "{up} should already be in queue");
s.resource = {
let base = UrlPtr {
url: up.url.clone(),
ptr: root.resource(&up.ptr).ptr.clone(),
};
queue.enqueue_schema(schemas, base)
};
// if resource, enqueue dynamicAnchors for compilation
if s.idx == s.resource && root.draft.version >= 2020 {
let res = root.resource(&up.ptr);
for (anchor, anchor_ptr) in &res.anchors {
if res.dynamic_anchors.contains(anchor) {
let up = UrlPtr {
url: up.url.clone(),
ptr: anchor_ptr.clone(),
};
let danchor_sch = queue.enqueue_schema(schemas, up);
s.dynamic_anchors.insert(anchor.to_string(), danchor_sch);
}
}
}
match v {
Value::Object(obj) => {
if obj.is_empty() {
s.boolean = Some(true);
} else {
ObjCompiler {
c: self,
obj,
up,
schemas,
root,
queue,
}
.compile_obj(&mut s)?;
}
}
Value::Bool(b) => s.boolean = Some(*b),
_ => {}
}
s.all_props_evaluated = s.additional_properties.is_some();
s.all_items_evaluated = if s.draft_version < 2020 {
s.additional_items.is_some() || matches!(s.items, Some(Items::SchemaRef(_)))
} else {
s.items2020.is_some()
};
s.num_items_evaluated = if let Some(Items::SchemaRefs(list)) = &s.items {
list.len()
} else {
s.prefix_items.len()
};
Ok(s)
}
}
// Short-lived helper that compiles one object-form schema.
// It bundles everything the per-draft keyword handlers need.
struct ObjCompiler<'c, 'v, 'l, 's, 'r, 'q> {
// owning compiler: assertion switches, custom formats/decoders/media types, roots
c: &'c Compiler,
// the json object being compiled
obj: &'v Map<String, Value>,
// location (url + json-pointer) of `obj`
up: &'l UrlPtr,
// already compiled schemas, consulted when enqueueing
schemas: &'s Schemas,
// root document that contains `obj`
root: &'r Root,
// queue collecting schemas that still have to be compiled
queue: &'q mut Queue,
}
// compile supported drafts
impl ObjCompiler<'_, '_, '_, '_, '_, '_> {
/// Compiles the keywords of `self.obj` into `s`.
///
/// The draft-04 handler always runs; the handler of each later draft runs
/// only when the root's draft version is at least that draft, oldest first.
fn compile_obj(&mut self, s: &mut Schema) -> Result<(), CompileError> {
self.compile_draft4(s)?;
if self.draft_version() >= 6 {
self.compile_draft6(s)?;
}
if self.draft_version() >= 7 {
self.compile_draft7(s)?;
}
if self.draft_version() >= 2019 {
self.compile_draft2019(s)?;
}
if self.draft_version() >= 2020 {
self.compile_draft2020(s)?;
}
Ok(())
}
/// Compiles the keywords handled since draft-04: `$ref`, the basic
/// applicators, the validation keywords and `format`.
///
/// # Errors
/// - [`CompileError::InvalidRegex`] for a `patternProperties` key that is not a valid regex
/// - [`CompileError::Bug`] when `pattern` fails to convert or compile
/// - any error raised while resolving `$ref`
fn compile_draft4(&mut self, s: &mut Schema) -> Result<(), CompileError> {
if self.has_vocab("core") {
s.ref_ = self.enqueue_ref("$ref")?;
if s.ref_.is_some() && self.draft_version() < 2019 {
// All other properties in a "$ref" object MUST be ignored
return Ok(());
}
}
if self.has_vocab("applicator") {
s.all_of = self.enqueue_arr("allOf");
s.any_of = self.enqueue_arr("anyOf");
s.one_of = self.enqueue_arr("oneOf");
s.not = self.enqueue_prop("not");
// before 2020-12 `items` is either an array of schemas or a single schema;
// `additionalItems` is only read for the array form
if self.draft_version() < 2020 {
match self.value("items") {
Some(Value::Array(_)) => {
s.items = Some(Items::SchemaRefs(self.enqueue_arr("items")));
s.additional_items = self.enquue_additional("additionalItems");
}
_ => s.items = self.enqueue_prop("items").map(Items::SchemaRef),
}
}
s.properties = self.enqueue_map("properties");
// every `patternProperties` key is converted with `ecma::convert`, compiled
// to a `Regex` and paired with its enqueued subschema
s.pattern_properties = {
let mut v = vec![];
if let Some(Value::Object(obj)) = self.value("patternProperties") {
for pname in obj.keys() {
let ecma =
ecma::convert(pname).map_err(|src| CompileError::InvalidRegex {
url: self.up.format("patternProperties"),
regex: pname.to_owned(),
src,
})?;
let regex =
Regex::new(ecma.as_ref()).map_err(|e| CompileError::InvalidRegex {
url: self.up.format("patternProperties"),
regex: ecma.into_owned(),
src: e.into(),
})?;
let ptr = self.up.ptr.append2("patternProperties", pname);
let sch = self.enqueue_schema(ptr);
v.push((regex, sch));
}
}
v
};
s.additional_properties = self.enquue_additional("additionalProperties");
// a `dependencies` entry is a list of property names when its value is an
// array, otherwise it is treated as a subschema
if let Some(Value::Object(deps)) = self.value("dependencies") {
s.dependencies = deps
.iter()
.filter_map(|(k, v)| {
let v = match v {
Value::Array(_) => Some(Dependency::Props(to_strings(v))),
_ => {
let ptr = self.up.ptr.append2("dependencies", k);
Some(Dependency::SchemaRef(self.enqueue_schema(ptr)))
}
};
v.map(|v| (k.clone(), v))
})
.collect();
}
}
if self.has_vocab("validation") {
// `type` is a single name or an array of names; names for which
// `Type::from_str` returns `None` and non-string entries are skipped
match self.value("type") {
Some(Value::String(t)) => {
if let Some(t) = Type::from_str(t) {
s.types.add(t)
}
}
Some(Value::Array(arr)) => {
for t in arr {
if let Value::String(t) = t {
if let Some(t) = Type::from_str(t) {
s.types.add(t)
}
}
}
}
_ => {}
}
// keep the enum values together with the set of json types they cover
if let Some(Value::Array(e)) = self.value("enum") {
let mut types = Types::default();
for item in e {
types.add(Type::of(item));
}
s.enum_ = Some(Enum {
types,
values: e.clone(),
});
}
s.multiple_of = self.num("multipleOf");
s.maximum = self.num("maximum");
// boolean `exclusiveMaximum`: `true` moves `maximum` into
// `exclusive_maximum`; any non-boolean value is read as a number
if let Some(Value::Bool(exclusive)) = self.value("exclusiveMaximum") {
if *exclusive {
s.exclusive_maximum = s.maximum.take();
}
} else {
s.exclusive_maximum = self.num("exclusiveMaximum");
}
s.minimum = self.num("minimum");
// same handling as `exclusiveMaximum`, applied to `minimum`
if let Some(Value::Bool(exclusive)) = self.value("exclusiveMinimum") {
if *exclusive {
s.exclusive_minimum = s.minimum.take();
}
} else {
s.exclusive_minimum = self.num("exclusiveMinimum");
}
s.max_length = self.usize("maxLength");
s.min_length = self.usize("minLength");
// unlike `patternProperties`, a failure here is reported as `Bug`
// NOTE(review): presumably an invalid `pattern` is expected to be rejected
// earlier by metaschema validation — confirm
if let Some(Value::String(p)) = self.value("pattern") {
let p = ecma::convert(p).map_err(CompileError::Bug)?;
s.pattern = Some(Regex::new(p.as_ref()).map_err(|e| CompileError::Bug(e.into()))?);
}
s.max_items = self.usize("maxItems");
s.min_items = self.usize("minItems");
s.unique_items = self.bool("uniqueItems");
s.max_properties = self.usize("maxProperties");
s.min_properties = self.usize("minProperties");
if let Some(req) = self.value("required") {
s.required = to_strings(req);
}
}
// format --
// `format` is compiled when assertions are forced on, or when the vocabulary
// matching the draft is present: "core" before 2019, "format" for 2019,
// "format-assertion" after 2019
if self.c.assert_format
|| self.has_vocab(match self.draft_version().cmp(&2019) {
Ordering::Less => "core",
Ordering::Equal => "format",
Ordering::Greater => "format-assertion",
})
{
if let Some(Value::String(format)) = self.value("format") {
// user-registered formats take precedence over the built-in ones;
// an unknown format name leaves `s.format` as `None`
s.format = self
.c
.formats
.get(format.as_str())
.or_else(|| FORMATS.get(format.as_str()))
.cloned();
}
}
Ok(())
}
/// Compiles the keywords handled from draft-06 on: `contains`,
/// `propertyNames` and `const`.
fn compile_draft6(&mut self, s: &mut Schema) -> Result<(), CompileError> {
if self.has_vocab("applicator") {
s.contains = self.enqueue_prop("contains");
s.property_names = self.enqueue_prop("propertyNames");
}
if self.has_vocab("validation") {
s.constant = self.value("const").cloned();
}
Ok(())
}
/// Compiles the keywords handled from draft-07 on: `if`/`then`/`else` and,
/// when content assertions are enabled, `contentEncoding` and
/// `contentMediaType`.
fn compile_draft7(&mut self, s: &mut Schema) -> Result<(), CompileError> {
if self.has_vocab("applicator") {
s.if_ = self.enqueue_prop("if");
if s.if_.is_some() {
// `then` is skipped when `if` is the literal `false`,
// `else` is skipped when `if` is the literal `true`
if !self.bool_schema("if", false) {
s.then = self.enqueue_prop("then");
}
if !self.bool_schema("if", true) {
s.else_ = self.enqueue_prop("else");
}
}
}
if self.c.assert_content {
// user-registered decoders/media types take precedence over built-ins
if let Some(Value::String(encoding)) = self.value("contentEncoding") {
s.content_encoding = self
.c
.decoders
.get(encoding.as_str())
.or_else(|| DECODERS.get(encoding.as_str()))
.cloned();
}
if let Some(Value::String(media_type)) = self.value("contentMediaType") {
s.content_media_type = self
.c
.media_types
.get(media_type.as_str())
.or_else(|| MEDIA_TYPES.get(media_type.as_str()))
.cloned();
}
}
Ok(())
}
/// Compiles the keywords handled from draft 2019-09 on: `$recursiveRef`,
/// `$recursiveAnchor`, `maxContains`/`minContains`, `dependentRequired`,
/// `dependentSchemas`, the `unevaluated*` keywords and `contentSchema`.
///
/// # Errors
/// Propagates any error raised while resolving `$recursiveRef`.
fn compile_draft2019(&mut self, s: &mut Schema) -> Result<(), CompileError> {
if self.has_vocab("core") {
s.recursive_ref = self.enqueue_ref("$recursiveRef")?;
s.recursive_anchor = self.bool("$recursiveAnchor");
}
if self.has_vocab("validation") {
// `maxContains`/`minContains` are only read when `contains` was compiled
if s.contains.is_some() {
s.max_contains = self.usize("maxContains");
s.min_contains = self.usize("minContains");
}
if let Some(Value::Object(dep_req)) = self.value("dependentRequired") {
for (pname, pvalue) in dep_req {
s.dependent_required
.push((pname.clone(), to_strings(pvalue)));
}
}
}
if self.has_vocab("applicator") {
s.dependent_schemas = self.enqueue_map("dependentSchemas");
}
// the `unevaluated*` keywords are gated by the "applicator" vocabulary in
// 2019 and by the "unevaluated" vocabulary in later drafts
if self.has_vocab(match self.draft_version() {
2019 => "applicator",
_ => "unevaluated",
}) {
s.unevaluated_items = self.enqueue_prop("unevaluatedItems");
s.unevaluated_properties = self.enqueue_prop("unevaluatedProperties");
}
// `contentSchema` is compiled only with content assertions enabled and a
// json-compatible media type compiled earlier
if self.c.assert_content
&& s.content_media_type
.map(|mt| mt.json_compatible)
.unwrap_or(false)
{
s.content_schema = self.enqueue_prop("contentSchema");
}
Ok(())
}
/// Compiles the keywords handled from draft 2020-12 on: `$dynamicRef`,
/// `$dynamicAnchor`, `prefixItems` and the single-schema `items`.
///
/// # Errors
/// - [`CompileError::ParseAnchorError`] when the `$dynamicRef` fragment cannot be split
/// - any error raised while resolving `$dynamicRef`
fn compile_draft2020(&mut self, s: &mut Schema) -> Result<(), CompileError> {
if self.has_vocab("core") {
if let Some(sch) = self.enqueue_ref("$dynamicRef")? {
if let Some(Value::String(dref)) = self.value("$dynamicRef") {
let Ok((_, frag)) = Fragment::split(dref) else {
let loc = self.up.format("$dynamicRef");
return Err(CompileError::ParseAnchorError { loc });
};
// the anchor name is recorded only for an anchor fragment;
// a json-pointer fragment yields no anchor
let anchor = match frag {
Fragment::Anchor(Anchor(s)) => Some(s),
Fragment::JsonPointer(_) => None,
};
s.dynamic_ref = Some(DynamicRef { sch, anchor });
}
};
if let Some(Value::String(anchor)) = self.value("$dynamicAnchor") {
s.dynamic_anchor = Some(anchor.to_owned());
}
}
if self.has_vocab("applicator") {
s.prefix_items = self.enqueue_arr("prefixItems");
s.items2020 = self.enqueue_prop("items");
}
Ok(())
}
}
// enqueue helpers
impl ObjCompiler<'_, '_, '_, '_, '_, '_> {
    /// Queues the schema at json-pointer `ptr` of the current document and
    /// returns its index.
    fn enqueue_schema(&mut self, ptr: JsonPointer) -> SchemaIndex {
        let url = self.up.url.clone();
        self.queue.enqueue_schema(self.schemas, UrlPtr { url, ptr })
    }

    /// Queues the value of keyword `pname` as a subschema, if the keyword is present.
    fn enqueue_prop(&mut self, pname: &'static str) -> Option<SchemaIndex> {
        if !self.obj.contains_key(pname) {
            return None;
        }
        let ptr = self.up.ptr.append(pname);
        Some(self.enqueue_schema(ptr))
    }

    /// Queues every item of the array keyword `pname` as a subschema.
    /// Returns an empty list when the keyword is absent or not an array.
    fn enqueue_arr(&mut self, pname: &'static str) -> Vec<SchemaIndex> {
        let Some(Value::Array(arr)) = self.obj.get(pname) else {
            return Vec::new();
        };
        let count = arr.len();
        let mut indexes = Vec::with_capacity(count);
        for i in 0..count {
            let ptr = self.up.ptr.append2(pname, &i.to_string());
            indexes.push(self.enqueue_schema(ptr));
        }
        indexes
    }

    /// Queues every property value of the object keyword `pname` as a
    /// subschema and collects `(property name, index)` pairs into `T`.
    /// Returns `T::default()` when the keyword is absent or not an object.
    fn enqueue_map<T>(&mut self, pname: &'static str) -> T
    where
        T: Default,
        T: FromIterator<(String, SchemaIndex)>,
    {
        let Some(Value::Object(members)) = self.obj.get(pname) else {
            return T::default();
        };
        members
            .keys()
            .map(|key| {
                let ptr = self.up.ptr.append2(pname, key);
                (key.clone(), self.enqueue_schema(ptr))
            })
            .collect()
    }

    /// Resolves the reference stored in the string keyword `pname` and
    /// queues its target. Returns `Ok(None)` when the keyword is absent or
    /// not a string.
    fn enqueue_ref(&mut self, pname: &str) -> Result<Option<SchemaIndex>, CompileError> {
        let Some(Value::String(reference)) = self.obj.get(pname) else {
            return Ok(None);
        };
        let base_url = self.root.base_url(&self.up.ptr);
        let abs_ref = UrlFrag::join(base_url, reference)?;

        // local ref: the target is resolved by the current root
        if let Some(local) = self.root.resolve(&abs_ref)? {
            return Ok(Some(self.enqueue_schema(local.ptr)));
        }

        // remote ref: the target is resolved through the queue and the roots
        let remote = self.queue.resolve_anchor(abs_ref, &self.c.roots)?;
        Ok(Some(self.queue.enqueue_schema(self.schemas, remote)))
    }

    /// Compiles an `additional*` keyword: a boolean is kept as is, any other
    /// present value is queued as a subschema.
    fn enquue_additional(&mut self, pname: &'static str) -> Option<Additional> {
        match self.obj.get(pname) {
            Some(Value::Bool(b)) => Some(Additional::Bool(*b)),
            _ => self.enqueue_prop(pname).map(Additional::SchemaRef),
        }
    }
}
// query helpers
impl<'v> ObjCompiler<'_, 'v, '_, '_, '_, '_> {
    /// Draft version of the root being compiled.
    fn draft_version(&self) -> usize {
        self.root.draft.version
    }

    /// Whether the root has the vocabulary `name`.
    fn has_vocab(&self, name: &str) -> bool {
        self.root.has_vocab(name)
    }

    /// Raw value of keyword `pname`, if present.
    fn value(&self, pname: &str) -> Option<&'v Value> {
        self.obj.get(pname)
    }

    /// `true` only when keyword `pname` is the literal `true`.
    fn bool(&self, pname: &str) -> bool {
        self.bool_schema(pname, true)
    }

    /// Keyword `pname` as `usize`: accepted for unsigned integers and for
    /// floats that are sign-positive with no fractional part.
    fn usize(&self, pname: &str) -> Option<usize> {
        match self.obj.get(pname)? {
            Value::Number(n) if n.is_u64() => n.as_u64().map(|u| u as usize),
            Value::Number(n) => n
                .as_f64()
                .filter(|f| f.is_sign_positive() && f.fract() == 0.0)
                .map(|f| f as usize),
            _ => None,
        }
    }

    /// Keyword `pname` as a number, if it is one.
    fn num(&self, pname: &str) -> Option<Number> {
        match self.obj.get(pname) {
            Some(Value::Number(n)) => Some(n.clone()),
            _ => None,
        }
    }

    /// Whether keyword `pname` is the boolean literal `b`.
    fn bool_schema(&self, pname: &str, b: bool) -> bool {
        matches!(self.obj.get(pname), Some(Value::Bool(v)) if *v == b)
    }
}
/// Error type for compilation failures.
///
/// Variants that wrap an underlying error keep it in their `src` field;
/// the `Display` impl appends it only in alternate (`{:#}`) formatting.
#[derive(Debug)]
pub enum CompileError {
/// Error in parsing `url`.
ParseUrlError { url: String, src: Box<dyn Error> },
/// Failed loading `url`.
LoadUrlError { url: String, src: Box<dyn Error> },
/// no [`UrlLoader`] registered for the `url`
UnsupportedUrlScheme { url: String },
/// Error in parsing `$schema` url.
InvalidMetaSchemaUrl { url: String, src: Box<dyn Error> },
/// draft `url` is not supported
UnsupportedDraft { url: String },
/// Cycle in resolving `$schema` in `url`.
MetaSchemaCycle { url: String },
/// `url` is not valid against metaschema.
ValidationError {
url: String,
src: ValidationError<'static, 'static>,
},
/// Error in parsing id at `loc`
ParseIdError { loc: String },
/// Error in parsing anchor at `loc`
ParseAnchorError { loc: String },
/// Duplicate id `id` in `url` at `ptr1` and `ptr2`.
DuplicateId {
url: String,
id: String,
ptr1: String,
ptr2: String,
},
/// Duplicate anchor `anchor` in `url` at `ptr1` and `ptr2`.
DuplicateAnchor {
anchor: String,
url: String,
ptr1: String,
ptr2: String,
},
/// Not a valid json pointer.
InvalidJsonPointer(String),
/// JsonPointer evaluated to nothing.
JsonPointerNotFound(String),
/// anchor in `reference` not found in `url`.
AnchorNotFound { url: String, reference: String },
/// Unsupported vocabulary `vocabulary` in `url`.
UnsupportedVocabulary { url: String, vocabulary: String },
/// Invalid Regex `regex` at `url`.
InvalidRegex {
url: String,
regex: String,
src: Box<dyn Error>,
},
/// Encountered bug in compiler implementation. Please report
/// this as an issue for this crate.
Bug(Box<dyn Error>),
}
impl Error for CompileError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::ParseUrlError { src, .. } => Some(src.as_ref()),
Self::LoadUrlError { src, .. } => Some(src.as_ref()),
Self::InvalidMetaSchemaUrl { src, .. } => Some(src.as_ref()),
Self::ValidationError { src, .. } => Some(src),
Self::Bug(src) => Some(src.as_ref()),
_ => None,
}
}
}
impl Display for CompileError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::ParseUrlError { url, src } => {
if f.alternate() {
write!(f, "error parsing url {url}: {src}")
} else {
write!(f, "error parsing {url}")
}
}
Self::LoadUrlError { url, src } => {
if f.alternate() {
write!(f, "error loading {url}: {src}")
} else {
write!(f, "error loading {url}")
}
}
Self::UnsupportedUrlScheme { url } => write!(f, "unsupported scheme in {url}"),
Self::InvalidMetaSchemaUrl { url, src } => {
if f.alternate() {
write!(f, "invalid $schema in {url}: {src}")
} else {
write!(f, "invalid $schema in {url}")
}
}
Self::UnsupportedDraft { url } => write!(f, "draft {url} is not supported"),
Self::MetaSchemaCycle { url } => {
write!(f, "cycle in resolving $schema in {url}")
}
Self::ValidationError { url, src } => {
if f.alternate() {
write!(f, "{url} is not valid against metaschema: {src}")
} else {
write!(f, "{url} is not valid against metaschema")
}
}
Self::ParseIdError { loc } => write!(f, "error in parsing id at {loc}"),
Self::ParseAnchorError { loc } => write!(f, "error in parsing anchor at {loc}"),
Self::DuplicateId {
url,
id,
ptr1,
ptr2,
} => write!(f, "duplicate $id {id} in {url} at {ptr1:?} and {ptr2:?}"),
Self::DuplicateAnchor {
anchor,
url,
ptr1,
ptr2,
} => {
write!(
f,
"duplicate anchor {anchor:?} in {url} at {ptr1:?} and {ptr2:?}"
)
}
Self::InvalidJsonPointer(loc) => write!(f, "invalid json-pointer {loc}"),
Self::JsonPointerNotFound(loc) => write!(f, "json-pointer in {loc} not found"),
Self::AnchorNotFound { url, reference } => {
write!(
f,
"anchor in reference {reference} is not found in schema {url}"
)
}
Self::UnsupportedVocabulary { url, vocabulary } => {
write!(f, "unsupported vocabulary {vocabulary} in {url}")
}
Self::InvalidRegex { url, regex, src } => {
if f.alternate() {
write!(f, "invalid regex {} at {url}: {src}", quote(regex))
} else {
write!(f, "invalid regex {} at {url}", quote(regex))
}
}
Self::Bug(src) => {
write!(
f,
"encountered bug in jsonschema compiler. please report: {src}"
)
}
}
}
}
// helpers --
/// Collects the string items of a json array into owned strings.
/// Non-string items are skipped; a non-array value yields an empty list.
fn to_strings(v: &Value) -> Vec<String> {
    let Value::Array(items) = v else {
        return vec![];
    };
    items
        .iter()
        .filter_map(|item| match item {
            Value::String(text) => Some(text.clone()),
            _ => None,
        })
        .collect()
}
// Work list used while compiling one batch of schemas.
pub(crate) struct Queue {
// locations queued for compilation, in the order they were first requested
pub(crate) schemas: Vec<UrlPtr>,
// roots created while resolving anchors, keyed by document url
pub(crate) roots: HashMap<Url, Root>,
}
impl Queue {
fn new() -> Self {
Self {
schemas: vec![],
roots: HashMap::new(),
}
}
pub(crate) fn resolve_anchor(
&mut self,
uf: UrlFrag,
roots: &Roots,
) -> Result<UrlPtr, CompileError> {
match uf.frag {
Fragment::JsonPointer(ptr) => Ok(UrlPtr { url: uf.url, ptr }),
Fragment::Anchor(_) => {
let root = match roots.get(&uf.url).or_else(|| self.roots.get(&uf.url)) {
Some(root) => root,
None => {
let doc = roots.loader.load(&uf.url)?;
let r = roots.create_root(uf.url.clone(), doc)?;
self.roots.entry(uf.url).or_insert(r)
}
};
root.resolve_fragment(&uf.frag)
}
}
}
pub(crate) fn enqueue_schema(&mut self, schemas: &Schemas, up: UrlPtr) -> SchemaIndex {
if let Some(sch) = schemas.get_by_loc(&up) {
// already got compiled
return sch.idx;
}
if let Some(qindex) = self.schemas.iter().position(|e| *e == up) {
// already queued for compilation
return SchemaIndex(schemas.size() + qindex);
}
// new compilation request
self.schemas.push(up);
SchemaIndex(schemas.size() + self.schemas.len() - 1)
}
}

82
validator/src/content.rs Normal file
View File

@ -0,0 +1,82 @@
use std::{collections::HashMap, error::Error};
use base64::Engine;
use once_cell::sync::Lazy;
use serde::de::IgnoredAny;
use serde_json::Value;
// decoders --
/// Defines Decoder for `contentEncoding`.
///
/// Pairs the encoding name used in schemas with the function that decodes
/// an encoded string into bytes.
#[derive(Clone, Copy)]
pub struct Decoder {
/// Name of the encoding
pub name: &'static str,
/// Decodes given string to bytes; returns an error if decoding fails
#[allow(clippy::type_complexity)]
pub func: fn(s: &str) -> Result<Vec<u8>, Box<dyn Error>>,
}
/// Built-in `contentEncoding` decoders keyed by encoding name.
/// Currently only `base64` is provided.
pub(crate) static DECODERS: Lazy<HashMap<&'static str, Decoder>> = Lazy::new(|| {
    let base64 = Decoder {
        name: "base64",
        func: decode_base64,
    };
    HashMap::from([(base64.name, base64)])
});
/// Decodes `s` with the standard base64 engine, boxing any decode error.
fn decode_base64(s: &str) -> Result<Vec<u8>, Box<dyn Error>> {
    let decoded = base64::engine::general_purpose::STANDARD.decode(s)?;
    Ok(decoded)
}
// mediatypes --
/// Defines Mediatype for `contentMediaType`.
///
/// Pairs the media-type name used in schemas with the function that checks
/// (and optionally deserializes) content of that type.
#[derive(Clone, Copy)]
pub struct MediaType {
/// Name of this media-type as defined in RFC 2046.
/// Example: `application/json`
pub name: &'static str,
/// whether this media type can be deserialized to json. If so it can
/// be validated by `contentSchema` keyword.
pub json_compatible: bool,
/**
Check whether `bytes` conforms to this media-type.
Should return `Ok(Some(Value))` if `deserialize` is `true`, otherwise it can return `Ok(None)`.
Ideally you could deserialize to `serde::de::IgnoredAny` if `deserialize` is `false` to gain
some performance.
`deserialize` is always `false` if `json_compatible` is `false`.
*/
#[allow(clippy::type_complexity)]
pub func: fn(bytes: &[u8], deserialize: bool) -> Result<Option<Value>, Box<dyn Error>>,
}
/// Built-in `contentMediaType` handlers keyed by media-type name.
/// Currently only `application/json` is provided.
pub(crate) static MEDIA_TYPES: Lazy<HashMap<&'static str, MediaType>> = Lazy::new(|| {
    let json = MediaType {
        name: "application/json",
        json_compatible: true,
        func: check_json,
    };
    HashMap::from([(json.name, json)])
});
/// Checks that `bytes` is valid json.
///
/// With `deserialize` set, the parsed value is returned as `Ok(Some(..))`;
/// otherwise the input is only parsed into `IgnoredAny` and `Ok(None)` is
/// returned.
fn check_json(bytes: &[u8], deserialize: bool) -> Result<Option<Value>, Box<dyn Error>> {
    if !deserialize {
        // validate only: parse without building a `Value`
        serde_json::from_slice::<IgnoredAny>(bytes)?;
        return Ok(None);
    }
    let value: Value = serde_json::from_slice(bytes)?;
    Ok(Some(value))
}

576
validator/src/draft.rs Normal file
View File

@ -0,0 +1,576 @@
use std::{
collections::{hash_map::Entry, HashMap},
str::FromStr,
};
use once_cell::sync::Lazy;
use serde_json::{Map, Value};
use url::Url;
use crate::{compiler::*, root::Resource, util::*, SchemaIndex, Schemas};
// Bit flags stored per keyword in `Draft::subschemas`; they may be combined
// (e.g. `items` uses `POS_SELF | POS_ITEM`).
// NOTE(review): the meanings below are inferred from the keyword tables in
// this file — confirm against the code that consumes the flags.
// presumably: the keyword's value is itself a schema (e.g. `not`)
const POS_SELF: u8 = 1 << 0;
// presumably: each property value of the keyword's object is a schema (e.g. `properties`)
const POS_PROP: u8 = 1 << 1;
// presumably: each item of the keyword's array is a schema (e.g. `allOf`)
const POS_ITEM: u8 = 1 << 2;
// Definition of draft-04: the identifier keyword is `id` and no vocabularies
// are declared. The later drafts in this file start from a copy of this
// subschema table and extend it.
pub(crate) static DRAFT4: Lazy<Draft> = Lazy::new(|| Draft {
version: 4,
id: "id",
url: "http://json-schema.org/draft-04/schema",
subschemas: HashMap::from([
// type agnostic
("definitions", POS_PROP),
("not", POS_SELF),
("allOf", POS_ITEM),
("anyOf", POS_ITEM),
("oneOf", POS_ITEM),
// object
("properties", POS_PROP),
("additionalProperties", POS_SELF),
("patternProperties", POS_PROP),
// array
// `items` carries both flags: it can be a single schema or an array of schemas
("items", POS_SELF | POS_ITEM),
("additionalItems", POS_SELF),
("dependencies", POS_PROP),
]),
vocab_prefix: "",
all_vocabs: vec![],
default_vocabs: vec![],
});
// Definition of draft-06: the draft-04 subschema table plus `propertyNames`
// and `contains`; the identifier keyword becomes `$id`.
pub(crate) static DRAFT6: Lazy<Draft> = Lazy::new(|| {
    let mut subschemas = DRAFT4.subschemas.clone();
    subschemas.insert("propertyNames", POS_SELF);
    subschemas.insert("contains", POS_SELF);
    Draft {
        version: 6,
        id: "$id",
        url: "http://json-schema.org/draft-06/schema",
        subschemas,
        vocab_prefix: "",
        all_vocabs: Vec::new(),
        default_vocabs: Vec::new(),
    }
});
// Definition of draft-07: the draft-06 subschema table plus `if`, `then`
// and `else`.
pub(crate) static DRAFT7: Lazy<Draft> = Lazy::new(|| {
    let mut subschemas = DRAFT6.subschemas.clone();
    for keyword in ["if", "then", "else"] {
        subschemas.insert(keyword, POS_SELF);
    }
    Draft {
        version: 7,
        id: "$id",
        url: "http://json-schema.org/draft-07/schema",
        subschemas,
        vocab_prefix: "",
        all_vocabs: Vec::new(),
        default_vocabs: Vec::new(),
    }
});
// Definition of draft 2019-09: the draft-07 subschema table plus the
// keywords added below; this is the first draft here that declares vocabularies.
pub(crate) static DRAFT2019: Lazy<Draft> = Lazy::new(|| {
    let mut subschemas = DRAFT7.subschemas.clone();
    let added = [
        ("$defs", POS_PROP),
        ("dependentSchemas", POS_PROP),
        ("unevaluatedProperties", POS_SELF),
        ("unevaluatedItems", POS_SELF),
        ("contentSchema", POS_SELF),
    ];
    for (keyword, position) in added {
        subschemas.insert(keyword, position);
    }
    Draft {
        version: 2019,
        id: "$id",
        url: "https://json-schema.org/draft/2019-09/schema",
        subschemas,
        vocab_prefix: "https://json-schema.org/draft/2019-09/vocab/",
        all_vocabs: vec![
            "core",
            "applicator",
            "validation",
            "meta-data",
            "format",
            "content",
        ],
        default_vocabs: vec!["core", "applicator", "validation"],
    }
});
// Definition of draft 2020-12: the 2019-09 subschema table plus
// `prefixItems`, with the 2020-12 vocabulary names.
pub(crate) static DRAFT2020: Lazy<Draft> = Lazy::new(|| {
    let mut subschemas = DRAFT2019.subschemas.clone();
    subschemas.insert("prefixItems", POS_ITEM);
    Draft {
        version: 2020,
        id: "$id",
        url: "https://json-schema.org/draft/2020-12/schema",
        subschemas,
        vocab_prefix: "https://json-schema.org/draft/2020-12/vocab/",
        all_vocabs: vec![
            "core",
            "applicator",
            "unevaluated",
            "validation",
            "meta-data",
            "format-annotation",
            "format-assertion",
            "content",
        ],
        default_vocabs: vec!["core", "applicator", "unevaluated", "validation"],
    }
});
// Standard metaschemas, built by `load_std_metaschemas` on first access.
// Panics with the `expect` message below if that load fails.
pub(crate) static STD_METASCHEMAS: Lazy<Schemas> =
Lazy::new(|| load_std_metaschemas().expect("std metaschemas must be compilable"));
/// Internal definition of the default (latest) public draft.
pub(crate) fn latest() -> &'static Draft {
    let newest = crate::Draft::default();
    newest.internal()
}
// --
// Crate-internal description of one JSON Schema draft: its version number,
// metaschema url, identifier keyword, subschema keyword table and vocabularies.
pub(crate) struct Draft {
// numeric version as used in this file: 4, 6, 7, 2019 or 2020
pub(crate) version: usize,
// url of the draft's metaschema
pub(crate) url: &'static str,
id: &'static str, // property name used to represent id
subschemas: HashMap<&'static str, u8>, // location of subschemas
pub(crate) vocab_prefix: &'static str, // prefix used for vocabulary
pub(crate) all_vocabs: Vec<&'static str>, // names of supported vocabs
pub(crate) default_vocabs: Vec<&'static str>, // names of default vocabs
}
impl Draft {
/// Returns the draft whose metaschema url matches `url`.
///
/// Both the `http` and `https` schemes are accepted. The url must not
/// carry a non-empty fragment. `json-schema.org/schema` maps to the latest
/// draft. Returns `None` for anything else.
pub(crate) fn from_url(url: &str) -> Option<&'static Draft> {
    let (url, frag) = split(url);
    if !frag.is_empty() {
        return None;
    }
    // Strip at most ONE scheme prefix. Stripping both in sequence would
    // accept malformed input such as `http://https://json-schema.org/...`.
    let url = url
        .strip_prefix("http://")
        .or_else(|| url.strip_prefix("https://"))
        .unwrap_or(url);
    match url {
        "json-schema.org/schema" => Some(latest()),
        "json-schema.org/draft/2020-12/schema" => Some(&DRAFT2020),
        "json-schema.org/draft/2019-09/schema" => Some(&DRAFT2019),
        "json-schema.org/draft-07/schema" => Some(&DRAFT7),
        "json-schema.org/draft-06/schema" => Some(&DRAFT6),
        "json-schema.org/draft-04/schema" => Some(&DRAFT4),
        _ => None,
    }
}
/// Index of this draft's metaschema inside `STD_METASCHEMAS`.
/// Returns `None` for an unknown version or when the metaschema is not found there.
fn get_schema(&self) -> Option<SchemaIndex> {
    let url = match self.version {
        2020 => "https://json-schema.org/draft/2020-12/schema",
        2019 => "https://json-schema.org/draft/2019-09/schema",
        7 => "http://json-schema.org/draft-07/schema",
        6 => "http://json-schema.org/draft-06/schema",
        4 => "http://json-schema.org/draft-04/schema",
        _ => return None,
    };
    // the urls above are literals, so a parse failure is a programming error
    let Ok(parsed) = Url::parse(url) else {
        panic!("{url} should be valid url")
    };
    let up = UrlPtr {
        url: parsed,
        ptr: "".into(),
    };
    let metaschema = STD_METASCHEMAS.get_by_loc(&up)?;
    Some(metaschema.idx)
}
/// Validates the schema document `v` (located at `up`) against this
/// draft's metaschema.
///
/// # Errors
/// - [`CompileError::Bug`] when no metaschema is available for this draft
/// - [`CompileError::ValidationError`] when `v` does not validate
pub(crate) fn validate(&self, up: &UrlPtr, v: &Value) -> Result<(), CompileError> {
    let sch = match self.get_schema() {
        Some(sch) => sch,
        None => {
            let msg = format!("no metaschema preloaded for draft {}", self.version);
            return Err(CompileError::Bug(msg.into()));
        }
    };
    if let Err(src) = STD_METASCHEMAS.validate(v, sch) {
        return Err(CompileError::ValidationError {
            url: up.to_string(),
            src: src.clone_static(),
        });
    }
    Ok(())
}
fn get_id<'a>(&self, obj: &'a Map<String, Value>) -> Option<&'a str> {
if self.version < 2019 && obj.contains_key("$ref") {
return None; // All other properties in a "$ref" object MUST be ignored
}
let Some(Value::String(id)) = obj.get(self.id) else {
return None;
};
let (id, _) = split(id); // ignore fragment
Some(id).filter(|id| !id.is_empty())
}
/// Reads the required vocabularies declared by `$vocabulary` in the
/// metaschema `doc`.
///
/// Returns `Ok(None)` for drafts before 2019 and when `doc` has no
/// `$vocabulary` object. Only entries whose value is `true` are considered;
/// their names are returned with this draft's vocabulary prefix removed.
///
/// # Errors
/// [`CompileError::UnsupportedVocabulary`] when a required vocabulary does
/// not carry this draft's prefix or is not one of its known vocabularies.
pub(crate) fn get_vocabs(
    &self,
    url: &Url,
    doc: &Value,
) -> Result<Option<Vec<String>>, CompileError> {
    if self.version < 2019 {
        return Ok(None);
    }
    let declared = match doc {
        Value::Object(obj) => match obj.get("$vocabulary") {
            Some(Value::Object(declared)) => declared,
            _ => return Ok(None),
        },
        _ => return Ok(None),
    };

    let mut vocabs = vec![];
    for (vocab, reqd) in declared {
        // entries that are not literally `true` are not required: skip them
        if !matches!(reqd, Value::Bool(true)) {
            continue;
        }
        let known = vocab
            .strip_prefix(self.vocab_prefix)
            .filter(|name| self.all_vocabs.contains(name));
        match known {
            Some(name) => vocabs.push(name.to_owned()), // todo: avoid alloc
            None => {
                return Err(CompileError::UnsupportedVocabulary {
                    url: url.as_str().to_owned(),
                    vocabulary: vocab.to_owned(),
                });
            }
        }
    }
    Ok(Some(vocabs))
}
// collects anchors/dynamic_anchors from `sch` into `res`.
// note this does not collect from subschemas in sch.
//
// `sch_ptr` is the location recorded for each anchor found; `url` is only
// used to build error messages.
//
// errors:
// - `DuplicateAnchor` if an anchor is already registered at a different pointer
// - `ParseAnchorError` if the fragment of a pre-2019 id cannot be split
pub(crate) fn collect_anchors(
&self,
sch: &Value,
sch_ptr: &JsonPointer,
res: &mut Resource,
url: &Url,
) -> Result<(), CompileError> {
// only object schemas can declare anchors
let Value::Object(obj) = sch else {
return Ok(());
};
// registers `anchor` at `sch_ptr`; registering the same anchor again at the
// same pointer is accepted, at a different pointer it is an error
let mut add_anchor = |anchor: Anchor| match res.anchors.entry(anchor) {
Entry::Occupied(entry) => {
if entry.get() == sch_ptr {
// anchor with same root_ptr already exists
return Ok(());
}
Err(CompileError::DuplicateAnchor {
url: url.as_str().to_owned(),
anchor: entry.key().to_string(),
ptr1: entry.get().to_string(),
ptr2: sch_ptr.to_string(),
})
}
entry => {
entry.or_insert(sch_ptr.to_owned());
Ok(())
}
};
if self.version < 2019 {
if obj.contains_key("$ref") {
return Ok(()); // All other properties in a "$ref" object MUST be ignored
}
// anchor is specified in id
if let Some(Value::String(id)) = obj.get(self.id) {
let Ok((_, frag)) = Fragment::split(id) else {
let loc = UrlFrag::format(url, sch_ptr.as_str());
return Err(CompileError::ParseAnchorError { loc });
};
// only an anchor fragment registers an anchor; a json-pointer fragment is ignored
if let Fragment::Anchor(anchor) = frag {
add_anchor(anchor)?;
};
return Ok(());
}
}
if self.version >= 2019 {
if let Some(Value::String(anchor)) = obj.get("$anchor") {
add_anchor(anchor.as_str().into())?;
}
}
if self.version >= 2020 {
// a dynamic anchor is registered as a plain anchor and additionally
// recorded in `dynamic_anchors`
if let Some(Value::String(anchor)) = obj.get("$dynamicAnchor") {
add_anchor(anchor.as_str().into())?;
res.dynamic_anchors.insert(anchor.as_str().into());
}
}
Ok(())
}
/// Walks `sch` and every subschema reachable through this draft's subschema
/// keywords, registering each schema resource in `resources` (keyed by its
/// pointer) and collecting anchors into the resource that owns them.
///
/// * `base` - base url in effect for `sch`
/// * `sch_ptr` - pointer of `sch` inside the document
/// * `url` - only used to build error values
///
/// Returns immediately if `sch_ptr` is already present in `resources`.
///
/// # Errors
///
/// * [`CompileError::ParseIdError`] if the id of a schema cannot be joined
///   with its base.
/// * [`CompileError::DuplicateId`] if two resources resolve to the same id.
/// * any error reported while collecting anchors.
pub(crate) fn collect_resources(
    &self,
    sch: &Value,
    base: &Url,           // base of json
    sch_ptr: JsonPointer, // ptr of json
    url: &Url,
    resources: &mut HashMap<JsonPointer, Resource>,
) -> Result<(), CompileError> {
    if resources.contains_key(&sch_ptr) {
        // resources are already collected
        return Ok(());
    }
    let obj = match sch {
        Value::Bool(_) => {
            if sch_ptr.is_empty() {
                // root resource
                let root = Resource::new(sch_ptr.clone(), base.clone());
                resources.insert(sch_ptr, root);
            }
            return Ok(());
        }
        Value::Object(obj) => obj,
        _ => return Ok(()),
    };

    // a schema with an id starts a new resource and changes the base for its subtree
    let own_id = match self.get_id(obj) {
        Some(id) => match UrlFrag::join(base, id) {
            Ok(joined) => Some(joined.url),
            Err(_) => {
                let loc = UrlFrag::format(url, sch_ptr.as_str());
                return Err(CompileError::ParseIdError { loc });
            }
        },
        None => None,
    };
    let base = own_id.as_ref().unwrap_or(base);

    // the root is a resource even without an id
    if own_id.is_some() || sch_ptr.is_empty() {
        let res = Resource::new(sch_ptr.clone(), base.clone());
        if let Some(dup) = resources.values().find(|other| other.id == *base) {
            return Err(CompileError::DuplicateId {
                url: url.to_string(),
                id: base.to_string(),
                ptr1: res.ptr.to_string(),
                ptr2: dup.ptr.to_string(),
            });
        }
        resources.insert(sch_ptr.clone(), res);
    }

    // collect anchors into base resource
    match resources.values_mut().find(|res| res.id == *base) {
        Some(owner) => self.collect_anchors(sch, &sch_ptr, owner, url)?,
        None => debug_assert!(false, "base resource must exist"),
    }

    for (&kw, &pos) in &self.subschemas {
        let Some(v) = obj.get(kw) else {
            continue;
        };
        if pos & POS_SELF != 0 {
            let child_ptr = sch_ptr.append(kw);
            self.collect_resources(v, base, child_ptr, url, resources)?;
        }
        if pos & POS_ITEM != 0 {
            if let Value::Array(items) = v {
                for (idx, item) in items.iter().enumerate() {
                    let item_ptr = sch_ptr.append2(kw, &idx.to_string());
                    self.collect_resources(item, base, item_ptr, url, resources)?;
                }
            }
        }
        if pos & POS_PROP != 0 {
            if let Value::Object(props) = v {
                for (pname, pvalue) in props {
                    let prop_ptr = sch_ptr.append2(kw, pname);
                    self.collect_resources(pvalue, base, prop_ptr, url, resources)?;
                }
            }
        }
    }
    Ok(())
}
/// Tells whether the json-pointer `ptr` addresses a subschema position
/// according to this draft's subschema keywords.
///
/// The empty pointer is always a subschema. Otherwise the first token must be
/// a subschema keyword, and the remainder must fit one of the positions that
/// keyword allows: the keyword value itself, a named member of it, or a
/// numeric item of it.
pub(crate) fn is_subschema(&self, ptr: &str) -> bool {
    // Splits `/tok/rest...` into `("tok", "/rest...")`; `ptr` must be non-empty.
    fn pop_token(ptr: &str) -> (&str, &str) {
        let unprefixed = &ptr[1..]; // rm `/` prefix
        match unprefixed.find('/') {
            Some(i) => unprefixed.split_at(i),
            None => (unprefixed, ""),
        }
    }

    if ptr.is_empty() {
        return true;
    }
    let (keyword, rest) = pop_token(ptr);
    let Some(&pos) = self.subschemas.get(keyword) else {
        return false;
    };
    if pos & POS_SELF != 0 && self.is_subschema(rest) {
        return true;
    }
    if rest.is_empty() {
        return false;
    }
    let (child, tail) = pop_token(rest);
    if pos & POS_PROP != 0 && self.is_subschema(tail) {
        return true;
    }
    pos & POS_ITEM != 0 && usize::from_str(child).is_ok() && self.is_subschema(tail)
}
}
/// Compiles the metaschemas of all supported drafts, newest first, into a
/// fresh [`Schemas`] using a compiler with format assertions enabled.
///
/// # Errors
///
/// Returns the first [`CompileError`] reported by the compiler.
fn load_std_metaschemas() -> Result<Schemas, CompileError> {
    const STD_METASCHEMAS: [&str; 5] = [
        "https://json-schema.org/draft/2020-12/schema",
        "https://json-schema.org/draft/2019-09/schema",
        "http://json-schema.org/draft-07/schema",
        "http://json-schema.org/draft-06/schema",
        "http://json-schema.org/draft-04/schema",
    ];
    let mut compiler = Compiler::new();
    compiler.enable_format_assertions();
    let mut schemas = Schemas::new();
    for metaschema in STD_METASCHEMAS {
        compiler.compile(metaschema, &mut schemas)?;
    }
    Ok(schemas)
}
// Unit tests for draft detection and for resource/anchor collection.
#[cfg(test)]
mod tests {
use crate::{Compiler, Schemas};
use super::*;
// The bundled draft-04 metaschema, registered under an https url, must compile.
#[test]
fn test_meta() {
let mut schemas = Schemas::default();
let mut compiler = Compiler::default();
let v: Value = serde_json::from_str(include_str!("metaschemas/draft-04/schema")).unwrap();
let url = "https://json-schema.org/draft-04/schema";
compiler.add_resource(url, v).unwrap();
compiler.compile(url, &mut schemas).unwrap();
}
// `Draft::from_url` must map each metaschema url to the expected draft version.
#[test]
fn test_from_url() {
let tests = [
("http://json-schema.org/draft/2020-12/schema", Some(2020)), // http url
("https://json-schema.org/draft/2020-12/schema", Some(2020)), // https url
("https://json-schema.org/schema", Some(latest().version)), // latest
("https://json-schema.org/draft-04/schema", Some(4)),
];
for (url, version) in tests {
let got = Draft::from_url(url).map(|d| d.version);
assert_eq!(got, version, "for {url}");
}
}
// Every schema carrying an `id` (absolute, relative, or with a fragment) must be
// registered as a resource under its resolved url, keyed by its json-pointer.
#[test]
fn test_collect_ids() {
let url = Url::parse("http://a.com/schema.json").unwrap();
let json: Value = serde_json::from_str(
r#"{
"id": "http://a.com/schemas/schema.json",
"definitions": {
"s1": { "id": "http://a.com/definitions/s1" },
"s2": {
"id": "../s2",
"items": [
{ "id": "http://c.com/item" },
{ "id": "http://d.com/item" }
]
},
"s3": {
"definitions": {
"s1": {
"id": "s3",
"items": {
"id": "http://b.com/item"
}
}
}
},
"s4": { "id": "http://e.com/def#abcd" }
}
}"#,
)
.unwrap();
// expected: json-pointer of each resource -> its resolved id
let want = {
let mut m = HashMap::new();
m.insert("", "http://a.com/schemas/schema.json"); // root with id
m.insert("/definitions/s1", "http://a.com/definitions/s1");
m.insert("/definitions/s2", "http://a.com/s2"); // relative id
m.insert("/definitions/s3/definitions/s1", "http://a.com/schemas/s3");
m.insert("/definitions/s3/definitions/s1/items", "http://b.com/item");
m.insert("/definitions/s2/items/0", "http://c.com/item");
m.insert("/definitions/s2/items/1", "http://d.com/item");
m.insert("/definitions/s4", "http://e.com/def"); // id with fragments
m
};
let mut got = HashMap::new();
DRAFT4
.collect_resources(&json, &url, "".into(), &url, &mut got)
.unwrap();
let got = got
.iter()
.map(|(k, v)| (k.as_str(), v.id.as_str()))
.collect::<HashMap<&str, &str>>();
assert_eq!(got, want);
}
// Anchors must be attached to the nearest enclosing resource, not to the root.
// NOTE(review): the nested `$id` below is `http//c.com` (no `:`), so it is
// presumably resolved as a relative reference against `http://b.com` - confirm
// whether `http://c.com` was intended.
#[test]
fn test_collect_anchors() {
let url = Url::parse("http://a.com/schema.json").unwrap();
let json: Value = serde_json::from_str(
r#"{
"$defs": {
"s2": {
"$id": "http://b.com",
"$anchor": "b1",
"items": [
{ "$anchor": "b2" },
{
"$id": "http//c.com",
"items": [
{"$anchor": "c1"},
{"$dynamicAnchor": "c2"}
]
},
{ "$dynamicAnchor": "b3" }
]
}
}
}"#,
)
.unwrap();
let mut resources = HashMap::new();
DRAFT2020
.collect_resources(&json, &url, "".into(), &url, &mut resources)
.unwrap();
// the root declares no anchors of its own
assert!(resources.get("").unwrap().anchors.is_empty());
assert_eq!(resources.get("/$defs/s2").unwrap().anchors, {
let mut want = HashMap::new();
want.insert("b1".into(), "/$defs/s2".into());
want.insert("b2".into(), "/$defs/s2/items/0".into());
want.insert("b3".into(), "/$defs/s2/items/2".into());
want
});
assert_eq!(resources.get("/$defs/s2/items/1").unwrap().anchors, {
let mut want = HashMap::new();
want.insert("c1".into(), "/$defs/s2/items/1/items/0".into());
want.insert("c2".into(), "/$defs/s2/items/1/items/1".into());
want
});
}
// `allOf` holds an array of schemas: a numeric token is a subschema, a name is not.
#[test]
fn test_is_subschema() {
let tests = vec![("/allOf/0", true), ("/allOf/$defs", false)];
for test in tests {
let got = DRAFT2020.is_subschema(test.0);
assert_eq!(got, test.1, "{}", test.0);
}
}
}

197
validator/src/ecma.rs Normal file
View File

@ -0,0 +1,197 @@
use std::borrow::Cow;
use regex_syntax::ast::parse::Parser;
use regex_syntax::ast::{self, *};
/// Converts an ECMA 262 regular expression into an equivalent pattern for the
/// Rust regex syntax, if possible.
///
/// see https://262.ecma-international.org/11.0/#sec-regexp-regular-expression-objects
///
/// The input is returned borrowed when no rewrite was necessary.
///
/// # Errors
///
/// Fails when the pattern cannot be parsed (and the parse error is not one
/// that `fix_error` can repair), or when the translator rejects a construct.
pub(crate) fn convert(pattern: &str) -> Result<Cow<'_, str>, Box<dyn std::error::Error>> {
    let mut current = Cow::Borrowed(pattern);

    // phase 1: parse, repairing recoverable parse errors one at a time
    let mut tree = loop {
        let err = match Parser::new().parse(current.as_ref()) {
            Ok(parsed) => break parsed,
            Err(err) => err,
        };
        match fix_error(&err) {
            Some(repaired) => current = Cow::Owned(repaired),
            None => return Err(err.into()),
        }
    };

    // phase 2: apply one translation per pass until nothing is left to translate
    loop {
        let translator = Translator {
            pat: current.as_ref(),
            out: None,
        };
        let Some(rewritten) = ast::visit(&tree, translator)? else {
            break;
        };
        match Parser::new().parse(&rewritten) {
            Ok(reparsed) => {
                current = Cow::Owned(rewritten);
                tree = reparsed;
            }
            Err(e) => {
                debug_assert!(
                    false,
                    "ecma::translate changed {:?} to {:?}: {e}",
                    current, rewritten
                );
                break;
            }
        }
    }
    Ok(current)
}
/// Tries to repair the pattern behind parse error `e`.
///
/// Only an unrecognized `\c` escape followed by an ASCII letter is handled:
/// the three characters `\cX` are replaced with the single character whose
/// code is `X % 32`. Returns `None` for every other error.
fn fix_error(e: &Error) -> Option<String> {
    if !matches!(e.kind(), ErrorKind::EscapeUnrecognized) {
        return None;
    }
    let pattern = e.pattern();
    let (start, end) = (e.span().start.offset, e.span().end.offset);
    if &pattern[start..end] != r"\c" {
        return None;
    }
    // handle \c{control_letter}
    let control_letter = pattern[end..].chars().next()?;
    if !control_letter.is_ascii_alphabetic() {
        return None;
    }
    let control = char::from((control_letter as u8) % 32);
    Some(format!(
        "{}{}{}",
        &pattern[..start],
        control,
        &pattern[end + 1..],
    ))
}
/**
AST visitor that rewrites one ECMA 262 construct per pass.

handles following translations:
- \d should match ascii digits only. so replace with [0-9]
- \D should match everything but ascii digits. so replace with [^0-9]
- \w should match ascii word characters only. so replace with [A-Za-z0-9_]
- \W should match everything but ascii word characters. so replace with [^A-Za-z0-9_]
- \s and \S are replaced with an explicit whitespace character class
- \a is rejected, as it is not an ECMA 262 control escape
*/
struct Translator<'a> {
// pattern being translated; replacement spans index into this string
pat: &'a str,
// rewritten pattern, set once a replacement has been made
out: Option<String>,
}
impl Translator<'_> {
    /// Stores in `out` a copy of `pat` in which `span` is replaced by `with`.
    fn replace(&mut self, span: &Span, with: &str) {
        let head = &self.pat[..span.start.offset];
        let tail = &self.pat[span.end.offset..];
        self.out = Some(format!("{}{with}{}", head, tail));
    }

    /// Replaces the perl class `perl` (`\d`, `\w`, `\s` or their negations)
    /// with an explicit bracketed character class.
    fn replace_class_class(&mut self, perl: &ClassPerl) {
        let negated = perl.negated;
        let with = match perl.kind {
            ClassPerlKind::Digit if negated => "[^0-9]",
            ClassPerlKind::Digit => "[0-9]",
            ClassPerlKind::Word if negated => "[^A-Za-z0-9_]",
            ClassPerlKind::Word => "[A-Za-z0-9_]",
            ClassPerlKind::Space if negated => {
                "[^ \t\n\r\u{000b}\u{000c}\u{00a0}\u{feff}\u{2003}\u{2029}]"
            }
            ClassPerlKind::Space => "[ \t\n\r\u{000b}\u{000c}\u{00a0}\u{feff}\u{2003}\u{2029}]",
        };
        self.replace(&perl.span, with);
    }
}
impl Visitor for Translator<'_> {
    type Output = Option<String>;
    type Err = &'static str;

    /// Hands back the rewritten pattern, or `None` if nothing was replaced.
    fn finish(self) -> Result<Self::Output, Self::Err> {
        Ok(self.out)
    }

    /// Translates perl classes that occur inside a bracketed class.
    fn visit_class_set_item_pre(&mut self, ast: &ast::ClassSetItem) -> Result<(), Self::Err> {
        match ast {
            ClassSetItem::Perl(perl) => self.replace_class_class(perl),
            _ => (),
        }
        Ok(())
    }

    /// Translates top-level perl classes and rejects the `\a` escape.
    /// Does nothing once a replacement has been recorded for this pass.
    fn visit_post(&mut self, ast: &Ast) -> Result<(), Self::Err> {
        if self.out.is_some() {
            return Ok(());
        }
        match ast {
            Ast::ClassPerl(perl) => self.replace_class_class(perl),
            Ast::Literal(literal) => {
                let is_bell = matches!(
                    literal.as_ref(),
                    Literal {
                        kind: LiteralKind::Special(SpecialLiteralKind::Bell),
                        ..
                    }
                );
                if is_bell {
                    return Err("\\a is not an ECMA 262 control escape");
                }
            }
            _ => (),
        }
        Ok(())
    }
}
// Unit tests for the ECMA 262 -> Rust regex conversion.
#[cfg(test)]
mod tests {
    use super::*;

    // Patterns that must convert, paired with the expected Rust pattern.
    #[test]
    fn test_ecma_compat_valid() {
        let tests = [
            (r"ab\cAcde\cBfg", "ab\u{1}cde\u{2}fg"), // \c{control_letter}
            (r"\\comment", r"\\comment"),            // there is no \c
            (r"ab\def", r#"ab[0-9]ef"#),             // \d
            (r"ab[a-z\d]ef", r#"ab[a-z[0-9]]ef"#),   // \d inside classSet
            (r"ab\Def", r#"ab[^0-9]ef"#),            // \D
            (r"ab[a-z\D]ef", r#"ab[a-z[^0-9]]ef"#),  // \D inside classSet
        ];
        for (input, want) in tests {
            match convert(input) {
                Ok(got) => {
                    if got.as_ref() != want {
                        panic!("convert({input:?}): got: {got:?}, want: {want:?}");
                    }
                }
                Err(e) => {
                    panic!("convert({input:?}) failed: {e}");
                }
            }
        }
    }

    // Patterns that must be rejected.
    #[test]
    fn test_ecma_compat_invalid() {
        let tests = [
            r"\c\n",     // \c{invalid_char}
            r"abc\adef", // \a is not valid
        ];
        for input in tests {
            if convert(input).is_ok() {
                panic!("convert({input:?}) must fail");
            }
        }
    }
}

838
validator/src/formats.rs Normal file
View File

@ -0,0 +1,838 @@
use std::{
collections::HashMap,
error::Error,
net::{Ipv4Addr, Ipv6Addr},
};
use once_cell::sync::Lazy;
use percent_encoding::percent_decode_str;
use serde_json::Value;
use url::Url;
use crate::ecma;
/// Defines format for `format` keyword.
///
/// Pairs the name used in schemas with the function that checks a value
/// against that format.
#[derive(Clone, Copy)]
pub struct Format {
/// Name of the format
pub name: &'static str,
/// Validates given value; returns an error describing why it does not
/// conform to the format.
pub func: fn(v: &Value) -> Result<(), Box<dyn Error>>,
}
/// Built-in formats, keyed by format name.
pub(crate) static FORMATS: Lazy<HashMap<&'static str, Format>> = Lazy::new(|| {
    let builtin = [
        Format { name: "regex", func: validate_regex },
        Format { name: "ipv4", func: validate_ipv4 },
        Format { name: "ipv6", func: validate_ipv6 },
        Format { name: "hostname", func: validate_hostname },
        Format { name: "idn-hostname", func: validate_idn_hostname },
        Format { name: "email", func: validate_email },
        Format { name: "idn-email", func: validate_idn_email },
        Format { name: "date", func: validate_date },
        Format { name: "time", func: validate_time },
        Format { name: "date-time", func: validate_date_time },
        Format { name: "duration", func: validate_duration },
        Format { name: "period", func: validate_period },
        Format { name: "json-pointer", func: validate_json_pointer },
        Format { name: "relative-json-pointer", func: validate_relative_json_pointer },
        Format { name: "uuid", func: validate_uuid },
        Format { name: "uri", func: validate_uri },
        Format { name: "iri", func: validate_iri },
        Format { name: "uri-reference", func: validate_uri_reference },
        Format { name: "iri-reference", func: validate_iri_reference },
        Format { name: "uri-template", func: validate_uri_template },
    ];
    let mut m = HashMap::<&'static str, Format>::new();
    for format in builtin {
        m.insert(format.name, format);
    }
    m
});
/// `regex` format: a string must be convertible by `ecma::convert`.
/// Non-string values are accepted.
fn validate_regex(v: &Value) -> Result<(), Box<dyn Error>> {
    if let Value::String(s) = v {
        ecma::convert(s)?;
    }
    Ok(())
}
/// `ipv4` format: a string must parse as [`Ipv4Addr`].
/// Non-string values are accepted.
fn validate_ipv4(v: &Value) -> Result<(), Box<dyn Error>> {
    if let Value::String(s) = v {
        s.parse::<Ipv4Addr>()?;
    }
    Ok(())
}
/// `ipv6` format: a string must parse as [`Ipv6Addr`].
/// Non-string values are accepted.
fn validate_ipv6(v: &Value) -> Result<(), Box<dyn Error>> {
    if let Value::String(s) = v {
        s.parse::<Ipv6Addr>()?;
    }
    Ok(())
}
/// `date` format: a string must pass `check_date`.
/// Non-string values are accepted.
fn validate_date(v: &Value) -> Result<(), Box<dyn Error>> {
    match v {
        Value::String(s) => check_date(s),
        _ => Ok(()),
    }
}
/// Tells whether the character starting at byte offset `index` of `s` is `ch`.
///
/// Returns `false` (instead of panicking) when `index` is past the end of
/// `s` or falls inside a multi-byte character.
fn matches_char(s: &str, index: usize, ch: char) -> bool {
    match s.get(index..) {
        Some(rest) => rest.starts_with(ch),
        None => false,
    }
}
/// Checks that `s` is a `full-date` (`yyyy-mm-dd`) as defined by RFC 3339.
///
/// see https://datatracker.ietf.org/doc/html/rfc3339#section-5.6
///
/// # Errors
///
/// Returns a message describing the first problem found: wrong length,
/// misplaced hyphens, non-digit characters, or a month/day that does not
/// exist in the given year.
fn check_date(s: &str) -> Result<(), Box<dyn Error>> {
    // yyyy-mm-dd
    let bytes = s.as_bytes();
    if bytes.len() != 10 {
        return Err("must be 10 characters long".into());
    }
    if bytes[4] != b'-' || bytes[7] != b'-' {
        return Err("missing hyphen in correct place".into());
    }
    // `str::parse::<usize>` accepts a leading `+`, which RFC 3339 does not
    // allow, so require every other position to be an ASCII digit.
    let digits_only = bytes
        .iter()
        .enumerate()
        .all(|(i, b)| matches!(i, 4 | 7) || b.is_ascii_digit());
    if !digits_only {
        return Err("non-positive year/month/day".into());
    }
    // all bytes are ASCII at this point, so byte offsets are char boundaries
    let y = s[..4].parse::<usize>()?;
    let m = s[5..7].parse::<usize>()?;
    let d = s[8..].parse::<usize>()?;

    if !matches!(m, 1..=12) {
        return Err(format!("{m} months in year").into());
    }
    if !matches!(d, 1..=31) {
        return Err(format!("{d} days in month").into());
    }
    match m {
        2 => {
            let is_leap_year = y % 4 == 0 && (y % 100 != 0 || y % 400 == 0);
            let feb_days = if is_leap_year { 29 } else { 28 };
            if d > feb_days {
                return Err(format!("february has {feb_days} days only").into());
            }
        }
        4 | 6 | 9 | 11 => {
            if d > 30 {
                return Err("month has 30 days only".into());
            }
        }
        _ => {}
    }
    Ok(())
}
/// `time` format: a string must pass `check_time`.
/// Non-string values are accepted.
fn validate_time(v: &Value) -> Result<(), Box<dyn Error>> {
    match v {
        Value::String(s) => check_time(s),
        _ => Ok(()),
    }
}
/// Checks that `str` is a `full-time` as defined by RFC 3339 section 5.6:
/// `hh:mm:ss`, an optional `.fraction`, then `Z`/`z` or a `+hh:mm`/`-hh:mm`
/// offset.
///
/// A leap second (`ss == 60`) is accepted only when the time, converted to
/// UTC, is `23:59`.
///
/// # Errors
///
/// Returns a message describing the first problem found.
fn check_time(mut str: &str) -> Result<(), Box<dyn Error>> {
    // Parses exactly two ASCII digits. Unlike `str::parse::<usize>` this
    // rejects a leading `+`, which RFC 3339 does not allow.
    fn two_digits(b: &[u8]) -> Option<usize> {
        match b {
            [hi @ b'0'..=b'9', lo @ b'0'..=b'9'] => {
                Some(usize::from(*hi - b'0') * 10 + usize::from(*lo - b'0'))
            }
            _ => None,
        }
    }

    // min: hh:mm:ssZ
    if str.len() < 9 {
        return Err("less than 9 characters long".into());
    }
    let bytes = str.as_bytes();
    if bytes[2] != b':' || bytes[5] != b':' {
        return Err("missing colon in correct place".into());
    }

    // parse hh:mm:ss
    if !str.is_char_boundary(8) {
        return Err("contains non-ascii char".into());
    }
    let (Some(mut h), Some(mut m), Some(s)) = (
        two_digits(&bytes[..2]),
        two_digits(&bytes[3..5]),
        two_digits(&bytes[6..8]),
    ) else {
        return Err("non-positive hour/min/sec".into());
    };
    if h > 23 || m > 59 || s > 60 {
        return Err("hour/min/sec out of range".into());
    }
    str = &str[8..];

    // parse sec-frac if present
    if let Some(rem) = str.strip_prefix('.') {
        let n_digits = rem.chars().take_while(char::is_ascii_digit).count();
        if n_digits == 0 {
            return Err("no digits in second fraction".into());
        }
        str = &rem[n_digits..];
    }

    if str != "z" && str != "Z" {
        // parse time-numoffset
        if str.len() != 6 {
            return Err("offset must be 6 characters long".into());
        }
        let offset = str.as_bytes();
        // converting local time to UTC means subtracting a positive offset
        let sign: isize = match offset[0] {
            b'+' => -1,
            b'-' => 1,
            _ => return Err("offset must begin with plus/minus".into()),
        };
        if offset[3] != b':' {
            return Err("missing colon in offset at correct place".into());
        }
        let (Some(zh), Some(zm)) = (two_digits(&offset[1..3]), two_digits(&offset[4..6])) else {
            return Err("non-positive hour/min in offset".into());
        };
        if zh > 23 || zm > 59 {
            return Err("hour/min in offset out of range".into());
        }

        // apply timezone
        let mut hm = (h * 60 + m) as isize + sign * (zh * 60 + zm) as isize;
        if hm < 0 {
            hm += 24 * 60;
            debug_assert!(hm >= 0);
        }
        let hm = hm as usize;
        (h, m) = (hm / 60, hm % 60);
    }

    // check leap second
    if !(s < 60 || (h == 23 && m == 59)) {
        return Err("invalid leap second".into());
    }
    Ok(())
}
/// `date-time` format: a string must pass `check_date_time`.
/// Non-string values are accepted.
fn validate_date_time(v: &Value) -> Result<(), Box<dyn Error>> {
    match v {
        Value::String(s) => check_date_time(s),
        _ => Ok(()),
    }
}
/// Checks that `s` is a date, a `t`/`T` separator and a time, in that order.
///
/// # Errors
///
/// Returns a message describing the first problem found; problems in the
/// date or time element are prefixed accordingly.
fn check_date_time(s: &str) -> Result<(), Box<dyn Error>> {
    // min: yyyy-mm-ddThh:mm:ssZ
    if s.len() < 20 {
        return Err("less than 20 characters long".into());
    }
    let has_separator = match s.get(10..) {
        Some(rest) => rest.starts_with(['t', 'T']),
        None => false,
    };
    if !has_separator {
        return Err("11th character must be t or T".into());
    }
    check_date(&s[..10]).map_err(|e| format!("invalid date element: {e}"))?;
    check_time(&s[11..]).map_err(|e| format!("invalid time element: {e}"))?;
    Ok(())
}
/// `duration` format: a string must pass `check_duration`.
/// Non-string values are accepted.
fn validate_duration(v: &Value) -> Result<(), Box<dyn Error>> {
    match v {
        Value::String(s) => check_duration(s),
        _ => Ok(()),
    }
}
/// Checks that `s` is a duration: `P` followed either by a week count
/// (`<digits>W`) or by date elements (`Y`, `M`, `D`) and, after a `T`, time
/// elements (`H`, `M`, `S`), each a number followed by its unit, in order.
///
/// see https://datatracker.ietf.org/doc/html/rfc3339#appendix-A
///
/// # Errors
///
/// Returns a message describing the first problem found.
fn check_duration(s: &str) -> Result<(), Box<dyn Error>> {
    // must start with 'P'
    let body = match s.strip_prefix('P') {
        Some(body) => body,
        None => return Err("must start with P".into()),
    };
    if body.is_empty() {
        return Err("nothing after P".into());
    }

    // dur-week
    if let Some(weeks) = body.strip_suffix('W') {
        if weeks.is_empty() {
            return Err("no number in week".into());
        }
        if weeks.chars().any(|c| !c.is_ascii_digit()) {
            return Err("invalid week".into());
        }
        return Ok(());
    }

    // units allowed before and after the `T` separator, in required order
    static UNITS: [&str; 2] = ["YMD", "HMS"];
    for (i, section) in body.split('T').enumerate() {
        if i != 0 && section.is_empty() {
            return Err("no time elements".into());
        }
        let all_units = match UNITS.get(i) {
            Some(units) => *units,
            None => return Err("more than one T".into()),
        };
        // units still permitted in this section; shrinks as units are consumed
        let mut allowed = all_units;
        let mut rest = section;
        while !rest.is_empty() {
            let digit_count = rest.chars().take_while(char::is_ascii_digit).count();
            if digit_count == 0 {
                return Err("missing number".into());
            }
            rest = &rest[digit_count..];
            let unit = match rest.chars().next() {
                Some(unit) => unit,
                None => return Err("missing unit".into()),
            };
            let position = match allowed.find(unit) {
                Some(position) => position,
                None if all_units.contains(unit) => {
                    return Err(format!("unit {unit} out of order").into())
                }
                None => return Err(format!("invalid unit {unit}").into()),
            };
            allowed = &allowed[position + 1..];
            rest = &rest[1..];
        }
    }
    Ok(())
}
/// `period` format: `<date-time>/<date-time>`, `<date-time>/<duration>` or
/// `<duration>/<date-time>`. Non-string values are accepted.
///
/// see https://datatracker.ietf.org/doc/html/rfc3339#appendix-A
fn validate_period(v: &Value) -> Result<(), Box<dyn Error>> {
    let s = match v {
        Value::String(s) => s,
        _ => return Ok(()),
    };
    let (start, end) = match s.split_once('/') {
        Some(parts) => parts,
        None => return Err("missing slash".into()),
    };

    if start.starts_with('P') {
        // duration "/" date-time
        check_duration(start).map_err(|e| format!("invalid start duration: {e}"))?;
        check_date_time(end).map_err(|e| format!("invalid end date-time: {e}"))?;
        return Ok(());
    }

    // date-time "/" (duration | date-time)
    check_date_time(start).map_err(|e| format!("invalid start date-time: {e}"))?;
    if end.starts_with('P') {
        check_duration(end).map_err(|e| format!("invalid end duration: {e}"))?;
    } else {
        check_date_time(end).map_err(|e| format!("invalid end date-time: {e}"))?;
    }
    Ok(())
}
/// `hostname` format: a string must pass `check_hostname`.
/// Non-string values are accepted.
fn validate_hostname(v: &Value) -> Result<(), Box<dyn Error>> {
    match v {
        Value::String(s) => check_hostname(s),
        _ => Ok(()),
    }
}
/// Checks that `s` is a valid host name.
///
/// see https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
///
/// # Errors
///
/// Returns a message describing the first rule that is violated.
fn check_hostname(mut s: &str) -> Result<(), Box<dyn Error>> {
    // entire hostname (including the delimiting dots but not a trailing dot) has a maximum of 253 ASCII characters
    s = s.strip_suffix('.').unwrap_or(s);
    if s.len() > 253 {
        return Err("more than 253 characters long".into());
    }

    // Hostnames are composed of series of labels concatenated with dots, as are all domain names
    for label in s.split('.') {
        // Each label must be from 1 to 63 characters long
        if label.is_empty() || label.len() > 63 {
            return Err("label must be 1 to 63 characters long".into());
        }

        // labels must not start or end with a hyphen
        if label.starts_with('-') {
            return Err("label starts with hyphen".into());
        }
        if label.ends_with('-') {
            return Err("label ends with hyphen".into());
        }

        // labels may contain only the ASCII letters 'a' through 'z' (in a case-insensitive manner),
        // the digits '0' through '9', and the hyphen ('-')
        let offending = label
            .chars()
            .find(|c| !(c.is_ascii_alphanumeric() || *c == '-'));
        if let Some(ch) = offending {
            return Err(format!("invalid character {ch:?}").into());
        }
    }
    Ok(())
}
/// `idn-hostname` format: a string must pass `check_idn_hostname`.
/// Non-string values are accepted.
fn validate_idn_hostname(v: &Value) -> Result<(), Box<dyn Error>> {
    match v {
        Value::String(s) => check_idn_hostname(s),
        _ => Ok(()),
    }
}
/// Checks that `s` is a valid internationalized host name.
///
/// The name is converted to its ASCII form with `idna`, converted back to
/// unicode, checked against the contextual rules of RFC 5892 listed below,
/// and finally the ASCII form is checked with `check_hostname`.
///
/// # Errors
///
/// Returns the conversion error, or a message describing the first rule that
/// is violated.
fn check_idn_hostname(s: &str) -> Result<(), Box<dyn Error>> {
    let s = idna::domain_to_ascii_strict(s)?;
    let unicode = idna::domain_to_unicode(&s).0;

    // see https://www.rfc-editor.org/rfc/rfc5892#section-2.6
    {
        static DISALLOWED: [char; 10] = [
            '\u{0640}', //  ARABIC TATWEEL
            '\u{07FA}', //  NKO LAJANYALAN
            '\u{302E}', //  HANGUL SINGLE DOT TONE MARK
            '\u{302F}', //  HANGUL DOUBLE DOT TONE MARK
            '\u{3031}', //  VERTICAL KANA REPEAT MARK
            '\u{3032}', //  VERTICAL KANA REPEAT WITH VOICED SOUND MARK
            '\u{3033}', //  VERTICAL KANA REPEAT MARK UPPER HALF
            '\u{3034}', //  VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HA
            '\u{3035}', //  VERTICAL KANA REPEAT MARK LOWER HALF
            '\u{303B}', //  VERTICAL IDEOGRAPHIC ITERATION MARK
        ];
        if unicode.contains(DISALLOWED) {
            Err("contains disallowed character")?;
        }
    }

    // unicode string must not contain "--" in 3rd and 4th position
    // see https://www.rfc-editor.org/rfc/rfc5891#section-4.2.3.1
    {
        let count: usize = unicode
            .chars()
            .skip(2)
            .take(2)
            .map(|c| if c == '-' { 1 } else { 0 })
            .sum();
        if count == 2 {
            Err("unicode string must not contain '--' in 3rd and 4th position")?;
        }
    }

    // MIDDLE DOT is allowed between 'l' characters only
    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.3
    {
        let middle_dot = '\u{00b7}';
        let mut s = unicode.as_str();
        while let Some(i) = s.find(middle_dot) {
            let prefix = &s[..i];
            let suffix = &s[i + middle_dot.len_utf8()..];
            // the character right before and right after the dot must be 'l'
            if !prefix.ends_with('l') || !suffix.starts_with('l') {
                Err("MIDDLE DOT is allowed between 'l' characters only")?;
            }
            s = suffix;
        }
    }

    // Greek KERAIA must be followed by Greek character
    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.4
    {
        let keralia = '\u{0375}';
        let greek = '\u{0370}'..='\u{03FF}';
        let mut s = unicode.as_str();
        while let Some(i) = s.find(keralia) {
            let suffix = &s[i + keralia.len_utf8()..];
            if !suffix.starts_with(|c| greek.contains(&c)) {
                Err("Greek KERAIA must be followed by Greek character")?;
            }
            s = suffix;
        }
    }

    // Hebrew GERESH must be preceded by Hebrew character
    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.5
    //
    // Hebrew GERSHAYIM must be preceded by Hebrew character
    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.6
    {
        let geresh = '\u{05F3}';
        let gereshayim = '\u{05F4}';
        let hebrew = '\u{0590}'..='\u{05FF}';
        for ch in [geresh, gereshayim] {
            let mut s = unicode.as_str();
            while let Some(i) = s.find(ch) {
                let prefix = &s[..i];
                let suffix = &s[i + ch.len_utf8()..];
                if !prefix.ends_with(|c| hebrew.contains(&c)) {
                    // report the rule of the character that was actually found
                    if ch == geresh {
                        Err("Hebrew GERESH must be preceded by Hebrew character")?;
                    } else {
                        Err("Hebrew GERSHAYIM must be preceded by Hebrew character")?;
                    }
                }
                s = suffix;
            }
        }
    }

    // KATAKANA MIDDLE DOT must be with Hiragana, Katakana, or Han
    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.7
    {
        let katakana_middle_dot = '\u{30FB}';
        let hiragana = '\u{3040}'..='\u{309F}';
        let katakana = '\u{30A0}'..='\u{30FF}';
        let han = '\u{4E00}'..='\u{9FFF}'; // https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block): is this range correct??
        if unicode.contains(katakana_middle_dot) {
            if unicode.contains(|c| hiragana.contains(&c))
                || unicode.contains(|c| c != katakana_middle_dot && katakana.contains(&c))
                || unicode.contains(|c| han.contains(&c))
            {
                // ok
            } else {
                Err("KATAKANA MIDDLE DOT must be with Hiragana, Katakana, or Han")?;
            }
        }
    }

    // ARABIC-INDIC DIGITS and Extended Arabic-Indic Digits cannot be mixed
    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.8
    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.9
    {
        let arabic_indic_digits = '\u{0660}'..='\u{0669}';
        let extended_arabic_indic_digits = '\u{06F0}'..='\u{06F9}';
        if unicode.contains(|c| arabic_indic_digits.contains(&c))
            && unicode.contains(|c| extended_arabic_indic_digits.contains(&c))
        {
            Err("ARABIC-INDIC DIGITS and Extended Arabic-Indic Digits cannot be mixed")?;
        }
    }

    // ZERO WIDTH JOINER must be preceded by Virama
    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.2
    {
        let zero_width_jointer = '\u{200D}';
        static VIRAMA: [char; 61] = [
            '\u{094D}',
            '\u{09CD}',
            '\u{0A4D}',
            '\u{0ACD}',
            '\u{0B4D}',
            '\u{0BCD}',
            '\u{0C4D}',
            '\u{0CCD}',
            '\u{0D3B}',
            '\u{0D3C}',
            '\u{0D4D}',
            '\u{0DCA}',
            '\u{0E3A}',
            '\u{0EBA}',
            '\u{0F84}',
            '\u{1039}',
            '\u{103A}',
            '\u{1714}',
            '\u{1734}',
            '\u{17D2}',
            '\u{1A60}',
            '\u{1B44}',
            '\u{1BAA}',
            '\u{1BAB}',
            '\u{1BF2}',
            '\u{1BF3}',
            '\u{2D7F}',
            '\u{A806}',
            '\u{A82C}',
            '\u{A8C4}',
            '\u{A953}',
            '\u{A9C0}',
            '\u{AAF6}',
            '\u{ABED}',
            '\u{10A3F}',
            '\u{11046}',
            '\u{1107F}',
            '\u{110B9}',
            '\u{11133}',
            '\u{11134}',
            '\u{111C0}',
            '\u{11235}',
            '\u{112EA}',
            '\u{1134D}',
            '\u{11442}',
            '\u{114C2}',
            '\u{115BF}',
            '\u{1163F}',
            '\u{116B6}',
            '\u{1172B}',
            '\u{11839}',
            '\u{1193D}',
            '\u{1193E}',
            '\u{119E0}',
            '\u{11A34}',
            '\u{11A47}',
            '\u{11A99}',
            '\u{11C3F}',
            '\u{11D44}',
            '\u{11D45}',
            '\u{11D97}',
        ]; // https://www.compart.com/en/unicode/combining/9
        let mut s = unicode.as_str();
        while let Some(i) = s.find(zero_width_jointer) {
            let prefix = &s[..i];
            let suffix = &s[i + zero_width_jointer.len_utf8()..];
            if !prefix.ends_with(VIRAMA) {
                Err("ZERO WIDTH JOINER must be preceded by Virama")?;
            }
            s = suffix;
        }
    }

    check_hostname(&s)
}
/// `email` format: a string must pass `check_email`.
/// Non-string values are accepted.
fn validate_email(v: &Value) -> Result<(), Box<dyn Error>> {
    match v {
        Value::String(s) => check_email(s),
        _ => Ok(()),
    }
}
/// Checks that `s` is an email address: a local part, the last `@`, and a
/// domain that is either a bracketed IP address or a host name.
///
/// see https://en.wikipedia.org/wiki/Email_address
///
/// # Errors
///
/// Returns a message describing the first rule that is violated.
fn check_email(s: &str) -> Result<(), Box<dyn Error>> {
    // entire email address to be no more than 254 characters long
    if s.len() > 254 {
        return Err("more than 254 characters long".into());
    }

    // email address is generally recognized as having two parts joined with an at-sign
    let (local, domain) = match s.rsplit_once('@') {
        Some(parts) => parts,
        None => return Err("missing @".into()),
    };

    // local part may be up to 64 characters long
    if local.len() > 64 {
        return Err("local part more than 64 characters long".into());
    }

    let is_quoted = local.len() > 1 && local.starts_with('"') && local.ends_with('"');
    if is_quoted {
        let inner = &local[1..local.len() - 1];
        if inner.contains(['\\', '"']) {
            return Err("backslash and quote not allowed within quoted local part".into());
        }
    } else {
        if local.starts_with('.') {
            return Err("starts with dot".into());
        }
        if local.ends_with('.') {
            return Err("ends with dot".into());
        }
        // consecutive dots not allowed
        if local.contains("..") {
            return Err("consecutive dots".into());
        }
        // check allowed chars
        let offending = local
            .chars()
            .find(|c| !c.is_ascii_alphanumeric() && !".!#$%&'*+-/=?^_`{|}~".contains(*c));
        if let Some(ch) = offending {
            return Err(format!("invalid character {ch:?}").into());
        }
    }

    // domain if enclosed in brackets, must match an IP address
    if domain.starts_with('[') && domain.ends_with(']') {
        let literal = &domain[1..domain.len() - 1];
        return match literal.strip_prefix("IPv6:") {
            Some(addr) => match addr.parse::<Ipv6Addr>() {
                Ok(_) => Ok(()),
                Err(e) => Err(format!("invalid ipv6 address: {e}").into()),
            },
            None => match literal.parse::<Ipv4Addr>() {
                Ok(_) => Ok(()),
                Err(e) => Err(format!("invalid ipv4 address: {e}").into()),
            },
        };
    }

    // domain must match the requirements for a hostname
    match check_hostname(domain) {
        Ok(()) => Ok(()),
        Err(e) => Err(format!("invalid domain: {e}").into()),
    }
}
/// `idn-email` format: both sides of the last `@` are converted to ASCII
/// with `idna`, the domain must pass `check_idn_hostname`, and the converted
/// address must pass `check_email`. Non-string values are accepted.
fn validate_idn_email(v: &Value) -> Result<(), Box<dyn Error>> {
    let s = match v {
        Value::String(s) => s,
        _ => return Ok(()),
    };
    let (local, domain) = match s.rsplit_once('@') {
        Some(parts) => parts,
        None => return Err("missing @".into()),
    };
    let local = idna::domain_to_ascii_strict(local)?;
    let domain = idna::domain_to_ascii_strict(domain)?;
    check_idn_hostname(&domain).map_err(|e| format!("invalid domain: {e}"))?;
    let ascii_address = format!("{local}@{domain}");
    check_email(&ascii_address)
}
/// `json-pointer` format: a string must pass `check_json_pointer`.
/// Non-string values are accepted.
fn validate_json_pointer(v: &Value) -> Result<(), Box<dyn Error>> {
    match v {
        Value::String(s) => check_json_pointer(s),
        _ => Ok(()),
    }
}
/// Checks that `s` is a JSON pointer: either empty, or a sequence of
/// `/`-prefixed tokens in which every `~` is followed by `0` or `1`.
///
/// see https://www.rfc-editor.org/rfc/rfc6901#section-3
fn check_json_pointer(s: &str) -> Result<(), Box<dyn Error>> {
    if s.is_empty() {
        return Ok(());
    }
    let tokens = match s.strip_prefix('/') {
        Some(tokens) => tokens,
        None => return Err("not starting with slash".into()),
    };
    for token in tokens.split('/') {
        let mut chars = token.chars();
        while let Some(ch) = chars.next() {
            match ch {
                '~' => {
                    // the escape consumes the character that follows it
                    if !matches!(chars.next(), Some('0' | '1')) {
                        return Err("~ must be followed by 0 or 1".into());
                    }
                }
                '/' => return Err("contains disallowed character".into()),
                _ => {}
            }
        }
    }
    Ok(())
}
/// `relative-json-pointer` format: a non-negative integer without leading
/// zeros, followed by either `#` or a JSON pointer. Non-string values are
/// accepted.
///
/// see https://tools.ietf.org/html/draft-handrews-relative-json-pointer-01#section-3
fn validate_relative_json_pointer(v: &Value) -> Result<(), Box<dyn Error>> {
    let s = match v {
        Value::String(s) => s,
        _ => return Ok(()),
    };

    // start with non-negative-integer
    let num_digits = s.chars().take_while(char::is_ascii_digit).count();
    match num_digits {
        0 => return Err("must start with non-negative integer".into()),
        1 => {}
        _ if s.starts_with('0') => return Err("starts with zero".into()),
        _ => {}
    }

    // followed by either json-pointer or '#'
    let pointer = &s[num_digits..];
    if pointer == "#" {
        return Ok(());
    }
    check_json_pointer(pointer).map_err(|e| format!("invalid json-pointer element: {e}"))?;
    Ok(())
}
/// `uuid` format: five `-`-separated groups of 8, 4, 4, 4 and 12 hex digits.
/// Non-string values are accepted.
///
/// see https://datatracker.ietf.org/doc/html/rfc4122#page-4
fn validate_uuid(v: &Value) -> Result<(), Box<dyn Error>> {
    let s = match v {
        Value::String(s) => s,
        _ => return Ok(()),
    };

    static HEX_GROUPS: [usize; 5] = [8, 4, 4, 4, 12];
    let mut seen = 0;
    for (i, group) in s.split('-').enumerate() {
        let expected_len = match HEX_GROUPS.get(i) {
            Some(len) => *len,
            None => return Err("more than 5 elements".into()),
        };
        if group.len() != expected_len {
            return Err(format!(
                "element {} must be {} characters long",
                i + 1,
                expected_len
            )
            .into());
        }
        if let Some(ch) = group.chars().find(|c| !c.is_ascii_hexdigit()) {
            return Err(format!("non-hex character {ch:?}").into());
        }
        seen = i + 1;
    }
    if seen != HEX_GROUPS.len() {
        return Err("must have 5 elements".into());
    }
    Ok(())
}
/// `uri` format: a string must parse as a URI reference and have a scheme.
/// Non-string values are accepted.
fn validate_uri(v: &Value) -> Result<(), Box<dyn Error>> {
    let s = match v {
        Value::String(s) => s,
        _ => return Ok(()),
    };
    let has_scheme = fluent_uri::UriRef::parse(s.as_str())?.scheme().is_some();
    if !has_scheme {
        return Err("relative url".into());
    }
    Ok(())
}
/// `iri` format: a string must parse with [`Url::parse`], which requires an
/// absolute url. Non-string values are accepted.
fn validate_iri(v: &Value) -> Result<(), Box<dyn Error>> {
    let s = match v {
        Value::String(s) => s,
        _ => return Ok(()),
    };
    let e = match Url::parse(s) {
        Ok(_) => return Ok(()),
        Err(e) => e,
    };
    if matches!(e, url::ParseError::RelativeUrlWithoutBase) {
        return Err("relative url".into());
    }
    Err(e.into())
}
// Placeholder base url against which references are resolved.
static TEMP_URL: Lazy<Url> = Lazy::new(|| Url::parse("http://temp.com").unwrap());

/// Parses `s` as a reference by joining it onto a placeholder base url.
///
/// # Errors
///
/// Fails if `s` contains a backslash or cannot be joined onto the base.
fn parse_uri_reference(s: &str) -> Result<Url, Box<dyn Error>> {
    if s.contains('\\') {
        return Err("contains \\\\".into());
    }
    let resolved = TEMP_URL.join(s)?;
    Ok(resolved)
}
/// `uri-reference` format: a string must parse as a URI reference.
/// Non-string values are accepted.
fn validate_uri_reference(v: &Value) -> Result<(), Box<dyn Error>> {
    if let Value::String(s) = v {
        fluent_uri::UriRef::parse(s.as_str())?;
    }
    Ok(())
}
/// `iri-reference` format: a string must pass `parse_uri_reference`.
/// Non-string values are accepted.
fn validate_iri_reference(v: &Value) -> Result<(), Box<dyn Error>> {
    if let Value::String(s) = v {
        parse_uri_reference(s)?;
    }
    Ok(())
}
/// `uri-template` format: the string must parse as a reference and, within
/// each path segment, curly braces must alternate `{`, `}` and end closed.
/// Non-string values are accepted.
fn validate_uri_template(v: &Value) -> Result<(), Box<dyn Error>> {
    let s = match v {
        Value::String(s) => s,
        _ => return Ok(()),
    };

    let url = parse_uri_reference(s)?;

    // path we got has curly braces percent encoded
    let path = percent_decode_str(url.path()).decode_utf8()?;

    // ensure curly brackets are not nested and balanced
    for segment in path.split('/') {
        // true while an opening brace is waiting for its closing brace
        let mut open = false;
        for ch in segment.chars() {
            let opening = match ch {
                '{' => true,
                '}' => false,
                _ => continue,
            };
            if opening == open {
                return Err("nested curly braces".into());
            }
            open = opening;
        }
        if open {
            return Err("no matching closing brace".into());
        }
    }
    Ok(())
}

716
validator/src/lib.rs Normal file
View File

@ -0,0 +1,716 @@
/*! This crate supports JsonSchema validation for drafts `2020-12`, `2019-09`, `7`, `6` and `4`.
```rust,no_run
# use std::fs::File;
# use std::error::Error;
# use boon::*;
# use serde_json::Value;
# fn main() -> Result<(), Box<dyn Error>>{
let mut schemas = Schemas::new(); // container for compiled schemas
let mut compiler = Compiler::new();
let sch_index = compiler.compile("schema.json", &mut schemas)?;
let instance: Value = serde_json::from_reader(File::open("instance.json")?)?;
let valid = schemas.validate(&instance, sch_index).is_ok();
# Ok(())
# }
```
If schema file has no `$schema`, it assumes latest draft.
You can override this:
```rust,no_run
# use boon::*;
# let mut compiler = Compiler::new();
compiler.set_default_draft(Draft::V7);
```
The use of this option is HIGHLY encouraged to ensure continued
correct operation of your schema. The current default value will
not stay the same over time.
# Examples
- [example_from_strings]: loading schemas from Strings
- [example_from_https]: loading schemas from `http(s)`
- [example_custom_format]: registering custom format
- [example_custom_content_encoding]: registering custom contentEncoding
- [example_custom_content_media_type]: registering custom contentMediaType
# Compile Errors
```no_compile
println!("{compile_error}");
println!("{compile_error:#}"); // prints cause if any
```
Using the alternate form in display will print the cause, if any.
This will be useful in cases like [`CompileError::LoadUrlError`],
as it would be useful to know whether the url does not exist or
the resource at url is not a valid json document.
# Validation Errors
[`ValidationError`] may have multiple `causes` resulting
in tree of errors.
`println!("{validation_error}")` prints:
```no_compile
jsonschema validation failed with file:///tmp/customer.json#
at '': missing properties 'age'
at '/billing_address': missing properties 'street_address', 'city', 'state'
```
The alternate form `println!("{validation_error:#}")` prints:
```no_compile
jsonschema validation failed with file:///tmp/customer.json#
[I#] [S#/required] missing properties 'age'
[I#/billing_address] [S#/properties/billing_address/$ref] validation failed with file:///tmp/address.json#
[I#/billing_address] [S#/required] missing properties 'street_address', 'city', 'state'
```
here `I` refers to the instance document and `S` refers to last schema document.
for example:
- after line 1: `S` refers to `file:///tmp/customer.json`
- after line 3: `S` refers to `file:///tmp/address.json`
# Output Formats
[`ValidationError`] can be converted into following output formats:
- [flag] `validation_error.flag_output()`
- [basic] `validation_error.basic_output()`
- [detailed] `validation_error.detailed_output()`
The output object implements `serde::Serialize`.
It also implements `Display` to print json:
```no_compile
println!("{output}"); // prints unformatted json
println!("{output:#}"); // prints indented json
```
[example_from_strings]: https://github.com/santhosh-tekuri/boon/blob/d466730e5e5c7c663bd6739e74e39d1e2f7baae4/tests/examples.rs#L22
[example_from_https]: https://github.com/santhosh-tekuri/boon/blob/d466730e5e5c7c663bd6739e74e39d1e2f7baae4/tests/examples.rs#L62
[example_from_yaml_files]: https://github.com/santhosh-tekuri/boon/blob/d466730e5e5c7c663bd6739e74e39d1e2f7baae4/tests/examples.rs#L86
[example_custom_format]: https://github.com/santhosh-tekuri/boon/blob/d466730e5e5c7c663bd6739e74e39d1e2f7baae4/tests/examples.rs#L119
[example_custom_content_encoding]: https://github.com/santhosh-tekuri/boon/blob/d466730e5e5c7c663bd6739e74e39d1e2f7baae4/tests/examples.rs#L153
[example_custom_content_media_type]: https://github.com/santhosh-tekuri/boon/blob/d466730e5e5c7c663bd6739e74e39d1e2f7baae4/tests/examples.rs#L198
[flag]: https://json-schema.org/draft/2020-12/json-schema-core.html#name-flag
[basic]: https://json-schema.org/draft/2020-12/json-schema-core.html#name-basic
[detailed]: https://json-schema.org/draft/2020-12/json-schema-core.html#name-detailed
*/
mod compiler;
mod content;
mod draft;
mod ecma;
mod formats;
mod loader;
mod output;
mod root;
mod roots;
mod util;
mod validator;
#[cfg(not(target_arch = "wasm32"))]
pub use loader::FileLoader;
pub use {
compiler::{CompileError, Compiler, Draft},
content::{Decoder, MediaType},
formats::Format,
loader::{SchemeUrlLoader, UrlLoader},
output::{
AbsoluteKeywordLocation, FlagOutput, KeywordPath, OutputError, OutputUnit, SchemaToken,
},
validator::{InstanceLocation, InstanceToken},
};
use std::{borrow::Cow, collections::HashMap, error::Error, fmt::Display};
use ahash::AHashMap;
use regex::Regex;
use serde_json::{Number, Value};
use util::*;
/// Identifier to compiled schema.
///
/// Wraps the position of the schema in the list held by [`Schemas`].
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SchemaIndex(usize);
/// Collection of compiled schemas.
#[derive(Default)]
pub struct Schemas {
    /// compiled schemas; a [`SchemaIndex`] is a position in this list
    list: Vec<Schema>,
    /// lookup from a schema location to its position in `list`
    map: HashMap<UrlPtr, usize>, // loc => schema-index
}
impl Schemas {
pub fn new() -> Self {
Self::default()
}
fn insert(&mut self, locs: Vec<UrlPtr>, compiled: Vec<Schema>) {
for (up, sch) in locs.into_iter().zip(compiled.into_iter()) {
let i = self.list.len();
self.list.push(sch);
self.map.insert(up, i);
}
}
fn get(&self, idx: SchemaIndex) -> &Schema {
&self.list[idx.0] // todo: return bug
}
fn get_by_loc(&self, up: &UrlPtr) -> Option<&Schema> {
self.map.get(up).and_then(|&i| self.list.get(i))
}
/// Returns true if `sch_index` is generated for this instance.
pub fn contains(&self, sch_index: SchemaIndex) -> bool {
self.list.get(sch_index.0).is_some()
}
pub fn size(&self) -> usize {
self.list.len()
}
/**
Validates `v` with schema identified by `sch_index`
# Panics
Panics if `sch_index` is not generated for this instance.
[`Schemas::contains`] can be used too ensure that it does not panic.
*/
pub fn validate<'s, 'v>(
&'s self,
v: &'v Value,
sch_index: SchemaIndex,
) -> Result<(), ValidationError<'s, 'v>> {
let Some(sch) = self.list.get(sch_index.0) else {
panic!("Schemas::validate: schema index out of bounds");
};
validator::validate(v, sch, self)
}
}
/// Compiled representation of one schema or subschema.
///
/// Subschemas are not owned by this struct; they are referred to by
/// [`SchemaIndex`].
///
/// NOTE(review): the field names appear to follow the JSON Schema keywords
/// they are compiled from — confirm against the compiler.
#[derive(Default)]
struct Schema {
    draft_version: usize,
    idx: SchemaIndex,
    loc: String,
    resource: SchemaIndex,
    dynamic_anchors: HashMap<String, SchemaIndex>,
    all_props_evaluated: bool,
    all_items_evaluated: bool,
    num_items_evaluated: usize,
    // type agnostic --
    boolean: Option<bool>, // boolean schema
    ref_: Option<SchemaIndex>,
    recursive_ref: Option<SchemaIndex>,
    recursive_anchor: bool,
    dynamic_ref: Option<DynamicRef>,
    dynamic_anchor: Option<String>,
    types: Types,
    enum_: Option<Enum>,
    constant: Option<Value>,
    not: Option<SchemaIndex>,
    all_of: Vec<SchemaIndex>,
    any_of: Vec<SchemaIndex>,
    one_of: Vec<SchemaIndex>,
    if_: Option<SchemaIndex>,
    then: Option<SchemaIndex>,
    else_: Option<SchemaIndex>,
    format: Option<Format>,
    // object --
    min_properties: Option<usize>,
    max_properties: Option<usize>,
    required: Vec<String>,
    properties: AHashMap<String, SchemaIndex>,
    pattern_properties: Vec<(Regex, SchemaIndex)>,
    property_names: Option<SchemaIndex>,
    additional_properties: Option<Additional>,
    dependent_required: Vec<(String, Vec<String>)>,
    dependent_schemas: Vec<(String, SchemaIndex)>,
    dependencies: Vec<(String, Dependency)>,
    unevaluated_properties: Option<SchemaIndex>,
    // array --
    min_items: Option<usize>,
    max_items: Option<usize>,
    unique_items: bool,
    min_contains: Option<usize>,
    max_contains: Option<usize>,
    contains: Option<SchemaIndex>,
    // NOTE(review): `items`/`additional_items` vs `prefix_items`/`items2020`
    // presumably hold the pre-2020-12 and the 2020-12 array keywords
    // respectively — confirm against the compiler.
    items: Option<Items>,
    additional_items: Option<Additional>,
    prefix_items: Vec<SchemaIndex>,
    items2020: Option<SchemaIndex>,
    unevaluated_items: Option<SchemaIndex>,
    // string --
    min_length: Option<usize>,
    max_length: Option<usize>,
    pattern: Option<Regex>,
    content_encoding: Option<Decoder>,
    content_media_type: Option<MediaType>,
    content_schema: Option<SchemaIndex>,
    // number --
    minimum: Option<Number>,
    maximum: Option<Number>,
    exclusive_minimum: Option<Number>,
    exclusive_maximum: Option<Number>,
    multiple_of: Option<Number>,
}
/// Data kept for the `enum_` field of [`Schema`].
#[derive(Debug)]
struct Enum {
    /// types that occur in enum
    types: Types,
    /// values in enum
    values: Vec<Value>,
}
/// Value of the `items` field of [`Schema`]: either one schema or a list of schemas.
#[derive(Debug)]
enum Items {
    /// a single schema
    SchemaRef(SchemaIndex),
    /// a list of schemas
    SchemaRefs(Vec<SchemaIndex>),
}
/// Value of the `additional_properties` and `additional_items` fields of
/// [`Schema`]: either a boolean or a schema.
#[derive(Debug)]
enum Additional {
    /// a plain boolean
    Bool(bool),
    /// a schema
    SchemaRef(SchemaIndex),
}
/// Value side of an entry in the `dependencies` field of [`Schema`]:
/// either a list of property names or a schema.
#[derive(Debug)]
enum Dependency {
    /// a list of property names
    Props(Vec<String>),
    /// a schema
    SchemaRef(SchemaIndex),
}
/// Value of the `dynamic_ref` field of [`Schema`].
struct DynamicRef {
    /// schema this reference points to
    sch: SchemaIndex,
    /// anchor name, if any
    // NOTE(review): presumably the dynamic anchor to look up at validation
    // time — confirm against the validator.
    anchor: Option<String>,
}
impl Schema {
fn new(loc: String) -> Self {
Self {
loc,
..Default::default()
}
}
}
/// JSON data types for JSONSchema
///
/// Each discriminant is a distinct power of two, so a set of types can be
/// stored as a bitmask (see [`Types`]).
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum Type {
    Null = 1,
    Boolean = 2,
    Number = 4,
    Integer = 8,
    String = 16,
    Array = 32,
    Object = 64,
}
impl Type {
    /// Returns the type of json value `v`.
    ///
    /// Every number maps to [`Type::Number`]; this function never returns
    /// [`Type::Integer`].
    fn of(v: &Value) -> Self {
        match v {
            Value::Object(_) => Self::Object,
            Value::Array(_) => Self::Array,
            Value::String(_) => Self::String,
            Value::Number(_) => Self::Number,
            Value::Bool(_) => Self::Boolean,
            Value::Null => Self::Null,
        }
    }
    /// Parses a lowercase type name such as `"integer"`; returns `None` for
    /// any other string.
    fn from_str(value: &str) -> Option<Self> {
        let parsed = match value {
            "null" => Self::Null,
            "boolean" => Self::Boolean,
            "number" => Self::Number,
            "integer" => Self::Integer,
            "string" => Self::String,
            "array" => Self::Array,
            "object" => Self::Object,
            _ => return None,
        };
        Some(parsed)
    }
    /// Returns `true` unless `v` is an array or an object.
    fn primitive(v: &Value) -> bool {
        !matches!(v, Value::Array(_) | Value::Object(_))
    }
}
impl Display for Type {
    /// Writes the lowercase name of the type, e.g. `integer`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Type::Null => "null",
            Type::Boolean => "boolean",
            Type::Number => "number",
            Type::Integer => "integer",
            Type::String => "string",
            Type::Array => "array",
            Type::Object => "object",
        };
        f.write_str(name)
    }
}
/// Set of [`Type`]s
///
/// Stored as a bitmask built from the [`Type`] discriminants.
#[derive(Debug, Default, Clone, Copy)]
pub struct Types(u8);
impl Types {
fn is_empty(self) -> bool {
self.0 == 0
}
fn add(&mut self, t: Type) {
self.0 |= t as u8;
}
/// Returns `true` if this set contains given type.
pub fn contains(&self, t: Type) -> bool {
self.0 & t as u8 != 0
}
/// Returns an iterator over types.
pub fn iter(&self) -> impl Iterator<Item = Type> + '_ {
static TYPES: [Type; 7] = [
Type::Null,
Type::Boolean,
Type::Number,
Type::Integer,
Type::String,
Type::Array,
Type::Object,
];
TYPES.iter().cloned().filter(|t| self.contains(*t))
}
}
impl FromIterator<Type> for Types {
    /// Builds a set holding every type yielded by `iter`.
    fn from_iter<T: IntoIterator<Item = Type>>(iter: T) -> Self {
        iter.into_iter().fold(Types::default(), |mut set, t| {
            set.add(t);
            set
        })
    }
}
/// Error type for validation failures.
///
/// `'s` is the lifetime of data borrowed from the schemas and `'v` the
/// lifetime of data borrowed from the instance being validated, matching the
/// signature of [`Schemas::validate`]. Errors form a tree through `causes`.
#[derive(Debug)]
pub struct ValidationError<'s, 'v> {
    /// The absolute, dereferenced schema location.
    pub schema_url: &'s str,
    /// The location of the JSON value within the instance being validated
    pub instance_location: InstanceLocation<'v>,
    /// kind of error
    pub kind: ErrorKind<'s, 'v>,
    /// Holds nested errors
    pub causes: Vec<ValidationError<'s, 'v>>,
}
// `Error` needs `Debug` (derived above) and `Display`; the `Display` impl is
// not in this part of the file.
impl Error for ValidationError<'_, '_> {}
/// A list specifying general categories of validation errors.
///
/// `'s` is the lifetime of data borrowed from the schema, `'v` the lifetime
/// of data borrowed from the validated instance. In most variants `got` is
/// what was found in the instance and `want` is what the schema asks for.
#[derive(Debug)]
pub enum ErrorKind<'s, 'v> {
    /// Displayed as a plain `validation failed`.
    Group,
    /// Validation against the schema at `url` failed.
    Schema {
        url: &'s str,
    },
    ContentSchema,
    /// Property name `prop` is invalid.
    PropertyName {
        prop: String,
    },
    Reference {
        kw: &'static str,
        url: &'s str,
    },
    /// Keyword locations `kw_loc1` and `kw_loc2` both resolve to `url`,
    /// causing a reference cycle.
    RefCycle {
        url: &'s str,
        kw_loc1: String,
        kw_loc2: String,
    },
    FalseSchema,
    Type {
        got: Type,
        want: Types,
    },
    Enum {
        want: &'s Vec<Value>,
    },
    Const {
        want: &'s Value,
    },
    Format {
        got: Cow<'v, Value>,
        want: &'static str,
        err: Box<dyn Error>,
    },
    MinProperties {
        got: usize,
        want: usize,
    },
    MaxProperties {
        got: usize,
        want: usize,
    },
    AdditionalProperties {
        got: Vec<Cow<'v, str>>,
    },
    Required {
        want: Vec<&'s str>,
    },
    Dependency {
        /// dependency of prop that failed.
        prop: &'s str,
        /// missing props.
        missing: Vec<&'s str>,
    },
    DependentRequired {
        /// dependency of prop that failed.
        prop: &'s str,
        /// missing props.
        missing: Vec<&'s str>,
    },
    MinItems {
        got: usize,
        want: usize,
    },
    MaxItems {
        got: usize,
        want: usize,
    },
    Contains,
    MinContains {
        got: Vec<usize>,
        want: usize,
    },
    MaxContains {
        got: Vec<usize>,
        want: usize,
    },
    /// Items at the two positions in `got` are equal.
    UniqueItems {
        got: [usize; 2],
    },
    AdditionalItems {
        got: usize,
    },
    MinLength {
        got: usize,
        want: usize,
    },
    MaxLength {
        got: usize,
        want: usize,
    },
    Pattern {
        got: Cow<'v, str>,
        want: &'s str,
    },
    ContentEncoding {
        want: &'static str,
        err: Box<dyn Error>,
    },
    ContentMediaType {
        got: Vec<u8>,
        want: &'static str,
        err: Box<dyn Error>,
    },
    Minimum {
        got: Cow<'v, Number>,
        want: &'s Number,
    },
    Maximum {
        got: Cow<'v, Number>,
        want: &'s Number,
    },
    ExclusiveMinimum {
        got: Cow<'v, Number>,
        want: &'s Number,
    },
    ExclusiveMaximum {
        got: Cow<'v, Number>,
        want: &'s Number,
    },
    MultipleOf {
        got: Cow<'v, Number>,
        want: &'s Number,
    },
    Not,
    /// at least one of the subschemas did not match
    AllOf,
    /// none of the subschemas matched.
    AnyOf,
    /// - `None`: none of the schemas matched.
    /// - Some(i, j): subschemas at i, j matched
    OneOf(Option<(usize, usize)>),
}
impl Display for ErrorKind<'_, '_> {
    /// Writes a one-line, human readable message for this kind of error.
    ///
    /// Nested causes are not printed here; this formats only the kind itself.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Group => write!(f, "validation failed"),
            Self::Schema { url } => write!(f, "validation failed with {url}"),
            Self::ContentSchema => write!(f, "contentSchema failed"),
            Self::PropertyName { prop } => write!(f, "invalid property {}", quote(prop)),
            // neither `kw` nor `url` is included in the message
            Self::Reference { .. } => {
                write!(f, "validation failed")
            }
            Self::RefCycle {
                url,
                kw_loc1,
                kw_loc2,
            } => write!(
                f,
                "both {} and {} resolve to {url} causing reference cycle",
                quote(&kw_loc1.to_string()),
                quote(&kw_loc2.to_string())
            ),
            Self::FalseSchema => write!(f, "false schema"),
            Self::Type { got, want } => {
                // todo: why join not working for Type struct ??
                let want = join_iter(want.iter(), " or ");
                write!(f, "want {want}, but got {got}",)
            }
            // the allowed values are spelled out only when all of them are
            // primitives; otherwise a generic message is printed
            Self::Enum { want } => {
                if want.iter().all(Type::primitive) {
                    if want.len() == 1 {
                        write!(f, "value must be ")?;
                        display(f, &want[0])
                    } else {
                        let want = join_iter(want.iter().map(string), ", ");
                        write!(f, "value must be one of {want}")
                    }
                } else {
                    write!(f, "enum failed")
                }
            }
            // same rule as for `Enum`: only a primitive constant is printed
            Self::Const { want } => {
                if Type::primitive(want) {
                    write!(f, "value must be ")?;
                    display(f, want)
                } else {
                    write!(f, "const failed")
                }
            }
            Self::Format { got, want, err } => {
                display(f, got)?;
                write!(f, " is not valid {want}: {err}")
            }
            Self::MinProperties { got, want } => write!(
                f,
                "minimum {want} properties required, but got {got} properties"
            ),
            Self::MaxProperties { got, want } => write!(
                f,
                "maximum {want} properties required, but got {got} properties"
            ),
            Self::AdditionalProperties { got } => {
                write!(
                    f,
                    "additionalProperties {} not allowed",
                    join_iter(got.iter().map(quote), ", ")
                )
            }
            Self::Required { want } => write!(
                f,
                "missing properties {}",
                join_iter(want.iter().map(quote), ", ")
            ),
            // `Dependency` and `DependentRequired` share the same wording
            Self::Dependency { prop, missing } => {
                write!(
                    f,
                    "properties {} required, if {} property exists",
                    join_iter(missing.iter().map(quote), ", "),
                    quote(prop)
                )
            }
            Self::DependentRequired { prop, missing } => write!(
                f,
                "properties {} required, if {} property exists",
                join_iter(missing.iter().map(quote), ", "),
                quote(prop)
            ),
            Self::MinItems { got, want } => {
                write!(f, "minimum {want} items required, but got {got} items")
            }
            Self::MaxItems { got, want } => {
                write!(f, "maximum {want} items required, but got {got} items")
            }
            // `got` lists the positions of the items that matched
            Self::MinContains { got, want } => {
                if got.is_empty() {
                    write!(
                        f,
                        "minimum {want} items required to match contains schema, but found none",
                    )
                } else {
                    write!(
                        f,
                        "minimum {want} items required to match contains schema, but found {} items at {}",
                        got.len(),
                        join_iter(got, ", ")
                    )
                }
            }
            Self::Contains => write!(f, "no items match contains schema"),
            Self::MaxContains { got, want } => {
                write!(
                    f,
                    "maximum {want} items required to match contains schema, but found {} items at {}",
                    got.len(),
                    join_iter(got, ", ")
                )
            }
            Self::UniqueItems { got: [i, j] } => write!(f, "items at {i} and {j} are equal"),
            Self::AdditionalItems { got } => write!(f, "last {got} additionalItems not allowed"),
            Self::MinLength { got, want } => write!(f, "length must be >={want}, but got {got}"),
            Self::MaxLength { got, want } => write!(f, "length must be <={want}, but got {got}"),
            Self::Pattern { got, want } => {
                write!(f, "{} does not match pattern {}", quote(got), quote(want))
            }
            Self::ContentEncoding { want, err } => {
                write!(f, "value is not {} encoded: {err}", quote(want))
            }
            // the decoded bytes in `got` are not printed
            Self::ContentMediaType { want, err, .. } => {
                write!(f, "value is not of mediatype {}: {err}", quote(want))
            }
            Self::Minimum { got, want } => write!(f, "must be >={want}, but got {got}"),
            Self::Maximum { got, want } => write!(f, "must be <={want}, but got {got}"),
            Self::ExclusiveMinimum { got, want } => write!(f, "must be > {want} but got {got}"),
            Self::ExclusiveMaximum { got, want } => write!(f, "must be < {want} but got {got}"),
            Self::MultipleOf { got, want } => write!(f, "{got} is not multipleOf {want}"),
            Self::Not => write!(f, "not failed"),
            Self::AllOf => write!(f, "allOf failed",),
            Self::AnyOf => write!(f, "anyOf failed"),
            Self::OneOf(None) => write!(f, "oneOf failed, none matched"),
            Self::OneOf(Some((i, j))) => write!(f, "oneOf failed, subschemas {i}, {j} matched"),
        }
    }
}
/// Writes a short rendering of `v`: strings go through `quote`, arrays and
/// objects are replaced by the word `value`, and every other value is
/// written with its own `Display`.
fn display(f: &mut std::fmt::Formatter, v: &Value) -> std::fmt::Result {
    if let Value::String(s) = v {
        return write!(f, "{}", quote(s));
    }
    if matches!(v, Value::Array(_) | Value::Object(_)) {
        return write!(f, "value");
    }
    write!(f, "{v}")
}
/// Renders `primitive` as a `String`: strings go through `quote`, any other
/// value uses its own `Display`.
fn string(primitive: &Value) -> String {
    match primitive {
        Value::String(s) => quote(s),
        other => other.to_string(),
    }
}

243
validator/src/loader.rs Normal file
View File

@ -0,0 +1,243 @@
use std::{
cell::RefCell,
collections::{HashMap, HashSet},
error::Error,
};
#[cfg(not(target_arch = "wasm32"))]
use std::fs::File;
use appendlist::AppendList;
use once_cell::sync::Lazy;
use serde_json::Value;
use url::Url;
use crate::{
compiler::CompileError,
draft::{latest, Draft},
util::split,
UrlPtr,
};
/// A trait for loading json from given `url`
pub trait UrlLoader {
    /// Loads json from given absolute `url`.
    ///
    /// # Errors
    /// Returns an error if the document cannot be loaded; the error type is
    /// left to the implementation.
    fn load(&self, url: &str) -> Result<Value, Box<dyn Error>>;
}
// --
/// [`UrlLoader`] that reads json documents from the local filesystem.
///
/// Not available on `wasm32` targets.
#[cfg(not(target_arch = "wasm32"))]
pub struct FileLoader;
#[cfg(not(target_arch = "wasm32"))]
impl UrlLoader for FileLoader {
    /// Loads json from the local file that `url` points to.
    ///
    /// # Errors
    /// Fails if `url` does not parse, cannot be converted to a file path,
    /// the file cannot be opened, or its content is not valid json.
    fn load(&self, url: &str) -> Result<Value, Box<dyn Error>> {
        let url = Url::parse(url)?;
        let path = url.to_file_path().map_err(|_| "invalid file path")?;
        // serde_json does not buffer its input, so reading straight from a
        // `File` would issue many small reads; go through a `BufReader`.
        let reader = std::io::BufReader::new(File::open(path)?);
        Ok(serde_json::from_reader(reader)?)
    }
}
// --
/// [`UrlLoader`] that dispatches to another loader chosen by the scheme of
/// the url being loaded.
#[derive(Default)]
pub struct SchemeUrlLoader {
    /// url scheme => loader handling that scheme
    loaders: HashMap<&'static str, Box<dyn UrlLoader>>,
}
impl SchemeUrlLoader {
    /// Creates a loader with no scheme registered.
    pub fn new() -> Self {
        Default::default()
    }
    /// Registers [`UrlLoader`] for given url `scheme`
    ///
    /// A loader registered earlier for the same `scheme` is replaced.
    pub fn register(&mut self, scheme: &'static str, url_loader: Box<dyn UrlLoader>) {
        let loaders = &mut self.loaders;
        loaders.insert(scheme, url_loader);
    }
}
impl UrlLoader for SchemeUrlLoader {
    /// Loads `url` with the loader registered for its scheme.
    ///
    /// # Errors
    /// Fails if `url` does not parse, if no loader is registered for its
    /// scheme ([`CompileError::UnsupportedUrlScheme`]), or if the selected
    /// loader fails.
    fn load(&self, url: &str) -> Result<Value, Box<dyn Error>> {
        let parsed = Url::parse(url)?;
        match self.loaders.get(parsed.scheme()) {
            Some(loader) => loader.load(parsed.as_str()),
            None => Err(CompileError::UnsupportedUrlScheme {
                url: parsed.as_str().to_owned(),
            }
            .into()),
        }
    }
}
// --
/// Caches loaded documents by url and delegates actual loading to a
/// replaceable [`UrlLoader`].
pub(crate) struct DefaultUrlLoader {
    /// url => position of the document in `doc_list`
    doc_map: RefCell<HashMap<Url, usize>>,
    /// loaded documents; the code in this file only ever pushes to it
    doc_list: AppendList<Value>,
    /// loader asked for documents that are neither cached nor embedded
    /// standard metaschemas
    loader: Box<dyn UrlLoader>,
}
impl DefaultUrlLoader {
    /// Creates a loader with an empty cache, backed by a [`SchemeUrlLoader`]
    /// that has [`FileLoader`] registered for the `file` scheme (nothing is
    /// registered on `wasm32`).
    #[cfg_attr(target_arch = "wasm32", allow(unused_mut))]
    pub fn new() -> Self {
        let mut loader = SchemeUrlLoader::new();
        #[cfg(not(target_arch = "wasm32"))]
        loader.register("file", Box::new(FileLoader));
        Self {
            doc_map: Default::default(),
            doc_list: AppendList::new(),
            loader: Box::new(loader),
        }
    }
    /// Returns the cached document for `url`, if there is one.
    pub fn get_doc(&self, url: &Url) -> Option<&Value> {
        self.doc_map
            .borrow()
            .get(url)
            .and_then(|i| self.doc_list.get(*i))
    }
    /// Caches `json` under `url`. Does nothing if `url` is already cached.
    pub fn add_doc(&self, url: Url, json: Value) {
        if self.get_doc(&url).is_some() {
            return;
        }
        // push first, so that the position stored below is that of the new entry
        self.doc_list.push(json);
        self.doc_map
            .borrow_mut()
            .insert(url, self.doc_list.len() - 1);
    }
    /// Replaces the loader used for documents that are not cached.
    pub fn use_loader(&mut self, loader: Box<dyn UrlLoader>) {
        self.loader = loader;
    }
    /// Returns the document for `url`, loading and caching it on first use.
    ///
    /// Embedded standard metaschemas take precedence over the configured
    /// loader.
    ///
    /// # Errors
    /// Returns [`CompileError::LoadUrlError`] if the embedded metaschema is
    /// not valid json or the configured loader fails.
    pub(crate) fn load(&self, url: &Url) -> Result<&Value, CompileError> {
        if let Some(doc) = self.get_doc(url) {
            return Ok(doc);
        }
        // check in STD_METAFILES
        let doc = if let Some(content) = load_std_meta(url.as_str()) {
            serde_json::from_str::<Value>(content).map_err(|e| CompileError::LoadUrlError {
                url: url.to_string(),
                src: e.into(),
            })?
        } else {
            self.loader
                .load(url.as_str())
                .map_err(|src| CompileError::LoadUrlError {
                    url: url.as_str().to_owned(),
                    src,
                })?
        };
        self.add_doc(url.clone(), doc);
        // read it back from the cache to hand out a reference tied to `self`
        self.get_doc(url)
            .ok_or(CompileError::Bug("doc must exist".into()))
    }
    /// Determines the draft of `doc`, located at `up`, from its `$schema`.
    ///
    /// Returns `default_draft` when `doc` is not an object or has no string
    /// `$schema`. When `$schema` is not a known draft url, the metaschema it
    /// names is loaded and examined in turn; `cycle` holds the metaschema
    /// urls visited so far.
    ///
    /// # Errors
    /// - [`CompileError::InvalidMetaSchemaUrl`] if `$schema` is not a valid url
    /// - [`CompileError::UnsupportedDraft`] if a root document names itself
    ///   as its metaschema
    /// - [`CompileError::MetaSchemaCycle`] if a metaschema url is visited twice
    /// - any error from loading a metaschema
    pub(crate) fn get_draft(
        &self,
        up: &UrlPtr,
        doc: &Value,
        default_draft: &'static Draft,
        mut cycle: HashSet<Url>,
    ) -> Result<&'static Draft, CompileError> {
        let Value::Object(obj) = &doc else {
            return Ok(default_draft);
        };
        let Some(Value::String(sch)) = obj.get("$schema") else {
            return Ok(default_draft);
        };
        if let Some(draft) = Draft::from_url(sch) {
            return Ok(draft);
        }
        let (sch, _) = split(sch);
        let sch = Url::parse(sch).map_err(|e| CompileError::InvalidMetaSchemaUrl {
            url: up.to_string(),
            src: e.into(),
        })?;
        // a root document that is its own metaschema can never resolve to a draft
        if up.ptr.is_empty() && sch == up.url {
            return Err(CompileError::UnsupportedDraft { url: sch.into() });
        }
        if !cycle.insert(sch.clone()) {
            return Err(CompileError::MetaSchemaCycle { url: sch.into() });
        }
        let doc = self.load(&sch)?;
        let up = UrlPtr {
            url: sch,
            ptr: "".into(),
        };
        self.get_draft(&up, doc, default_draft, cycle)
    }
    /// Returns the vocabularies declared by the metaschema of `doc`.
    ///
    /// Returns `None` when `doc` is not an object, has no string `$schema`,
    /// or its `$schema` is a known draft url. Otherwise the metaschema is
    /// loaded and the result of `draft.get_vocabs` for it is returned.
    ///
    /// # Errors
    /// Returns [`CompileError::ParseUrlError`] if `$schema` is not a valid
    /// url, or any error from loading the metaschema or from `get_vocabs`.
    pub(crate) fn get_meta_vocabs(
        &self,
        doc: &Value,
        draft: &'static Draft,
    ) -> Result<Option<Vec<String>>, CompileError> {
        let Value::Object(obj) = &doc else {
            return Ok(None);
        };
        let Some(Value::String(sch)) = obj.get("$schema") else {
            return Ok(None);
        };
        if Draft::from_url(sch).is_some() {
            return Ok(None);
        }
        let (sch, _) = split(sch);
        let sch = Url::parse(sch).map_err(|e| CompileError::ParseUrlError {
            url: sch.to_string(),
            src: e.into(),
        })?;
        let doc = self.load(&sch)?;
        draft.get_vocabs(&sch, doc)
    }
}
/// Standard metaschemas embedded at compile time.
///
/// Keys are the file paths with the leading `metaschemas/` removed
/// (e.g. `draft-07/schema`); values are the file contents.
pub(crate) static STD_METAFILES: Lazy<HashMap<String, &str>> = Lazy::new(|| {
    let mut files = HashMap::new();
    // inserts the file at `$path`, keyed by `$path` without its `metaschemas/` prefix
    macro_rules! add {
        ($path:expr) => {
            files.insert(
                $path["metaschemas/".len()..].to_owned(),
                include_str!($path),
            );
        };
    }
    add!("metaschemas/draft-04/schema");
    add!("metaschemas/draft-06/schema");
    add!("metaschemas/draft-07/schema");
    add!("metaschemas/draft/2019-09/schema");
    add!("metaschemas/draft/2019-09/meta/core");
    add!("metaschemas/draft/2019-09/meta/applicator");
    add!("metaschemas/draft/2019-09/meta/validation");
    add!("metaschemas/draft/2019-09/meta/meta-data");
    add!("metaschemas/draft/2019-09/meta/format");
    add!("metaschemas/draft/2019-09/meta/content");
    add!("metaschemas/draft/2020-12/schema");
    add!("metaschemas/draft/2020-12/meta/core");
    add!("metaschemas/draft/2020-12/meta/applicator");
    add!("metaschemas/draft/2020-12/meta/unevaluated");
    add!("metaschemas/draft/2020-12/meta/validation");
    add!("metaschemas/draft/2020-12/meta/meta-data");
    add!("metaschemas/draft/2020-12/meta/content");
    add!("metaschemas/draft/2020-12/meta/format-annotation");
    add!("metaschemas/draft/2020-12/meta/format-assertion");
    files
});
/// Returns the embedded metaschema for `url`, if it is one of the standard ones.
///
/// Only urls under `http://json-schema.org/` or `https://json-schema.org/`
/// are considered. The bare `schema` path is answered with the metaschema
/// of the latest draft.
fn load_std_meta(url: &str) -> Option<&'static str> {
    let meta = url
        .strip_prefix("http://json-schema.org/")
        .or_else(|| url.strip_prefix("https://json-schema.org/"))?;
    if meta == "schema" {
        return load_std_meta(latest().url);
    }
    STD_METAFILES.get(meta).copied()
}

View File

@ -0,0 +1,151 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"description": "Core schema meta-schema",
"definitions": {
"schemaArray": {
"type": "array",
"minItems": 1,
"items": { "$ref": "#" }
},
"positiveInteger": {
"type": "integer",
"minimum": 0
},
"positiveIntegerDefault0": {
"allOf": [ { "$ref": "#/definitions/positiveInteger" }, { "default": 0 } ]
},
"simpleTypes": {
"enum": [ "array", "boolean", "integer", "null", "number", "object", "string" ]
},
"stringArray": {
"type": "array",
"items": { "type": "string" },
"minItems": 1,
"uniqueItems": true
}
},
"type": "object",
"properties": {
"id": {
"type": "string",
"format": "uriref"
},
"$schema": {
"type": "string",
"format": "uri"
},
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"default": {},
"multipleOf": {
"type": "number",
"minimum": 0,
"exclusiveMinimum": true
},
"maximum": {
"type": "number"
},
"exclusiveMaximum": {
"type": "boolean",
"default": false
},
"minimum": {
"type": "number"
},
"exclusiveMinimum": {
"type": "boolean",
"default": false
},
"maxLength": { "$ref": "#/definitions/positiveInteger" },
"minLength": { "$ref": "#/definitions/positiveIntegerDefault0" },
"pattern": {
"type": "string",
"format": "regex"
},
"additionalItems": {
"anyOf": [
{ "type": "boolean" },
{ "$ref": "#" }
],
"default": {}
},
"items": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/schemaArray" }
],
"default": {}
},
"maxItems": { "$ref": "#/definitions/positiveInteger" },
"minItems": { "$ref": "#/definitions/positiveIntegerDefault0" },
"uniqueItems": {
"type": "boolean",
"default": false
},
"maxProperties": { "$ref": "#/definitions/positiveInteger" },
"minProperties": { "$ref": "#/definitions/positiveIntegerDefault0" },
"required": { "$ref": "#/definitions/stringArray" },
"additionalProperties": {
"anyOf": [
{ "type": "boolean" },
{ "$ref": "#" }
],
"default": {}
},
"definitions": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"properties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"patternProperties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"dependencies": {
"type": "object",
"additionalProperties": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/stringArray" }
]
}
},
"enum": {
"type": "array",
"minItems": 1,
"uniqueItems": true
},
"type": {
"anyOf": [
{ "$ref": "#/definitions/simpleTypes" },
{
"type": "array",
"items": { "$ref": "#/definitions/simpleTypes" },
"minItems": 1,
"uniqueItems": true
}
]
},
"allOf": { "$ref": "#/definitions/schemaArray" },
"anyOf": { "$ref": "#/definitions/schemaArray" },
"oneOf": { "$ref": "#/definitions/schemaArray" },
"not": { "$ref": "#" },
"format": { "type": "string" },
"$ref": { "type": "string" }
},
"dependencies": {
"exclusiveMaximum": [ "maximum" ],
"exclusiveMinimum": [ "minimum" ]
},
"default": {}
}

View File

@ -0,0 +1,151 @@
{
"$schema": "http://json-schema.org/draft-06/schema#",
"$id": "http://json-schema.org/draft-06/schema#",
"title": "Core schema meta-schema",
"definitions": {
"schemaArray": {
"type": "array",
"minItems": 1,
"items": { "$ref": "#" }
},
"nonNegativeInteger": {
"type": "integer",
"minimum": 0
},
"nonNegativeIntegerDefault0": {
"allOf": [
{ "$ref": "#/definitions/nonNegativeInteger" },
{ "default": 0 }
]
},
"simpleTypes": {
"enum": [
"array",
"boolean",
"integer",
"null",
"number",
"object",
"string"
]
},
"stringArray": {
"type": "array",
"items": { "type": "string" },
"uniqueItems": true,
"default": []
}
},
"type": ["object", "boolean"],
"properties": {
"$id": {
"type": "string",
"format": "uri-reference"
},
"$schema": {
"type": "string",
"format": "uri"
},
"$ref": {
"type": "string",
"format": "uri-reference"
},
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"default": {},
"multipleOf": {
"type": "number",
"exclusiveMinimum": 0
},
"maximum": {
"type": "number"
},
"exclusiveMaximum": {
"type": "number"
},
"minimum": {
"type": "number"
},
"exclusiveMinimum": {
"type": "number"
},
"maxLength": { "$ref": "#/definitions/nonNegativeInteger" },
"minLength": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"pattern": {
"type": "string",
"format": "regex"
},
"additionalItems": { "$ref": "#" },
"items": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/schemaArray" }
],
"default": {}
},
"maxItems": { "$ref": "#/definitions/nonNegativeInteger" },
"minItems": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"uniqueItems": {
"type": "boolean",
"default": false
},
"contains": { "$ref": "#" },
"maxProperties": { "$ref": "#/definitions/nonNegativeInteger" },
"minProperties": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"required": { "$ref": "#/definitions/stringArray" },
"additionalProperties": { "$ref": "#" },
"definitions": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"properties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"patternProperties": {
"type": "object",
"regexProperties": true,
"additionalProperties": { "$ref": "#" },
"default": {}
},
"dependencies": {
"type": "object",
"additionalProperties": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/stringArray" }
]
}
},
"propertyNames": { "$ref": "#" },
"const": {},
"enum": {
"type": "array",
"minItems": 1,
"uniqueItems": true
},
"type": {
"anyOf": [
{ "$ref": "#/definitions/simpleTypes" },
{
"type": "array",
"items": { "$ref": "#/definitions/simpleTypes" },
"minItems": 1,
"uniqueItems": true
}
]
},
"format": { "type": "string" },
"allOf": { "$ref": "#/definitions/schemaArray" },
"anyOf": { "$ref": "#/definitions/schemaArray" },
"oneOf": { "$ref": "#/definitions/schemaArray" },
"not": { "$ref": "#" }
},
"default": {}
}

View File

@ -0,0 +1,172 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "http://json-schema.org/draft-07/schema#",
"title": "Core schema meta-schema",
"definitions": {
"schemaArray": {
"type": "array",
"minItems": 1,
"items": { "$ref": "#" }
},
"nonNegativeInteger": {
"type": "integer",
"minimum": 0
},
"nonNegativeIntegerDefault0": {
"allOf": [
{ "$ref": "#/definitions/nonNegativeInteger" },
{ "default": 0 }
]
},
"simpleTypes": {
"enum": [
"array",
"boolean",
"integer",
"null",
"number",
"object",
"string"
]
},
"stringArray": {
"type": "array",
"items": { "type": "string" },
"uniqueItems": true,
"default": []
}
},
"type": ["object", "boolean"],
"properties": {
"$id": {
"type": "string",
"format": "uri-reference"
},
"$schema": {
"type": "string",
"format": "uri"
},
"$ref": {
"type": "string",
"format": "uri-reference"
},
"$comment": {
"type": "string"
},
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"default": true,
"readOnly": {
"type": "boolean",
"default": false
},
"writeOnly": {
"type": "boolean",
"default": false
},
"examples": {
"type": "array",
"items": true
},
"multipleOf": {
"type": "number",
"exclusiveMinimum": 0
},
"maximum": {
"type": "number"
},
"exclusiveMaximum": {
"type": "number"
},
"minimum": {
"type": "number"
},
"exclusiveMinimum": {
"type": "number"
},
"maxLength": { "$ref": "#/definitions/nonNegativeInteger" },
"minLength": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"pattern": {
"type": "string",
"format": "regex"
},
"additionalItems": { "$ref": "#" },
"items": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/schemaArray" }
],
"default": true
},
"maxItems": { "$ref": "#/definitions/nonNegativeInteger" },
"minItems": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"uniqueItems": {
"type": "boolean",
"default": false
},
"contains": { "$ref": "#" },
"maxProperties": { "$ref": "#/definitions/nonNegativeInteger" },
"minProperties": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"required": { "$ref": "#/definitions/stringArray" },
"additionalProperties": { "$ref": "#" },
"definitions": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"properties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"patternProperties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"propertyNames": { "format": "regex" },
"default": {}
},
"dependencies": {
"type": "object",
"additionalProperties": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/stringArray" }
]
}
},
"propertyNames": { "$ref": "#" },
"const": true,
"enum": {
"type": "array",
"items": true,
"minItems": 1,
"uniqueItems": true
},
"type": {
"anyOf": [
{ "$ref": "#/definitions/simpleTypes" },
{
"type": "array",
"items": { "$ref": "#/definitions/simpleTypes" },
"minItems": 1,
"uniqueItems": true
}
]
},
"format": { "type": "string" },
"contentMediaType": { "type": "string" },
"contentEncoding": { "type": "string" },
"if": { "$ref": "#" },
"then": { "$ref": "#" },
"else": { "$ref": "#" },
"allOf": { "$ref": "#/definitions/schemaArray" },
"anyOf": { "$ref": "#/definitions/schemaArray" },
"oneOf": { "$ref": "#/definitions/schemaArray" },
"not": { "$ref": "#" }
},
"default": true
}

View File

@ -0,0 +1,55 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://json-schema.org/draft/2019-09/meta/applicator",
"$vocabulary": {
"https://json-schema.org/draft/2019-09/vocab/applicator": true
},
"$recursiveAnchor": true,
"title": "Applicator vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"additionalItems": { "$recursiveRef": "#" },
"unevaluatedItems": { "$recursiveRef": "#" },
"items": {
"anyOf": [
{ "$recursiveRef": "#" },
{ "$ref": "#/$defs/schemaArray" }
]
},
"contains": { "$recursiveRef": "#" },
"additionalProperties": { "$recursiveRef": "#" },
"unevaluatedProperties": { "$recursiveRef": "#" },
"properties": {
"type": "object",
"additionalProperties": { "$recursiveRef": "#" },
"default": {}
},
"patternProperties": {
"type": "object",
"additionalProperties": { "$recursiveRef": "#" },
"propertyNames": { "format": "regex" },
"default": {}
},
"dependentSchemas": {
"type": "object",
"additionalProperties": {
"$recursiveRef": "#"
}
},
"propertyNames": { "$recursiveRef": "#" },
"if": { "$recursiveRef": "#" },
"then": { "$recursiveRef": "#" },
"else": { "$recursiveRef": "#" },
"allOf": { "$ref": "#/$defs/schemaArray" },
"anyOf": { "$ref": "#/$defs/schemaArray" },
"oneOf": { "$ref": "#/$defs/schemaArray" },
"not": { "$recursiveRef": "#" }
},
"$defs": {
"schemaArray": {
"type": "array",
"minItems": 1,
"items": { "$recursiveRef": "#" }
}
}
}

View File

@ -0,0 +1,15 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://json-schema.org/draft/2019-09/meta/content",
"$vocabulary": {
"https://json-schema.org/draft/2019-09/vocab/content": true
},
"$recursiveAnchor": true,
"title": "Content vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"contentMediaType": { "type": "string" },
"contentEncoding": { "type": "string" },
"contentSchema": { "$recursiveRef": "#" }
}
}

View File

@ -0,0 +1,56 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://json-schema.org/draft/2019-09/meta/core",
"$vocabulary": {
"https://json-schema.org/draft/2019-09/vocab/core": true
},
"$recursiveAnchor": true,
"title": "Core vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"$id": {
"type": "string",
"format": "uri-reference",
"$comment": "Non-empty fragments not allowed.",
"pattern": "^[^#]*#?$"
},
"$schema": {
"type": "string",
"format": "uri"
},
"$anchor": {
"type": "string",
"pattern": "^[A-Za-z][-A-Za-z0-9.:_]*$"
},
"$ref": {
"type": "string",
"format": "uri-reference"
},
"$recursiveRef": {
"type": "string",
"format": "uri-reference"
},
"$recursiveAnchor": {
"type": "boolean",
"default": false
},
"$vocabulary": {
"type": "object",
"propertyNames": {
"type": "string",
"format": "uri"
},
"additionalProperties": {
"type": "boolean"
}
},
"$comment": {
"type": "string"
},
"$defs": {
"type": "object",
"additionalProperties": { "$recursiveRef": "#" },
"default": {}
}
}
}

View File

@ -0,0 +1,13 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://json-schema.org/draft/2019-09/meta/format",
"$vocabulary": {
"https://json-schema.org/draft/2019-09/vocab/format": true
},
"$recursiveAnchor": true,
"title": "Format vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"format": { "type": "string" }
}
}

View File

@ -0,0 +1,35 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://json-schema.org/draft/2019-09/meta/meta-data",
"$vocabulary": {
"https://json-schema.org/draft/2019-09/vocab/meta-data": true
},
"$recursiveAnchor": true,
"title": "Meta-data vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"default": true,
"deprecated": {
"type": "boolean",
"default": false
},
"readOnly": {
"type": "boolean",
"default": false
},
"writeOnly": {
"type": "boolean",
"default": false
},
"examples": {
"type": "array",
"items": true
}
}
}

View File

@ -0,0 +1,97 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://json-schema.org/draft/2019-09/meta/validation",
"$vocabulary": {
"https://json-schema.org/draft/2019-09/vocab/validation": true
},
"$recursiveAnchor": true,
"title": "Validation vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"multipleOf": {
"type": "number",
"exclusiveMinimum": 0
},
"maximum": {
"type": "number"
},
"exclusiveMaximum": {
"type": "number"
},
"minimum": {
"type": "number"
},
"exclusiveMinimum": {
"type": "number"
},
"maxLength": { "$ref": "#/$defs/nonNegativeInteger" },
"minLength": { "$ref": "#/$defs/nonNegativeIntegerDefault0" },
"pattern": {
"type": "string",
"format": "regex"
},
"maxItems": { "$ref": "#/$defs/nonNegativeInteger" },
"minItems": { "$ref": "#/$defs/nonNegativeIntegerDefault0" },
"uniqueItems": {
"type": "boolean",
"default": false
},
"maxContains": { "$ref": "#/$defs/nonNegativeInteger" },
"minContains": {
"$ref": "#/$defs/nonNegativeInteger",
"default": 1
},
"maxProperties": { "$ref": "#/$defs/nonNegativeInteger" },
"minProperties": { "$ref": "#/$defs/nonNegativeIntegerDefault0" },
"required": { "$ref": "#/$defs/stringArray" },
"dependentRequired": {
"type": "object",
"additionalProperties": {
"$ref": "#/$defs/stringArray"
}
},
"const": true,
"enum": {
"type": "array",
"items": true
},
"type": {
"anyOf": [
{ "$ref": "#/$defs/simpleTypes" },
{
"type": "array",
"items": { "$ref": "#/$defs/simpleTypes" },
"minItems": 1,
"uniqueItems": true
}
]
}
},
"$defs": {
"nonNegativeInteger": {
"type": "integer",
"minimum": 0
},
"nonNegativeIntegerDefault0": {
"$ref": "#/$defs/nonNegativeInteger",
"default": 0
},
"simpleTypes": {
"enum": [
"array",
"boolean",
"integer",
"null",
"number",
"object",
"string"
]
},
"stringArray": {
"type": "array",
"items": { "type": "string" },
"uniqueItems": true,
"default": []
}
}
}

View File

@ -0,0 +1,41 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://json-schema.org/draft/2019-09/schema",
"$vocabulary": {
"https://json-schema.org/draft/2019-09/vocab/core": true,
"https://json-schema.org/draft/2019-09/vocab/applicator": true,
"https://json-schema.org/draft/2019-09/vocab/validation": true,
"https://json-schema.org/draft/2019-09/vocab/meta-data": true,
"https://json-schema.org/draft/2019-09/vocab/format": false,
"https://json-schema.org/draft/2019-09/vocab/content": true
},
"$recursiveAnchor": true,
"title": "Core and Validation specifications meta-schema",
"allOf": [
{"$ref": "meta/core"},
{"$ref": "meta/applicator"},
{"$ref": "meta/validation"},
{"$ref": "meta/meta-data"},
{"$ref": "meta/format"},
{"$ref": "meta/content"}
],
"type": ["object", "boolean"],
"properties": {
"definitions": {
"$comment": "While no longer an official keyword as it is replaced by $defs, this keyword is retained in the meta-schema to prevent incompatible extensions as it remains in common use.",
"type": "object",
"additionalProperties": { "$recursiveRef": "#" },
"default": {}
},
"dependencies": {
"$comment": "\"dependencies\" is no longer a keyword, but schema authors should avoid redefining it to facilitate a smooth transition to \"dependentSchemas\" and \"dependentRequired\"",
"type": "object",
"additionalProperties": {
"anyOf": [
{ "$recursiveRef": "#" },
{ "$ref": "meta/validation#/$defs/stringArray" }
]
}
}
}
}

View File

@ -0,0 +1,47 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/meta/applicator",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/applicator": true
},
"$dynamicAnchor": "meta",
"title": "Applicator vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"prefixItems": { "$ref": "#/$defs/schemaArray" },
"items": { "$dynamicRef": "#meta" },
"contains": { "$dynamicRef": "#meta" },
"additionalProperties": { "$dynamicRef": "#meta" },
"properties": {
"type": "object",
"additionalProperties": { "$dynamicRef": "#meta" },
"default": {}
},
"patternProperties": {
"type": "object",
"additionalProperties": { "$dynamicRef": "#meta" },
"propertyNames": { "format": "regex" },
"default": {}
},
"dependentSchemas": {
"type": "object",
"additionalProperties": { "$dynamicRef": "#meta" },
"default": {}
},
"propertyNames": { "$dynamicRef": "#meta" },
"if": { "$dynamicRef": "#meta" },
"then": { "$dynamicRef": "#meta" },
"else": { "$dynamicRef": "#meta" },
"allOf": { "$ref": "#/$defs/schemaArray" },
"anyOf": { "$ref": "#/$defs/schemaArray" },
"oneOf": { "$ref": "#/$defs/schemaArray" },
"not": { "$dynamicRef": "#meta" }
},
"$defs": {
"schemaArray": {
"type": "array",
"minItems": 1,
"items": { "$dynamicRef": "#meta" }
}
}
}

View File

@ -0,0 +1,15 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/meta/content",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/content": true
},
"$dynamicAnchor": "meta",
"title": "Content vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"contentEncoding": { "type": "string" },
"contentMediaType": { "type": "string" },
"contentSchema": { "$dynamicRef": "#meta" }
}
}

View File

@ -0,0 +1,50 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/meta/core",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/core": true
},
"$dynamicAnchor": "meta",
"title": "Core vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"$id": {
"$ref": "#/$defs/uriReferenceString",
"$comment": "Non-empty fragments not allowed.",
"pattern": "^[^#]*#?$"
},
"$schema": { "$ref": "#/$defs/uriString" },
"$ref": { "$ref": "#/$defs/uriReferenceString" },
"$anchor": { "$ref": "#/$defs/anchorString" },
"$dynamicRef": { "$ref": "#/$defs/uriReferenceString" },
"$dynamicAnchor": { "$ref": "#/$defs/anchorString" },
"$vocabulary": {
"type": "object",
"propertyNames": { "$ref": "#/$defs/uriString" },
"additionalProperties": {
"type": "boolean"
}
},
"$comment": {
"type": "string"
},
"$defs": {
"type": "object",
"additionalProperties": { "$dynamicRef": "#meta" }
}
},
"$defs": {
"anchorString": {
"type": "string",
"pattern": "^[A-Za-z_][-A-Za-z0-9._]*$"
},
"uriString": {
"type": "string",
"format": "uri"
},
"uriReferenceString": {
"type": "string",
"format": "uri-reference"
}
}
}

View File

@ -0,0 +1,13 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/meta/format-annotation",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/format-annotation": true
},
"$dynamicAnchor": "meta",
"title": "Format vocabulary meta-schema for annotation results",
"type": ["object", "boolean"],
"properties": {
"format": { "type": "string" }
}
}

View File

@ -0,0 +1,13 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/meta/format-assertion",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/format-assertion": true
},
"$dynamicAnchor": "meta",
"title": "Format vocabulary meta-schema for assertion results",
"type": ["object", "boolean"],
"properties": {
"format": { "type": "string" }
}
}

View File

@ -0,0 +1,35 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/meta/meta-data",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/meta-data": true
},
"$dynamicAnchor": "meta",
"title": "Meta-data vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"default": true,
"deprecated": {
"type": "boolean",
"default": false
},
"readOnly": {
"type": "boolean",
"default": false
},
"writeOnly": {
"type": "boolean",
"default": false
},
"examples": {
"type": "array",
"items": true
}
}
}

View File

@ -0,0 +1,14 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/meta/unevaluated",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/unevaluated": true
},
"$dynamicAnchor": "meta",
"title": "Unevaluated applicator vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"unevaluatedItems": { "$dynamicRef": "#meta" },
"unevaluatedProperties": { "$dynamicRef": "#meta" }
}
}

View File

@ -0,0 +1,97 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/meta/validation",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/validation": true
},
"$dynamicAnchor": "meta",
"title": "Validation vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"type": {
"anyOf": [
{ "$ref": "#/$defs/simpleTypes" },
{
"type": "array",
"items": { "$ref": "#/$defs/simpleTypes" },
"minItems": 1,
"uniqueItems": true
}
]
},
"const": true,
"enum": {
"type": "array",
"items": true
},
"multipleOf": {
"type": "number",
"exclusiveMinimum": 0
},
"maximum": {
"type": "number"
},
"exclusiveMaximum": {
"type": "number"
},
"minimum": {
"type": "number"
},
"exclusiveMinimum": {
"type": "number"
},
"maxLength": { "$ref": "#/$defs/nonNegativeInteger" },
"minLength": { "$ref": "#/$defs/nonNegativeIntegerDefault0" },
"pattern": {
"type": "string",
"format": "regex"
},
"maxItems": { "$ref": "#/$defs/nonNegativeInteger" },
"minItems": { "$ref": "#/$defs/nonNegativeIntegerDefault0" },
"uniqueItems": {
"type": "boolean",
"default": false
},
"maxContains": { "$ref": "#/$defs/nonNegativeInteger" },
"minContains": {
"$ref": "#/$defs/nonNegativeInteger",
"default": 1
},
"maxProperties": { "$ref": "#/$defs/nonNegativeInteger" },
"minProperties": { "$ref": "#/$defs/nonNegativeIntegerDefault0" },
"required": { "$ref": "#/$defs/stringArray" },
"dependentRequired": {
"type": "object",
"additionalProperties": {
"$ref": "#/$defs/stringArray"
}
}
},
"$defs": {
"nonNegativeInteger": {
"type": "integer",
"minimum": 0
},
"nonNegativeIntegerDefault0": {
"$ref": "#/$defs/nonNegativeInteger",
"default": 0
},
"simpleTypes": {
"enum": [
"array",
"boolean",
"integer",
"null",
"number",
"object",
"string"
]
},
"stringArray": {
"type": "array",
"items": { "type": "string" },
"uniqueItems": true,
"default": []
}
}
}

View File

@ -0,0 +1,57 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/schema",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/core": true,
"https://json-schema.org/draft/2020-12/vocab/applicator": true,
"https://json-schema.org/draft/2020-12/vocab/unevaluated": true,
"https://json-schema.org/draft/2020-12/vocab/validation": true,
"https://json-schema.org/draft/2020-12/vocab/meta-data": true,
"https://json-schema.org/draft/2020-12/vocab/format-annotation": true,
"https://json-schema.org/draft/2020-12/vocab/content": true
},
"$dynamicAnchor": "meta",
"title": "Core and Validation specifications meta-schema",
"allOf": [
{"$ref": "meta/core"},
{"$ref": "meta/applicator"},
{"$ref": "meta/unevaluated"},
{"$ref": "meta/validation"},
{"$ref": "meta/meta-data"},
{"$ref": "meta/format-annotation"},
{"$ref": "meta/content"}
],
"type": ["object", "boolean"],
"$comment": "This meta-schema also defines keywords that have appeared in previous drafts in order to prevent incompatible extensions as they remain in common use.",
"properties": {
"definitions": {
"$comment": "\"definitions\" has been replaced by \"$defs\".",
"type": "object",
"additionalProperties": { "$dynamicRef": "#meta" },
"deprecated": true,
"default": {}
},
"dependencies": {
"$comment": "\"dependencies\" has been split and replaced by \"dependentSchemas\" and \"dependentRequired\" in order to serve their differing semantics.",
"type": "object",
"additionalProperties": {
"anyOf": [
{ "$dynamicRef": "#meta" },
{ "$ref": "meta/validation#/$defs/stringArray" }
]
},
"deprecated": true,
"default": {}
},
"$recursiveAnchor": {
"$comment": "\"$recursiveAnchor\" has been replaced by \"$dynamicAnchor\".",
"$ref": "meta/core#/$defs/anchorString",
"deprecated": true
},
"$recursiveRef": {
"$comment": "\"$recursiveRef\" has been replaced by \"$dynamicRef\".",
"$ref": "meta/core#/$defs/uriReferenceString",
"deprecated": true
}
}
}

622
validator/src/output.rs Normal file
View File

@ -0,0 +1,622 @@
use std::{
borrow::Cow,
fmt::{Display, Formatter, Write},
};
use serde::{
ser::{SerializeMap, SerializeSeq},
Serialize,
};
use crate::{util::*, ErrorKind, InstanceLocation, ValidationError};
impl<'s> ValidationError<'s, '_> {
fn absolute_keyword_location(&self) -> AbsoluteKeywordLocation<'s> {
if let ErrorKind::Reference { url, .. } = &self.kind {
AbsoluteKeywordLocation {
schema_url: url,
keyword_path: None,
}
} else {
AbsoluteKeywordLocation {
schema_url: self.schema_url,
keyword_path: self.kind.keyword_path(),
}
}
}
fn skip(&self) -> bool {
self.causes.len() == 1 && matches!(self.kind, ErrorKind::Reference { .. })
}
/// The `Flag` output format, merely the boolean result.
pub fn flag_output(&self) -> FlagOutput {
FlagOutput { valid: false }
}
/// The `Basic` structure, a flat list of output units.
pub fn basic_output(&self) -> OutputUnit<'_, '_, '_> {
let mut outputs = vec![];
let mut in_ref = InRef::default();
let mut kw_loc = KeywordLocation::default();
for node in DfsIterator::new(self) {
match node {
DfsItem::Pre(e) => {
in_ref.pre(e);
kw_loc.pre(e);
if e.skip() || matches!(e.kind, ErrorKind::Schema { .. }) {
continue;
}
let absolute_keyword_location = if in_ref.get() {
Some(e.absolute_keyword_location())
} else {
None
};
outputs.push(OutputUnit {
valid: false,
keyword_location: kw_loc.get(e),
absolute_keyword_location,
instance_location: &e.instance_location,
error: OutputError::Leaf(&e.kind),
});
}
DfsItem::Post(e) => {
in_ref.post();
kw_loc.post();
if e.skip() || matches!(e.kind, ErrorKind::Schema { .. }) {
continue;
}
}
}
}
let error = if outputs.is_empty() {
OutputError::Leaf(&self.kind)
} else {
OutputError::Branch(outputs)
};
OutputUnit {
valid: false,
keyword_location: String::new(),
absolute_keyword_location: None,
instance_location: &self.instance_location,
error,
}
}
/// The `Detailed` structure, based on the schema.
pub fn detailed_output(&self) -> OutputUnit<'_, '_, '_> {
let mut root = None;
let mut stack: Vec<OutputUnit> = vec![];
let mut in_ref = InRef::default();
let mut kw_loc = KeywordLocation::default();
for node in DfsIterator::new(self) {
match node {
DfsItem::Pre(e) => {
in_ref.pre(e);
kw_loc.pre(e);
if e.skip() {
continue;
}
let absolute_keyword_location = if in_ref.get() {
Some(e.absolute_keyword_location())
} else {
None
};
stack.push(OutputUnit {
valid: false,
keyword_location: kw_loc.get(e),
absolute_keyword_location,
instance_location: &e.instance_location,
error: OutputError::Leaf(&e.kind),
});
}
DfsItem::Post(e) => {
in_ref.post();
kw_loc.post();
if e.skip() {
continue;
}
let output = stack.pop().unwrap();
if let Some(parent) = stack.last_mut() {
match &mut parent.error {
OutputError::Leaf(_) => {
parent.error = OutputError::Branch(vec![output]);
}
OutputError::Branch(v) => v.push(output),
}
} else {
root.replace(output);
}
}
}
}
root.unwrap()
}
}
// DfsIterator --
impl Display for ValidationError<'_, '_> {
/// Formats error hierarchy. Use `#` to show the schema location.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut indent = Indent::default();
let mut sloc = SchemaLocation::default();
// let mut kw_loc = KeywordLocation::default();
for node in DfsIterator::new(self) {
match node {
DfsItem::Pre(e) => {
// kw_loc.pre(e);
if e.skip() {
continue;
}
indent.pre(f)?;
if f.alternate() {
sloc.pre(e);
}
if let ErrorKind::Schema { .. } = &e.kind {
write!(f, "jsonschema {}", e.kind)?;
} else {
write!(f, "at {}", quote(&e.instance_location.to_string()))?;
if f.alternate() {
write!(f, " [{}]", sloc)?;
// write!(f, " [{}]", kw_loc.get(e))?;
// write!(f, " [{}]", e.absolute_keyword_location())?;
}
write!(f, ": {}", e.kind)?;
}
}
DfsItem::Post(e) => {
// kw_loc.post();
if e.skip() {
continue;
}
indent.post();
sloc.post();
}
}
}
Ok(())
}
}
/// Depth-first walk over an error and its `causes`.
///
/// Every node is yielded twice: as [`DfsItem::Pre`] before any of its causes
/// and as [`DfsItem::Post`] once all of them have been visited.
struct DfsIterator<'a, 'v, 's> {
    /// Starting error; taken on the first call to `next`.
    root: Option<&'a ValidationError<'v, 's>>,
    /// Nodes that have been entered but not yet left, innermost last.
    stack: Vec<Frame<'a, 'v, 's>>,
}

impl<'a, 'v, 's> DfsIterator<'a, 'v, 's> {
    /// Creates a walk starting at `err`.
    fn new(err: &'a ValidationError<'v, 's>) -> Self {
        Self {
            root: Some(err),
            stack: Vec::new(),
        }
    }
}

impl<'a, 'v, 's> Iterator for DfsIterator<'a, 'v, 's> {
    type Item = DfsItem<&'a ValidationError<'v, 's>>;

    fn next(&mut self) -> Option<Self::Item> {
        let Some(mut frame) = self.stack.pop() else {
            // Nothing is open: this is either the first call or the walk is over.
            let err = self.root.take()?;
            self.stack.push(Frame::from(err));
            return Some(DfsItem::Pre(err));
        };
        match frame.causes.split_first() {
            // All causes visited: leave the node, its frame stays popped.
            None => Some(DfsItem::Post(frame.err)),
            // Descend into the next cause and keep the rest for later.
            Some((child, rest)) => {
                frame.causes = rest;
                self.stack.push(frame);
                self.stack.push(Frame::from(child));
                Some(DfsItem::Pre(child))
            }
        }
    }
}

/// An entered node together with the causes that still have to be visited.
struct Frame<'a, 'v, 's> {
    err: &'a ValidationError<'v, 's>,
    causes: &'a [ValidationError<'v, 's>],
}

impl<'a, 'v, 's> Frame<'a, 'v, 's> {
    /// Opens a frame for `err` with all of its causes pending.
    fn from(err: &'a ValidationError<'v, 's>) -> Self {
        Frame {
            err,
            causes: &err.causes,
        }
    }
}

/// Visit direction reported by [`DfsIterator`].
enum DfsItem<T> {
    /// The node is being entered; its causes follow.
    Pre(T),
    /// The node is being left; all its causes have been visited.
    Post(T),
}
// Indent --
/// Tracks the nesting depth while printing the error hierarchy.
#[derive(Default)]
struct Indent {
    /// Number of levels currently open.
    n: usize,
}

impl Indent {
    /// Opens a level, writing the line prefix for it first.
    ///
    /// The first level writes nothing. Each deeper level starts a new line,
    /// pads it according to its depth and writes `"- "`. The depth is only
    /// increased once all writes have succeeded.
    fn pre(&mut self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        if let Some(padding) = self.n.checked_sub(1) {
            writeln!(f)?;
            for _ in 0..padding {
                write!(f, " ")?;
            }
            write!(f, "- ")?;
        }
        self.n += 1;
        Ok(())
    }

    /// Closes the level opened by the matching `pre`.
    fn post(&mut self) {
        self.n -= 1;
    }
}
// SchemaLocation
/// Path of errors from the root down to the one currently being printed,
/// used to render a schema location for the innermost error.
#[derive(Default)]
struct SchemaLocation<'a, 's, 'v> {
    stack: Vec<&'a ValidationError<'s, 'v>>,
}

impl<'a, 's, 'v> SchemaLocation<'a, 's, 'v> {
    /// Enters `e`.
    fn pre(&mut self, e: &'a ValidationError<'s, 'v>) {
        self.stack.push(e);
    }

    /// Leaves the most recently entered error, if any.
    fn post(&mut self) {
        self.stack.pop();
    }
}

impl Display for SchemaLocation<'_, '_, '_> {
    /// Writes the location of the innermost error.
    ///
    /// When the part of that location before its fragment (as returned by
    /// `split`) equals that of the enclosing error, only `S#` plus the
    /// fragment is written; otherwise the full location is written.
    ///
    /// Panics when no error has been entered.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut innermost_first = self.stack.iter().rev().copied();

        let node = innermost_first.next().unwrap();
        let cur: Cow<str> = match &node.kind {
            ErrorKind::Schema { url } => Cow::Borrowed(url),
            ErrorKind::Reference { url, .. } => Cow::Borrowed(url),
            _ => Cow::Owned(node.absolute_keyword_location().to_string()),
        };

        if let Some(parent) = innermost_first.next() {
            let parent_url: &str = match &parent.kind {
                ErrorKind::Schema { url } => url,
                ErrorKind::Reference { url, .. } => url,
                _ => parent.schema_url,
            };
            let (p, _) = split(parent_url);
            let (c, frag) = split(cur.as_ref());
            if c == p {
                return write!(f, "S#{frag}");
            }
        }
        write!(f, "{cur}")
    }
}
// KeywordLocation --
/// Builds the keyword location string while the error tree is walked.
#[derive(Default)]
struct KeywordLocation<'a> {
    /// Location accumulated for the node currently being visited.
    loc: String,
    /// One entry per entered node: the url recorded for it and the length of
    /// `loc` right after it was entered.
    stack: Vec<(&'a str, usize)>, // (schema_url, len)
}

impl<'a> KeywordLocation<'a> {
    /// Enters `e`, extending `loc` relative to the enclosing node.
    ///
    /// With an enclosing node present, the tail of `e.schema_url` past the
    /// length of the enclosing node's recorded url is appended, followed by
    /// `/<kw>` for a `Reference` error. The first node appends nothing.
    // NOTE(review): the slice assumes `e.schema_url` is at least as long as the
    // enclosing url and that the cut falls on a char boundary; it panics otherwise.
    fn pre(&mut self, e: &'a ValidationError) {
        let recorded: &'a str = match &e.kind {
            ErrorKind::Schema { url } => url,
            ErrorKind::Reference { url, .. } => url,
            _ => e.schema_url,
        };
        if let Some((prev, _)) = self.stack.last() {
            self.loc.push_str(&e.schema_url[prev.len()..]); // todo: url-decode
            if let ErrorKind::Reference { kw, .. } = &e.kind {
                self.loc.push('/');
                self.loc.push_str(kw);
            }
        }
        let len = self.loc.len();
        self.stack.push((recorded, len));
    }

    /// Leaves the current node and cuts `loc` back to the length recorded for
    /// the node that is now innermost. `loc` is left as is when none remains.
    fn post(&mut self) {
        self.stack.pop();
        if let Some(&(_, len)) = self.stack.last() {
            self.loc.truncate(len);
        }
    }

    /// Returns the keyword location of `cur`.
    ///
    /// For a `Reference` error, or a kind without a keyword path, this is the
    /// accumulated location. Otherwise `/` and the keyword path are appended
    /// to a copy of it. The accumulated location itself stays unchanged.
    fn get(&mut self, cur: &'a ValidationError) -> String {
        let kw_path = match &cur.kind {
            ErrorKind::Reference { .. } => None,
            kind => kind.keyword_path(),
        };
        let mut loc = self.loc.clone();
        if let Some(kw_path) = kw_path {
            loc.push('/');
            write!(loc, "{}", kw_path).expect("write kw_path to String should not fail");
        }
        loc
    }
}
/// Tracks whether the node being visited is a `Reference` error or lies
/// below one.
#[derive(Default)]
struct InRef {
    /// One flag per entered node, innermost last.
    stack: Vec<bool>,
}

impl InRef {
    /// Enters `e`: the flag is inherited from the enclosing node and is also
    /// set when `e` itself is a `Reference` error.
    fn pre(&mut self, e: &ValidationError) {
        let inherited = self.get();
        self.stack
            .push(inherited || matches!(e.kind, ErrorKind::Reference { .. }));
    }

    /// Leaves the most recently entered node.
    fn post(&mut self) {
        self.stack.pop();
    }

    /// Returns the flag of the innermost node, or `false` when none is entered.
    fn get(&self) -> bool {
        self.stack.last().copied().unwrap_or(false)
    }
}
// output formats --
/// Simplest output format, merely the boolean result.
pub struct FlagOutput {
    /// The validation result.
    pub valid: bool,
}

impl Serialize for FlagOutput {
    /// Serializes as a map with the single entry `valid`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let mut entries = serializer.serialize_map(Some(1))?;
        entries.serialize_entry("valid", &self.valid)?;
        entries.end()
    }
}

impl Display for FlagOutput {
    /// Writes the JSON form; the alternate flag (`{:#}`) pretty-prints it.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write_json_to_fmt(f, self)
    }
}
/// Single OutputUnit used in Basic/Detailed output formats.
pub struct OutputUnit<'e, 's, 'v> {
    /// The validation result of this unit.
    pub valid: bool,
    /// The location of the validating keyword, serialized as `keywordLocation`.
    pub keyword_location: String,
    /// The absolute, dereferenced location of the validating keyword
    pub absolute_keyword_location: Option<AbsoluteKeywordLocation<'s>>,
    /// The location of the JSON value within the instance being validated
    pub instance_location: &'e InstanceLocation<'v>,
    /// Either the error of this unit or the units nested below it.
    pub error: OutputError<'e, 's, 'v>,
}

impl Serialize for OutputUnit<'_, '_, '_> {
    /// Serializes as a map with the entries `valid`, `keywordLocation`,
    /// `absoluteKeywordLocation` (only when present), `instanceLocation` and
    /// then `error` for a leaf or `errors` for a branch.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        // Four entries are always written; the absolute location is optional.
        let n = 4 + usize::from(self.absolute_keyword_location.is_some());
        let mut map = serializer.serialize_map(Some(n))?;
        map.serialize_entry("valid", &self.valid)?;
        // Already an owned `String`: serialize it directly instead of cloning it.
        map.serialize_entry("keywordLocation", &self.keyword_location)?;
        if let Some(s) = &self.absolute_keyword_location {
            map.serialize_entry("absoluteKeywordLocation", &s.to_string())?;
        }
        map.serialize_entry("instanceLocation", &self.instance_location.to_string())?;
        let pname = match self.error {
            OutputError::Leaf(_) => "error",
            OutputError::Branch(_) => "errors",
        };
        map.serialize_entry(pname, &self.error)?;
        map.end()
    }
}

impl Display for OutputUnit<'_, '_, '_> {
    /// Writes the JSON form; the alternate flag (`{:#}`) pretty-prints it.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write_json_to_fmt(f, self)
    }
}
/// Error of [`OutputUnit`].
pub enum OutputError<'e, 's, 'v> {
    /// Single.
    Leaf(&'e ErrorKind<'s, 'v>),
    /// Nested.
    Branch(Vec<OutputUnit<'e, 's, 'v>>),
}

impl Serialize for OutputError<'_, '_, '_> {
    /// A leaf serializes as the display string of its error kind, a branch as
    /// the sequence of its units.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let units = match self {
            OutputError::Leaf(kind) => return serializer.serialize_str(&kind.to_string()),
            OutputError::Branch(units) => units,
        };
        let mut seq = serializer.serialize_seq(Some(units.len()))?;
        units
            .iter()
            .try_for_each(|unit| seq.serialize_element(unit))?;
        seq.end()
    }
}
// AbsoluteKeywordLocation --
impl<'s> ErrorKind<'s, '_> {
    /// Returns the schema keyword (plus an optional token below it) that
    /// produced this kind of error, or `None` for kinds that are not tied to
    /// a keyword (`Group`, `Schema`, `RefCycle`, `FalseSchema`).
    ///
    /// The keyword names are the ones used in schemas, so they can be appended
    /// to a schema location to address the failing keyword.
    pub fn keyword_path(&self) -> Option<KeywordPath<'s>> {
        /// Path consisting of the keyword only.
        #[inline(always)]
        fn kw(kw: &'static str) -> Option<KeywordPath<'static>> {
            Some(KeywordPath {
                keyword: kw,
                token: None,
            })
        }
        /// Path consisting of the keyword and the property `prop` below it.
        #[inline(always)]
        fn kw_prop<'s>(kw: &'static str, prop: &'s str) -> Option<KeywordPath<'s>> {
            Some(KeywordPath {
                keyword: kw,
                token: Some(SchemaToken::Prop(prop)),
            })
        }
        use ErrorKind::*;
        match self {
            Group => None,
            Schema { .. } => None,
            ContentSchema => kw("contentSchema"),
            PropertyName { .. } => kw("propertyNames"),
            Reference { kw: kword, .. } => kw(kword),
            RefCycle { .. } => None,
            FalseSchema => None,
            Type { .. } => kw("type"),
            Enum { .. } => kw("enum"),
            Const { .. } => kw("const"),
            Format { .. } => kw("format"),
            MinProperties { .. } => kw("minProperties"),
            MaxProperties { .. } => kw("maxProperties"),
            // The schema keyword is plural; the singular form names no keyword.
            AdditionalProperties { .. } => kw("additionalProperties"),
            Required { .. } => kw("required"),
            Dependency { prop, .. } => kw_prop("dependencies", prop),
            DependentRequired { prop, .. } => kw_prop("dependentRequired", prop),
            MinItems { .. } => kw("minItems"),
            MaxItems { .. } => kw("maxItems"),
            Contains => kw("contains"),
            MinContains { .. } => kw("minContains"),
            MaxContains { .. } => kw("maxContains"),
            UniqueItems { .. } => kw("uniqueItems"),
            AdditionalItems { .. } => kw("additionalItems"),
            MinLength { .. } => kw("minLength"),
            MaxLength { .. } => kw("maxLength"),
            Pattern { .. } => kw("pattern"),
            ContentEncoding { .. } => kw("contentEncoding"),
            ContentMediaType { .. } => kw("contentMediaType"),
            Minimum { .. } => kw("minimum"),
            Maximum { .. } => kw("maximum"),
            ExclusiveMinimum { .. } => kw("exclusiveMinimum"),
            ExclusiveMaximum { .. } => kw("exclusiveMaximum"),
            MultipleOf { .. } => kw("multipleOf"),
            Not => kw("not"),
            AllOf => kw("allOf"),
            AnyOf => kw("anyOf"),
            OneOf(_) => kw("oneOf"),
        }
    }
}
/// The absolute, dereferenced location of the validating keyword
#[derive(Debug, Clone)]
pub struct AbsoluteKeywordLocation<'s> {
/// The absolute, dereferenced schema location.
pub schema_url: &'s str,
/// Location within the `schema_url`.
pub keyword_path: Option<KeywordPath<'s>>,
}
impl Display for AbsoluteKeywordLocation<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.schema_url.fmt(f)?;
if let Some(path) = &self.keyword_path {
f.write_str("/")?;
path.keyword.fmt(f)?;
if let Some(token) = &path.token {
f.write_str("/")?;
match token {
SchemaToken::Prop(p) => write!(f, "{}", escape(p))?, // todo: url-encode
SchemaToken::Item(i) => write!(f, "{i}")?,
}
}
}
Ok(())
}
}
/// JsonPointer in schema.
#[derive(Debug, Clone)]
pub struct KeywordPath<'s> {
    /// The first token.
    pub keyword: &'static str,
    /// Optional token within keyword.
    pub token: Option<SchemaToken<'s>>,
}

impl Display for KeywordPath<'_> {
    /// Writes the keyword, followed by `/` and the token when there is one.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.keyword.fmt(f)?;
        match &self.token {
            Some(token) => {
                f.write_str("/")?;
                token.fmt(f)
            }
            None => Ok(()),
        }
    }
}
/// Token for schema.
#[derive(Debug, Clone)]
pub enum SchemaToken<'s> {
    /// Token for property.
    Prop(&'s str),
    /// Token for array item.
    Item(usize),
}

impl Display for SchemaToken<'_> {
    /// Writes a property name in escaped form and an item as its index.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Item(index) => write!(f, "{index}"),
            Self::Prop(name) => write!(f, "{}", escape(name)),
        }
    }
}
// helpers --
/// Writes `value` to `f` as JSON, pretty-printed when the alternate flag
/// (`{:#}`) is set and compact otherwise.
///
/// A serialization failure is reported as `std::fmt::Error`.
fn write_json_to_fmt<T>(f: &mut std::fmt::Formatter, value: &T) -> Result<(), std::fmt::Error>
where
    T: ?Sized + Serialize,
{
    let encoded = if f.alternate() {
        serde_json::to_string_pretty(value)
    } else {
        serde_json::to_string(value)
    };
    match encoded {
        Ok(json) => f.write_str(&json),
        Err(_) => Err(std::fmt::Error),
    }
}

128
validator/src/root.rs Normal file
View File

@ -0,0 +1,128 @@
use std::collections::{HashMap, HashSet};
use crate::{compiler::CompileError, draft::*, util::*};
use serde_json::Value;
use url::Url;
/// A schema document together with the resources found in it.
pub(crate) struct Root {
    /// Draft whose rules are applied to this document.
    pub(crate) draft: &'static Draft,
    /// Resources of the document by JSON pointer; the root resource has key `""`.
    pub(crate) resources: HashMap<JsonPointer, Resource>, // ptr => _
    /// Url of the document.
    pub(crate) url: Url,
    /// When set, the vocabularies in use; otherwise the draft's defaults apply.
    pub(crate) meta_vocabs: Option<Vec<String>>,
}

impl Root {
    /// Tells whether vocabulary `name` is in use for this document.
    ///
    /// Always true for drafts before 2019 and for `core`. Otherwise the
    /// answer comes from `meta_vocabs` when set, else from the draft's
    /// default vocabularies.
    pub(crate) fn has_vocab(&self, name: &str) -> bool {
        if self.draft.version < 2019 || name == "core" {
            return true;
        }
        match &self.meta_vocabs {
            Some(vocabs) => vocabs.iter().any(|s| s == name),
            None => self.draft.default_vocabs.contains(&name),
        }
    }

    /// Returns the resource registered for the document root (pointer `""`).
    ///
    /// A missing root resource is reported as `CompileError::Bug`. The error
    /// is only built on that path, so successful lookups do not allocate.
    fn root_resource(&self) -> Result<&Resource, CompileError> {
        self.resources.get("").ok_or_else(|| {
            CompileError::Bug(format!("no root resource found for {}", self.url).into())
        })
    }

    /// Resolves `frag` relative to resource `res`.
    ///
    /// An anchor is looked up in the anchors of `res` and fails with
    /// `CompileError::AnchorNotFound` when unknown. A JSON pointer is
    /// appended to the pointer of `res`.
    fn resolve_fragment_in(&self, frag: &Fragment, res: &Resource) -> Result<UrlPtr, CompileError> {
        let ptr = match frag {
            Fragment::Anchor(anchor) => {
                let Some(ptr) = res.anchors.get(anchor) else {
                    return Err(CompileError::AnchorNotFound {
                        url: self.url.to_string(),
                        reference: UrlFrag::format(&res.id, frag.as_str()),
                    });
                };
                ptr.clone()
            }
            Fragment::JsonPointer(ptr) => res.ptr.concat(ptr),
        };
        Ok(UrlPtr {
            url: self.url.clone(),
            ptr,
        })
    }

    /// Resolves `frag` relative to the root resource of this document.
    pub(crate) fn resolve_fragment(&self, frag: &Fragment) -> Result<UrlPtr, CompileError> {
        let res = self.root_resource()?;
        self.resolve_fragment_in(frag, res)
    }

    // resolves `UrlFrag` to `UrlPtr` from root.
    // returns `None` if it is external.
    pub(crate) fn resolve(&self, uf: &UrlFrag) -> Result<Option<UrlPtr>, CompileError> {
        let res = if uf.url == self.url {
            self.root_resource()?
        } else {
            // look for resource with id==uf.url
            let Some(res) = self.resources.values().find(|res| res.id == uf.url) else {
                return Ok(None); // external url
            };
            res
        };
        self.resolve_fragment_in(&uf.frag, res).map(Some)
    }

    /// Returns the resource that contains `ptr`.
    ///
    /// The pointer is shortened one token at a time until a registered
    /// resource is found. Panics when not even the root resource is present.
    pub(crate) fn resource(&self, ptr: &JsonPointer) -> &Resource {
        let mut ptr = ptr.as_str();
        loop {
            if let Some(res) = self.resources.get(ptr) {
                return res;
            }
            // Drop the last token; without a `/` left there is nothing to drop.
            let Some((prefix, _)) = ptr.rsplit_once('/') else {
                break;
            };
            ptr = prefix;
        }
        self.resources.get("").expect("root resource should exist")
    }

    /// Returns the id of the resource that contains `ptr`.
    pub(crate) fn base_url(&self, ptr: &JsonPointer) -> &Url {
        &self.resource(ptr).id
    }

    /// Registers the resources found in the value at `ptr` of `doc`.
    ///
    /// When the value at `ptr` did not become a resource of its own, its
    /// anchors are collected into the resource that contains `ptr`.
    pub(crate) fn add_subschema(
        &mut self,
        doc: &Value,
        ptr: &JsonPointer,
    ) -> Result<(), CompileError> {
        let v = ptr.lookup(doc, &self.url)?;
        let base_url = self.base_url(ptr).clone();
        self.draft
            .collect_resources(v, &base_url, ptr.clone(), &self.url, &mut self.resources)?;
        // collect anchors
        if !self.resources.contains_key(ptr) {
            let res = self.resource(ptr);
            if let Some(res) = self.resources.get_mut(&res.ptr.clone()) {
                self.draft.collect_anchors(v, ptr, res, &self.url)?;
            }
        }
        Ok(())
    }
}
/// A schema resource inside a root document: its location, its id and the anchors it declares.
#[derive(Debug)]
pub(crate) struct Resource {
/// Location of the resource as a JSON pointer.
pub(crate) ptr: JsonPointer, // from root
/// Identifier of the resource; `Root::base_url` returns it as the base url.
pub(crate) id: Url,
/// Anchors of this resource, each mapped to a JSON pointer.
pub(crate) anchors: HashMap<Anchor, JsonPointer>, // anchor => ptr
/// NOTE(review): presumably the anchors declared as dynamic; confirm against the draft code.
pub(crate) dynamic_anchors: HashSet<Anchor>,
}
impl Resource {
pub(crate) fn new(ptr: JsonPointer, id: Url) -> Self {
Self {
ptr,
id,
anchors: HashMap::new(),
dynamic_anchors: HashSet::new(),
}
}
}

107
validator/src/roots.rs Normal file
View File

@ -0,0 +1,107 @@
use std::collections::{HashMap, HashSet};
use crate::{compiler::CompileError, draft::*, loader::DefaultUrlLoader, root::Root, util::*};
use serde_json::Value;
use url::Url;
// --
/// Registry of loaded root documents, keyed by url.
pub(crate) struct Roots {
/// Draft passed to the loader's `get_draft` when a new root is created.
pub(crate) default_draft: &'static Draft,
/// Roots loaded so far; `or_load` asserts (in debug builds) that keys carry no fragment.
map: HashMap<Url, Root>,
/// Loader used to obtain the document behind a url.
pub(crate) loader: DefaultUrlLoader,
}
impl Roots {
fn new() -> Self {
Self {
default_draft: latest(),
map: Default::default(),
loader: DefaultUrlLoader::new(),
}
}
}
impl Default for Roots {
fn default() -> Self {
Self::new()
}
}
impl Roots {
    /// Returns the root registered under `url`, if it has been loaded.
    pub(crate) fn get(&self, url: &Url) -> Option<&Root> {
        self.map.get(url)
    }

    /// Loads the root for `uf.url` when necessary, then resolves `uf.frag` against it.
    pub(crate) fn resolve_fragment(&mut self, uf: UrlFrag) -> Result<UrlPtr, CompileError> {
        self.or_load(uf.url.clone())?;
        match self.map.get(&uf.url) {
            Some(root) => root.resolve_fragment(&uf.frag),
            None => Err(CompileError::Bug("or_load didn't add".into())),
        }
    }

    /// Makes sure the location `up` is known to its root.
    ///
    /// When the draft does not already classify `up.ptr` as a subschema location, the value
    /// at that location is validated and registered through `Root::add_subschema`.
    pub(crate) fn ensure_subschema(&mut self, up: &UrlPtr) -> Result<(), CompileError> {
        self.or_load(up.url.clone())?;
        let root = match self.map.get_mut(&up.url) {
            Some(root) => root,
            None => return Err(CompileError::Bug("or_load didn't add".into())),
        };
        if root.draft.is_subschema(up.ptr.as_str()) {
            return Ok(());
        }
        let doc = self.loader.load(&root.url)?;
        let v = up.ptr.lookup(doc, &up.url)?;
        root.draft.validate(up, v)?;
        root.add_subschema(doc, &up.ptr)
    }

    /// Loads the document at `url` and registers its root, unless one is registered already.
    pub(crate) fn or_load(&mut self, url: Url) -> Result<(), CompileError> {
        debug_assert!(url.fragment().is_none(), "trying to add root with fragment");
        if !self.map.contains_key(&url) {
            let doc = self.loader.load(&url)?;
            let root = self.create_root(url.clone(), doc)?;
            self.map.insert(url, root);
        }
        Ok(())
    }

    /// Builds a `Root` for `doc` located at `url`: determines the draft and the metaschema
    /// vocabularies, collects the resources, and validates the document unless its host is
    /// `json-schema.org`.
    pub(crate) fn create_root(&self, url: Url, doc: &Value) -> Result<Root, CompileError> {
        // Location of the whole document; shared by draft detection and validation.
        let doc_location = UrlPtr {
            url: url.clone(),
            ptr: "".into(),
        };
        let draft = self
            .loader
            .get_draft(&doc_location, doc, self.default_draft, HashSet::new())?;
        let meta_vocabs = self.loader.get_meta_vocabs(doc, draft)?;
        let mut resources = HashMap::default();
        draft.collect_resources(doc, &url, "".into(), &url, &mut resources)?;
        let hosted_on_json_schema_org = matches!(url.host_str(), Some("json-schema.org"));
        if !hosted_on_json_schema_org {
            draft.validate(&doc_location, doc)?;
        }
        Ok(Root {
            draft,
            resources,
            url,
            meta_vocabs,
        })
    }

    /// Moves every entry out of `roots` into this registry, leaving `roots` empty.
    pub(crate) fn insert(&mut self, roots: &mut HashMap<Url, Root>) {
        for (url, root) in roots.drain() {
            self.map.insert(url, root);
        }
    }
}

545
validator/src/util.rs Normal file
View File

@ -0,0 +1,545 @@
use std::{
borrow::{Borrow, Cow},
fmt::Display,
hash::{Hash, Hasher},
str::FromStr,
};
use ahash::{AHashMap, AHasher};
use percent_encoding::{percent_decode_str, AsciiSet, CONTROLS};
use serde_json::Value;
use url::Url;
use crate::CompileError;
// --
/// JSON pointer text such as `/a/b`; `lookup` splits it on `/` and unescapes each token.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub(crate) struct JsonPointer(pub(crate) String);
impl JsonPointer {
    /// Escapes one reference token: `~` becomes `~0` and `/` becomes `~1`.
    ///
    /// Borrows `token` unchanged when it contains neither character.
    pub(crate) fn escape(token: &str) -> Cow<'_, str> {
        const SPECIAL: [char; 2] = ['~', '/'];
        if token.contains(SPECIAL) {
            token.replace('~', "~0").replace('/', "~1").into()
        } else {
            token.into()
        }
    }

    /// Reverses [`JsonPointer::escape`] for one reference token.
    ///
    /// # Errors
    /// Returns `Err(())` when a `~` is not followed by `0` or `1`.
    pub(crate) fn unescape(mut tok: &str) -> Result<Cow<'_, str>, ()> {
        let Some(mut tilde) = tok.find('~') else {
            return Ok(Cow::Borrowed(tok));
        };
        let mut s = String::with_capacity(tok.len());
        loop {
            s.push_str(&tok[..tilde]);
            tok = &tok[tilde + 1..];
            match tok.chars().next() {
                Some('1') => s.push('/'),
                Some('0') => s.push('~'),
                _ => return Err(()),
            }
            // The matched character is ASCII, so skipping one byte stays on a char boundary.
            tok = &tok[1..];
            let Some(i) = tok.find('~') else {
                s.push_str(tok);
                break;
            };
            tilde = i;
        }
        Ok(Cow::Owned(s))
    }

    /// Parses `tok` as an RFC 6901 array index: either `0` or ASCII digits without a leading
    /// zero. `usize::from_str` alone would also accept forms such as `+1` and `01`.
    fn array_index(tok: &str) -> Option<usize> {
        let canonical = tok == "0"
            || (!tok.is_empty()
                && !tok.starts_with('0')
                && tok.bytes().all(|b| b.is_ascii_digit()));
        if canonical {
            usize::from_str(tok).ok()
        } else {
            None
        }
    }

    /// Follows this pointer inside `v` and returns the value it designates.
    ///
    /// `v_url` is only used to build the location reported in errors.
    ///
    /// # Errors
    /// * `CompileError::InvalidJsonPointer` when a token has an invalid `~` escape.
    /// * `CompileError::JsonPointerNotFound` when a token names a missing property, is not a
    ///   valid in-range array index, or is applied to a value that is neither an object nor
    ///   an array.
    pub(crate) fn lookup<'a>(
        &self,
        mut v: &'a Value,
        v_url: &Url,
    ) -> Result<&'a Value, CompileError> {
        // The text before the first `/` is empty for a well-formed pointer, hence `skip(1)`.
        for tok in self.0.split('/').skip(1) {
            let Ok(tok) = Self::unescape(tok) else {
                let loc = UrlFrag::format(v_url, self.as_str());
                return Err(CompileError::InvalidJsonPointer(loc));
            };
            match v {
                Value::Object(obj) => {
                    if let Some(pvalue) = obj.get(tok.as_ref()) {
                        v = pvalue;
                        continue;
                    }
                }
                Value::Array(arr) => {
                    if let Some(item) = Self::array_index(tok.as_ref()).and_then(|i| arr.get(i)) {
                        v = item;
                        continue;
                    }
                }
                _ => {}
            }
            let loc = UrlFrag::format(v_url, self.as_str());
            return Err(CompileError::JsonPointerNotFound(loc));
        }
        Ok(v)
    }

    /// Returns the pointer text.
    pub(crate) fn as_str(&self) -> &str {
        &self.0
    }

    /// Reports whether the pointer text is empty.
    pub(crate) fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Returns the pointer formed by appending the text of `next` to this pointer.
    pub(crate) fn concat(&self, next: &Self) -> Self {
        JsonPointer(format!("{}{}", self.0, next.0))
    }

    /// Returns this pointer extended by the token `tok`, which is escaped first.
    pub(crate) fn append(&self, tok: &str) -> Self {
        Self(format!("{}/{}", self, Self::escape(tok)))
    }

    /// Returns this pointer extended by `tok1` and then `tok2`, both escaped first.
    pub(crate) fn append2(&self, tok1: &str, tok2: &str) -> Self {
        Self(format!(
            "{}/{}/{}",
            self,
            Self::escape(tok1),
            Self::escape(tok2)
        ))
    }
}
impl Display for JsonPointer {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
impl Borrow<str> for JsonPointer {
    /// Exposes the pointer text, which lets maps keyed by `JsonPointer` be queried by `&str`.
    fn borrow(&self) -> &str {
        self.0.as_str()
    }
}
impl From<&str> for JsonPointer {
fn from(value: &str) -> Self {
Self(value.into())
}
}
// --
/// Name of an anchor, i.e. a url fragment that is not a JSON pointer.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub(crate) struct Anchor(pub(crate) String);

impl Display for Anchor {
    /// Formats the anchor exactly like its underlying text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Anchor(name) = self;
        Display::fmt(name, f)
    }
}

impl Borrow<str> for Anchor {
    /// Exposes the anchor name, which lets maps keyed by `Anchor` be queried by `&str`.
    fn borrow(&self) -> &str {
        self.0.as_str()
    }
}

impl From<&str> for Anchor {
    /// Wraps a copy of `value`.
    fn from(value: &str) -> Self {
        Anchor(value.to_owned())
    }
}
// --
/// Decoded url fragment, classified by `Fragment::split`.
#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) enum Fragment {
/// Fragment that is non-empty and does not start with `/`.
Anchor(Anchor),
/// Fragment that is empty or starts with `/`.
JsonPointer(JsonPointer),
}
impl Fragment {
pub(crate) fn split(s: &str) -> Result<(&str, Fragment), CompileError> {
let (u, frag) = split(s);
let frag = percent_decode_str(frag)
.decode_utf8()
.map_err(|src| CompileError::ParseUrlError {
url: s.to_string(),
src: src.into(),
})?
.to_string();
let frag = if frag.is_empty() || frag.starts_with('/') {
Fragment::JsonPointer(JsonPointer(frag))
} else {
Fragment::Anchor(Anchor(frag))
};
Ok((u, frag))
}
pub(crate) fn encode(frag: &str) -> String {
// https://url.spec.whatwg.org/#fragment-percent-encode-set
const FRAGMENT: &AsciiSet = &CONTROLS
.add(b'%')
.add(b' ')
.add(b'"')
.add(b'<')
.add(b'>')
.add(b'`');
percent_encoding::utf8_percent_encode(frag, FRAGMENT).to_string()
}
pub(crate) fn as_str(&self) -> &str {
match self {
Fragment::Anchor(s) => &s.0,
Fragment::JsonPointer(s) => &s.0,
}
}
}
// --
/// A url paired with a fragment that has been split off and decoded.
#[derive(Clone)]
pub(crate) struct UrlFrag {
/// The url part; the fragment is held separately in `frag`.
pub(crate) url: Url,
/// The decoded fragment, either an anchor or a JSON pointer.
pub(crate) frag: Fragment,
}
impl UrlFrag {
/// Parses `input` as an absolute url with an optional fragment.
///
/// Outside wasm32, input that the url parser rejects as a relative url without base is
/// treated as a file path, made absolute and converted to a file url; on Windows, input
/// starting with a drive prefix is converted to a file url directly.
///
/// # Errors
/// Returns `CompileError::ParseUrlError` when the fragment, url or path cannot be parsed, and
/// `CompileError::Bug` when a path cannot be converted into a url.
pub(crate) fn absolute(input: &str) -> Result<UrlFrag, CompileError> {
let (u, frag) = Fragment::split(input)?;
// note: windows drive letter is treated as url scheme by url parser
#[cfg(not(target_arch = "wasm32"))]
if std::env::consts::OS == "windows" && starts_with_windows_drive(u) {
let url = Url::from_file_path(u)
.map_err(|_| CompileError::Bug(format!("failed to convert {u} into url").into()))?;
return Ok(UrlFrag { url, frag });
}
match Url::parse(u) {
Ok(url) => Ok(UrlFrag { url, frag }),
// Not a url at all: interpret it as a file path (this arm is compiled out on wasm32).
#[cfg(not(target_arch = "wasm32"))]
Err(url::ParseError::RelativeUrlWithoutBase) => {
let p = std::path::absolute(u).map_err(|e| CompileError::ParseUrlError {
url: u.to_owned(),
src: e.into(),
})?;
let url = Url::from_file_path(p).map_err(|_| {
CompileError::Bug(format!("failed to convert {u} into url").into())
})?;
Ok(UrlFrag { url, frag })
}
Err(e) => Err(CompileError::ParseUrlError {
url: u.to_owned(),
src: e.into(),
}),
}
}
/// Resolves `input` against the base `url`, keeping the fragment separate.
///
/// When `input` consists of a fragment only, `url` is returned unchanged with that fragment.
///
/// # Errors
/// Returns `CompileError::ParseUrlError` when the fragment cannot be decoded or the join fails.
pub(crate) fn join(url: &Url, input: &str) -> Result<UrlFrag, CompileError> {
let (input, frag) = Fragment::split(input)?;
if input.is_empty() {
return Ok(UrlFrag {
url: url.clone(),
frag,
});
}
let url = url.join(input).map_err(|e| CompileError::ParseUrlError {
url: input.to_string(),
src: e.into(),
})?;
Ok(UrlFrag { url, frag })
}
/// Renders `url` followed by `#` and the percent-encoded `frag`; the `#` is omitted when
/// `frag` is empty.
pub(crate) fn format(url: &Url, frag: &str) -> String {
if frag.is_empty() {
url.to_string()
} else {
format!("{}#{}", url, Fragment::encode(frag))
}
}
}
impl Display for UrlFrag {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}#{}", self.url, Fragment::encode(self.frag.as_str()))
}
}
// --
/// A location inside a document: the document's url plus a JSON pointer into it.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub(crate) struct UrlPtr {
/// Url used to identify the document in lookups and error locations.
pub(crate) url: Url,
/// Pointer into that document.
pub(crate) ptr: JsonPointer,
}
impl UrlPtr {
pub(crate) fn lookup<'a>(&self, doc: &'a Value) -> Result<&'a Value, CompileError> {
self.ptr.lookup(doc, &self.url)
}
pub(crate) fn format(&self, tok: &str) -> String {
format!(
"{}#{}/{}",
self.url,
Fragment::encode(self.ptr.as_str()),
Fragment::encode(JsonPointer::escape(tok).as_ref()),
)
}
}
impl Display for UrlPtr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}#{}", self.url, Fragment::encode(self.ptr.as_str()))
}
}
// --
/// Reports whether `v` is a number with an integral value, which includes floats such as
/// `2.0` whose fractional part is zero.
pub(crate) fn is_integer(v: &Value) -> bool {
    let Value::Number(n) = v else {
        return false;
    };
    if n.is_i64() || n.is_u64() {
        return true;
    }
    match n.as_f64() {
        Some(float) => float.fract() == 0.0,
        None => false,
    }
}
/// Reports whether `p` begins with a Windows drive prefix such as `C:\` or `c:\`.
///
/// Drive letters are case-insensitive on Windows, so any ASCII letter is accepted. The check
/// works on bytes and therefore cannot split a multi-byte character.
#[cfg(not(target_arch = "wasm32"))]
fn starts_with_windows_drive(p: &str) -> bool {
    matches!(p.as_bytes(), [drive, b':', b'\\', ..] if drive.is_ascii_alphabetic())
}
/// returns single-quoted string
///
/// The text is taken from the `Debug` rendering of `s`: the surrounding double quotes are
/// removed, `\"` is turned back into `"`, and every `'` is escaped as `\'`. All other
/// escapes produced by `Debug` (such as `\\` or `\n`) are kept.
pub(crate) fn quote<T>(s: &T) -> String
where
    T: AsRef<str> + std::fmt::Debug + ?Sized,
{
    let debug = format!("{s:?}");
    // Strip the delimiters before un-escaping: otherwise the closing quote that follows an
    // escaped trailing backslash (`"a\\"`) is mistaken for an escaped quote and one backslash
    // is lost.
    let inner = debug
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .unwrap_or(&debug);
    let inner = inner.replace(r#"\""#, "\"").replace('\'', r"\'");
    format!("'{inner}'")
}
/// Renders every item of `iterable` with `Display` and concatenates the results, placing
/// `sep` between consecutive items. An empty input yields an empty string.
pub(crate) fn join_iter<T>(iterable: T, sep: &str) -> String
where
    T: IntoIterator,
    T::Item: Display,
{
    let mut joined = String::new();
    for (position, item) in iterable.into_iter().enumerate() {
        if position > 0 {
            joined.push_str(sep);
        }
        joined.push_str(&item.to_string());
    }
    joined
}
/// Escapes one JSON pointer token; forwards to `JsonPointer::escape`.
pub(crate) fn escape(token: &str) -> Cow<'_, str> {
JsonPointer::escape(token)
}
/// Splits `url` at its first `#` into `(before, after)`; the `#` itself is dropped.
///
/// Returns `(url, "")` when there is no `#`.
pub(crate) fn split(url: &str) -> (&str, &str) {
    match url.split_once('#') {
        Some(parts) => parts,
        None => (url, ""),
    }
}
/// serde_json treats 0 and 0.0 not equal. so we cannot simply use v1==v2
///
/// Numbers are compared as `u64` when both sides convert, then as `i64`, then as `f64`.
/// Arrays are compared element by element, and objects by key regardless of key order.
pub(crate) fn equals(v1: &Value, v2: &Value) -> bool {
    match (v1, v2) {
        (Value::Null, Value::Null) => true,
        (Value::Bool(a), Value::Bool(b)) => a == b,
        (Value::Number(a), Value::Number(b)) => {
            if let (Some(a), Some(b)) = (a.as_u64(), b.as_u64()) {
                a == b
            } else if let (Some(a), Some(b)) = (a.as_i64(), b.as_i64()) {
                a == b
            } else if let (Some(a), Some(b)) = (a.as_f64(), b.as_f64()) {
                a == b
            } else {
                false
            }
        }
        (Value::String(a), Value::String(b)) => a == b,
        (Value::Array(a), Value::Array(b)) => {
            a.len() == b.len() && a.iter().zip(b).all(|(x, y)| equals(x, y))
        }
        (Value::Object(a), Value::Object(b)) => {
            // Equal sizes plus "every key of `a` has an equal value in `b`" implies equality.
            a.len() == b.len()
                && a.iter().all(|(key, x)| match b.get(key) {
                    Some(y) => equals(x, y),
                    None => false,
                })
        }
        _ => false,
    }
}
/// Finds a pair of equal items (as defined by `equals`) in `arr`.
///
/// Returns the indices `(i, j)` with `i < j` of the pair that is found, or `None` when all
/// items are distinct. Arrays with zero or one item never contain a duplicate.
///
/// Arrays of up to 20 items are compared pairwise; larger ones go through a hash map keyed
/// by `HashedValue`.
pub(crate) fn duplicates(arr: &Vec<Value>) -> Option<(usize, usize)> {
    match arr.as_slice() {
        [e0, e1] => {
            if equals(e0, e1) {
                return Some((0, 1));
            }
        }
        [e0, e1, e2] => {
            if equals(e0, e1) {
                return Some((0, 1));
            } else if equals(e0, e2) {
                return Some((0, 2));
            } else if equals(e1, e2) {
                return Some((1, 2));
            }
        }
        _ => {
            let len = arr.len();
            if len <= 20 {
                // Iterating with `enumerate`/`skip` instead of `0..len - 1` avoids the
                // subtraction underflow that an empty array would otherwise trigger.
                for (i, left) in arr.iter().enumerate() {
                    for (j, right) in arr.iter().enumerate().skip(i + 1) {
                        if equals(left, right) {
                            return Some((i, j));
                        }
                    }
                }
            } else {
                let mut seen = AHashMap::with_capacity(len);
                for (i, item) in arr.iter().enumerate() {
                    // `insert` hands back the index stored for an equal earlier item.
                    if let Some(j) = seen.insert(HashedValue(item), i) {
                        return Some((j, i));
                    }
                }
            }
        }
    }
    None
}
// HashedValue --
// Based on implementation proposed by Sven Marnach:
// https://stackoverflow.com/questions/60882381/what-is-the-fastest-correct-way-to-detect-that-there-are-no-duplicates-in-a-json
/// Borrowed JSON value that compares through `equals`, so it can be used as a hash-map key.
pub(crate) struct HashedValue<'a>(pub(crate) &'a Value);
impl PartialEq for HashedValue<'_> {
fn eq(&self, other: &Self) -> bool {
equals(self.0, other.0)
}
}
// Marker impl required for use as a hash-map key; the comparison itself lives in `PartialEq`.
impl Eq for HashedValue<'_> {}
impl Hash for HashedValue<'_> {
fn hash<H: Hasher>(&self, state: &mut H) {
match self.0 {
Value::Null => state.write_u32(3_221_225_473), // chosen randomly
Value::Bool(ref b) => b.hash(state),
Value::Number(ref num) => {
if let Some(num) = num.as_f64() {
num.to_bits().hash(state);
} else if let Some(num) = num.as_u64() {
num.hash(state);
} else if let Some(num) = num.as_i64() {
num.hash(state);
}
}
Value::String(ref str) => str.hash(state),
Value::Array(ref arr) => {
for item in arr {
HashedValue(item).hash(state);
}
}
Value::Object(ref obj) => {
let mut hash = 0;
for (pname, pvalue) in obj {
// We have no way of building a new hasher of type `H`, so we
// hardcode using the default hasher of a hash map.
let mut hasher = AHasher::default();
pname.hash(&mut hasher);
HashedValue(pvalue).hash(&mut hasher);
hash ^= hasher.finish();
}
state.write_u64(hash);
}
}
}
}
// Unit tests for the helpers defined in this file.
#[cfg(test)]
mod tests {
use ahash::AHashMap;
use serde_json::json;
use super::*;
// A double quote is kept as is, a single quote gets a backslash.
#[test]
fn test_quote() {
assert_eq!(quote(r#"abc"def'ghi"#), r#"'abc"def\'ghi'"#);
}
// The fragment is percent-decoded before it is classified as pointer or anchor.
#[test]
fn test_fragment_split() {
let tests = [
("#", Fragment::JsonPointer("".into())),
("#/a/b", Fragment::JsonPointer("/a/b".into())),
("#abcd", Fragment::Anchor("abcd".into())),
("#%61%62%63%64", Fragment::Anchor("abcd".into())),
(
"#%2F%61%62%63%64%2fef",
Fragment::JsonPointer("/abcd/ef".into()),
), // '/' is encoded
("#abcd+ef", Fragment::Anchor("abcd+ef".into())), // '+' should not translate to space
];
for test in tests {
let (_, got) = Fragment::split(test.0).unwrap();
assert_eq!(got, test.1, "Fragment::split({:?})", test.0);
}
}
// `None` marks tokens whose `~` is not followed by `0` or `1`.
#[test]
fn test_unescape() {
let tests = [
("bar~0", Some("bar~")),
("bar~1", Some("bar/")),
("bar~01", Some("bar~1")),
("bar~", None),
("bar~~", None),
];
for (tok, want) in tests {
let res = JsonPointer::unescape(tok).ok();
let got = res.as_ref().map(|c| c.as_ref());
assert_eq!(got, want, "unescape({:?})", tok)
}
}
// Integral floats must compare equal to the corresponding integers.
#[test]
fn test_equals() {
let tests = [["1.0", "1"], ["-1.0", "-1"]];
for [a, b] in tests {
let a = serde_json::from_str(a).unwrap();
let b = serde_json::from_str(b).unwrap();
assert!(equals(&a, &b));
}
}
// Values that are equal under `equals` must land on the same hash-map entry.
#[test]
fn test_hashed_value() {
let mut seen = AHashMap::with_capacity(10);
let (v1, v2) = (json!(2), json!(2.0));
assert!(equals(&v1, &v2));
assert!(seen.insert(HashedValue(&v1), 1).is_none());
assert!(seen.insert(HashedValue(&v2), 1).is_some());
}
}

1169
validator/src/validator.rs Normal file

File diff suppressed because it is too large Load Diff