From d6b34c99bb418184587b5c92e7101893d8f3afed Mon Sep 17 00:00:00 2001 From: Alex Groleau Date: Tue, 30 Sep 2025 19:56:34 -0400 Subject: [PATCH] jspg additional properties bug squashed --- Cargo.lock | 2 + GEMINI.md | 20 +- out.txt | 44 + src/lib.rs | 1810 ++++++++++++------------- src/schemas.rs | 6 +- src/tests.rs | 50 +- validator/Cargo.toml | 2 + validator/src/compiler.rs | 1680 +++++++++++------------ validator/src/content.rs | 90 +- validator/src/draft.rs | 996 +++++++------- validator/src/ecma.rs | 302 ++--- validator/src/formats.rs | 1312 +++++++++--------- validator/src/lib.rs | 986 +++++++------- validator/src/loader.rs | 368 ++--- validator/src/output.rs | 888 ++++++------ validator/src/root.rs | 196 +-- validator/src/roots.rs | 166 +-- validator/src/util.rs | 770 +++++------ validator/src/validator.rs | 2024 ++++++++++++++-------------- validator/tests/compiler.rs | 126 +- validator/tests/debug.rs | 52 +- validator/tests/examples.rs | 322 ++--- validator/tests/filepaths.rs | 24 +- validator/tests/invalid-schemas.rs | 86 +- validator/tests/output.rs | 180 +-- validator/tests/suite.rs | 166 +-- 26 files changed, 6340 insertions(+), 6328 deletions(-) create mode 100644 out.txt diff --git a/Cargo.lock b/Cargo.lock index 7162baa..bb2911f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -243,6 +243,8 @@ dependencies = [ "idna", "once_cell", "percent-encoding", + "pgrx", + "pgrx-tests", "regex", "regex-syntax", "rustls", diff --git a/GEMINI.md b/GEMINI.md index 3f0231a..c4f5cd4 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -15,19 +15,11 @@ It works by: The version of `boon` located in the `validator/` directory has been modified to address specific requirements of the `jspg` project. The key deviations from the upstream `boon` crate are as follows: -### 1. Correct Unevaluated Property Propagation in `$ref` +### 1. 
Recursive Runtime Strictness Control -- **Problem:** In the original `boon` implementation, if a schema validation failed inside a `$ref`, the set of properties that had been evaluated by that referenced schema was not correctly propagated back up to the parent validator. This caused the parent to incorrectly flag already-evaluated properties as "unevaluated," leading to spurious `unevaluatedProperties` errors. +- **Problem:** The `jspg` project requires that certain schemas enforce a strict "no extra properties" policy (specifically, schemas for public `puncs` and global `type`s). This strictness needs to cascade through the entire validation hierarchy, including all nested objects and `$ref` chains. A compile-time flag was unsuitable because it would incorrectly apply strictness to shared, reusable schemas. -- **Solution:** The `Uneval::merge` function in `validator/src/validator.rs` was modified. The original logic, which performed an *intersection* of unevaluated properties (`retain`), was replaced with a direct *assignment*. Now, the parent validator's set of unevaluated properties is completely replaced by the final set from the child validator. This ensures that the most current state of evaluated properties is always passed up the chain, regardless of validation success or failure within the `$ref`. - -### 2. Runtime Strictness Control - -- **Problem:** The `jspg` project requires that certain schemas (e.g., those for public `puncs`) enforce a strict "no extra properties" policy, while others do not. This strictness needs to cascade through the entire validation hierarchy, including all `$ref` chains. A compile-time flag was unsuitable because it would incorrectly apply strictness to shared, reusable schemas. - -- **Solution:** A runtime validation option was implemented. - 1. A `ValidationOptions { be_strict: bool }` struct was added and is passed to the core `validate` function in `validator.rs`. - 2. 
The `jspg` code determines whether a validation run should be strict (based on the `punc`'s `public` flag or if we are validating a a global `type`) and passes the appropriate option. - 3. The `Validator` struct carries these options through the entire recursive validation process. - 4. The `uneval_validate` function was modified to only enforce this strict check if `options.be_strict` is `true` **and** it is at the root of the validation scope (`self.scope.parent.is_none()`). This ensures the check only happens at the very end of a top-level validation, after all `$ref`s and sub-schemas have been processed. - 5. When this runtime strictness check fails, it now generates a more descriptive `ADDITIONAL_PROPERTIES_NOT_ALLOWED` error, rather than a generic `FALSE_SCHEMA` error. +- **Solution:** A runtime validation option was implemented to enforce strictness recursively. + 1. A `ValidationOptions { be_strict: bool }` struct was added. The `jspg` code in `src/lib.rs` determines whether a validation run should be strict (based on the `punc`'s `public` flag or if validating a global `type`) and passes the appropriate option to the validator. + 2. The `be_strict` option is propagated through the entire recursive validation process. A bug was fixed in `_validate_self` (which handles `$ref`s) to ensure that the sub-validator is always initialized to track unevaluated properties when `be_strict` is enabled. Previously, tracking was only initiated if the parent was already tracking unevaluated properties, causing strictness to be dropped across certain `$ref` boundaries. + 3. At any time, if `unevaluatedProperties` or `additionalProperties` is found in the schema, it should override the strict (or non-strict) validation at that level. 
\ No newline at end of file diff --git a/out.txt b/out.txt new file mode 100644 index 0000000..a571398 --- /dev/null +++ b/out.txt @@ -0,0 +1,44 @@ + +running 23 tests + Building extension with features pg_test pg17 + Running command "/opt/homebrew/bin/cargo" "build" "--lib" "--features" "pg_test pg17" "--message-format=json-render-diagnostics" + Installing extension + Copying control file to /opt/homebrew/share/postgresql@17/extension/jspg.control + Copying shared library to /opt/homebrew/lib/postgresql@17/jspg.dylib + Finished installing jspg +test tests::pg_test_cache_invalid ... ok +test tests::pg_test_validate_nested_req_deps ... ok +test tests::pg_test_validate_format_empty_string_with_ref ... ok +test tests::pg_test_validate_format_normal ... ok +test tests::pg_test_validate_format_empty_string ... ok +test tests::pg_test_validate_dependencies ... ok +test tests::pg_test_validate_dependencies_merging ... ok +test tests::pg_test_validate_additional_properties ... ok +test tests::pg_test_validate_enum_schema ... ok +test tests::pg_test_validate_errors ... ok +test tests::pg_test_validate_not_cached ... ok +test tests::pg_test_validate_oneof ... ok +test tests::pg_test_validate_punc_with_refs ... ok +test tests::pg_test_validate_property_merging ... ok +test tests::pg_test_validate_punc_local_refs ... ok +test tests::pg_test_validate_required_merging ... ok +test tests::pg_test_validate_required ... ok +test tests::pg_test_validate_simple ... ok +test tests::pg_test_validate_root_types ... ok +test tests::pg_test_validate_strict ... ok +test tests::pg_test_validate_title_override ... ok +test tests::pg_test_validate_unevaluated_properties ... ok +test tests::pg_test_validate_type_matching ... ok + +test result: ok. 23 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 7.66s + + +running 0 tests + +test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s + + +running 0 tests + +test result: ok. 
0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s + diff --git a/src/lib.rs b/src/lib.rs index d40a43a..b88bfdc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,7 +2,7 @@ use pgrx::*; pg_module_magic!(); -use boon::{CompileError, Compiler, ErrorKind, SchemaIndex, Schemas, ValidationError, Type, Types}; +use boon::{CompileError, Compiler, ErrorKind, SchemaIndex, Schemas, ValidationError, Type, Types, ValidationOptions}; use lazy_static::lazy_static; use serde_json::{json, Value, Number}; use std::borrow::Cow; @@ -11,979 +11,929 @@ use std::{collections::HashMap, sync::RwLock}; #[derive(Clone, Copy, Debug, PartialEq)] enum SchemaType { - Enum, - Type, - PublicPunc, - PrivatePunc, + Enum, + Type, + PublicPunc, + PrivatePunc, } struct Schema { - index: SchemaIndex, - t: SchemaType, - value: Value, + index: SchemaIndex, + t: SchemaType, + value: Value, } struct Cache { - schemas: Schemas, - map: HashMap, + schemas: Schemas, + map: HashMap, } // Structure to hold error information without lifetimes #[derive(Debug)] struct Error { - path: String, - code: String, - message: String, - cause: Value, // Changed from String to Value to store JSON + path: String, + code: String, + message: String, + cause: Value, // Changed from String to Value to store JSON } lazy_static! 
{ - static ref SCHEMA_CACHE: RwLock = RwLock::new(Cache { - schemas: Schemas::new(), - map: HashMap::new(), - }); + static ref SCHEMA_CACHE: RwLock = RwLock::new(Cache { + schemas: Schemas::new(), + map: HashMap::new(), + }); } #[pg_extern(strict)] fn cache_json_schemas(enums: JsonB, types: JsonB, puncs: JsonB) -> JsonB { - let mut cache = SCHEMA_CACHE.write().unwrap(); - let enums_value: Value = enums.0; - let types_value: Value = types.0; - let puncs_value: Value = puncs.0; - - *cache = Cache { - schemas: Schemas::new(), - map: HashMap::new(), - }; - - let mut compiler = Compiler::new(); - compiler.enable_format_assertions(); - - let mut errors = Vec::new(); - let mut schemas_to_compile = Vec::new(); - - // Phase 1: Enums - if let Some(enums_array) = enums_value.as_array() { - for enum_row in enums_array { - if let Some(schemas_raw) = enum_row.get("schemas") { - if let Some(schemas_array) = schemas_raw.as_array() { - for schema_def in schemas_array { - if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) { - schemas_to_compile.push((schema_id.to_string(), schema_def.clone(), SchemaType::Enum)); - } - } - } - } - } - } - - // Phase 2: Types - if let Some(types_array) = types_value.as_array() { - for type_row in types_array { - if let Some(schemas_raw) = type_row.get("schemas") { - if let Some(schemas_array) = schemas_raw.as_array() { - for schema_def in schemas_array { - if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) { - schemas_to_compile.push((schema_id.to_string(), schema_def.clone(), SchemaType::Type)); - } - } - } - } - } - } - - // Phase 3: Puncs - if let Some(puncs_array) = puncs_value.as_array() { - for punc_row in puncs_array { - if let Some(punc_obj) = punc_row.as_object() { - if let Some(punc_name) = punc_obj.get("name").and_then(|v| v.as_str()) { - let is_public = punc_obj.get("public").and_then(|v| v.as_bool()).unwrap_or(false); - let punc_schema_type = if is_public { SchemaType::PublicPunc } else { 
SchemaType::PrivatePunc }; - if let Some(schemas_raw) = punc_obj.get("schemas") { - if let Some(schemas_array) = schemas_raw.as_array() { - for schema_def in schemas_array { - if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) { - let request_schema_id = format!("{}.request", punc_name); - let response_schema_id = format!("{}.response", punc_name); - let schema_type_for_def = if schema_id == request_schema_id || schema_id == response_schema_id { - punc_schema_type - } else { - SchemaType::Type - }; - schemas_to_compile.push((schema_id.to_string(), schema_def.clone(), schema_type_for_def)); - } - } - } - } - } - } - } - } - - // Add all resources to compiler first - for (id, value, schema_type) in &schemas_to_compile { - add_schema_resource(&mut compiler, id, value.clone(), *schema_type, &mut errors); - } - - if !errors.is_empty() { - return JsonB(json!({ "errors": errors })); - } - - // Compile all schemas - compile_all_schemas(&mut compiler, &mut cache, &schemas_to_compile, &mut errors); - - if errors.is_empty() { - JsonB(json!({ "response": "success" })) - } else { - JsonB(json!({ "errors": errors })) - } -} - -// Helper function to add a schema resource (without compiling) -fn add_schema_resource( - compiler: &mut Compiler, - schema_id: &str, - mut schema_value: Value, - schema_type: SchemaType, - errors: &mut Vec -) { - match schema_type { - SchemaType::Enum | SchemaType::PrivatePunc => {}, - SchemaType::Type | SchemaType::PublicPunc => apply_strict_validation(&mut schema_value, schema_type), - } - if let Err(e) = compiler.add_resource(schema_id, schema_value) { - errors.push(json!({ - "code": "SCHEMA_RESOURCE_FAILED", - "message": format!("Failed to add schema resource '{}'", schema_id), - "details": { "schema": schema_id, "cause": format!("{}", e) } - })); - } -} - -// Helper function to compile all added resources -fn compile_all_schemas( - compiler: &mut Compiler, - cache: &mut Cache, - schemas_to_compile: &[(String, Value, 
SchemaType)], - errors: &mut Vec, -) { - for (id, value, schema_type) in schemas_to_compile { - match compiler.compile(id, &mut cache.schemas) { - Ok(index) => { - cache.map.insert(id.clone(), Schema { index, t: *schema_type, value: value.clone() }); - } - Err(e) => { - match &e { - CompileError::ValidationError { src, .. } => { - let mut error_list = Vec::new(); - collect_errors(src, &mut error_list); - let formatted_errors = format_errors(error_list, value, id); - errors.extend(formatted_errors); - } - _ => { - errors.push(json!({ - "code": "SCHEMA_COMPILATION_FAILED", - "message": format!("Schema '{}' compilation failed", id), - "details": { "schema": id, "cause": format!("{:?}", e) } - })); - } - }; - } - } - } -} - -// Helper function to apply strict validation to a schema -// -// This recursively adds unevaluatedProperties: false to object-type schemas, -// but SKIPS schemas inside if/then/else to avoid breaking conditional validation. -// For type schemas, it skips the top level to allow inheritance. 
-fn apply_strict_validation(schema: &mut Value, schema_type: SchemaType) { - apply_strict_validation_recursive(schema, false, schema_type, true); -} - -fn apply_strict_validation_recursive(schema: &mut Value, inside_conditional: bool, schema_type: SchemaType, is_top_level: bool) { - match schema { - Value::Object(map) => { - // Skip adding strict validation if we're inside a conditional - // OR if we're at the top level of a type schema (types should be extensible) - let skip_strict = inside_conditional || (matches!(schema_type, SchemaType::Type) && is_top_level); - - if !skip_strict { - // Apply unevaluatedProperties: false to schemas that have $ref OR type: "object" - let has_ref = map.contains_key("$ref"); - let has_object_type = map.get("type").and_then(|v| v.as_str()) == Some("object"); - - if (has_ref || has_object_type) && !map.contains_key("unevaluatedProperties") && !map.contains_key("additionalProperties") { - // Use unevaluatedProperties: false to prevent extra properties - // This considers all evaluated properties from all schemas including refs - map.insert("unevaluatedProperties".to_string(), Value::Bool(false)); - } - } - - // Recurse into all properties - for (key, value) in map.iter_mut() { - // Mark when we're inside conditional branches - let in_conditional = inside_conditional || matches!(key.as_str(), "if" | "then" | "else"); - apply_strict_validation_recursive(value, in_conditional, schema_type, false) - } - } - Value::Array(arr) => { - // Recurse into array items - for item in arr.iter_mut() { - apply_strict_validation_recursive(item, inside_conditional, schema_type, false); - } - } - _ => {} - } -} - -fn walk_and_validate_refs( - instance: &Value, - schema: &Value, - cache: &std::sync::RwLockReadGuard, - path_parts: &mut Vec, - type_validated: bool, - top_level_id: Option<&str>, - errors: &mut Vec, -) { - if let Some(ref_url) = schema.get("$ref").and_then(|v| v.as_str()) { - if let Some(s) = cache.map.get(ref_url) { - let mut 
new_type_validated = type_validated; - if !type_validated && s.t == SchemaType::Type { - let id_to_use = top_level_id.unwrap_or(ref_url); - let expected_type = id_to_use.split('.').next().unwrap_or(id_to_use); - if let Some(actual_type) = instance.get("type").and_then(|v| v.as_str()) { - if actual_type == expected_type { - new_type_validated = true; - } else { - path_parts.push("type".to_string()); - let path = format!("/{}", path_parts.join("/")); - path_parts.pop(); - errors.push(json!({ - "code": "TYPE_MISMATCH", - "message": format!("Instance type '{}' does not match expected type '{}' derived from schema $ref", actual_type, expected_type), - "details": { "path": path, "context": instance, "cause": { "expected": expected_type, "actual": actual_type }, "schema": ref_url } - })); - } - } else { - if top_level_id.is_some() { - let path = if path_parts.is_empty() { "".to_string() } else { format!("/{}", path_parts.join("/")) }; - errors.push(json!({ - "code": "TYPE_MISMATCH", - "message": "Instance is missing 'type' property required for schema validation", - "details": { "path": path, "context": instance, "cause": { "expected": expected_type }, "schema": ref_url } - })); - } - } - } - walk_and_validate_refs(instance, &s.value, cache, path_parts, new_type_validated, None, errors); - } - } - - if let Some(properties) = schema.get("properties").and_then(|v| v.as_object()) { - for (prop_name, prop_schema) in properties { - if let Some(prop_value) = instance.get(prop_name) { - path_parts.push(prop_name.clone()); - walk_and_validate_refs(prop_value, prop_schema, cache, path_parts, type_validated, None, errors); - path_parts.pop(); - } - } - } - - if let Some(items_schema) = schema.get("items") { - if let Some(instance_array) = instance.as_array() { - for (i, item) in instance_array.iter().enumerate() { - path_parts.push(i.to_string()); - walk_and_validate_refs(item, items_schema, cache, path_parts, false, None, errors); - path_parts.pop(); - } - } - } - - if let 
Some(all_of_array) = schema.get("allOf").and_then(|v| v.as_array()) { - for sub_schema in all_of_array { - walk_and_validate_refs(instance, sub_schema, cache, path_parts, type_validated, None, errors); - } - } - - if let Some(any_of_array) = schema.get("anyOf").and_then(|v| v.as_array()) { - for sub_schema in any_of_array { - walk_and_validate_refs(instance, sub_schema, cache, path_parts, type_validated, None, errors); - } - } - - if let Some(one_of_array) = schema.get("oneOf").and_then(|v| v.as_array()) { - for sub_schema in one_of_array { - walk_and_validate_refs(instance, sub_schema, cache, path_parts, type_validated, None, errors); - } - } - - if let Some(if_schema) = schema.get("if") { - walk_and_validate_refs(instance, if_schema, cache, path_parts, type_validated, None, errors); - } - - if let Some(then_schema) = schema.get("then") { - walk_and_validate_refs(instance, then_schema, cache, path_parts, type_validated, None, errors); - } - - if let Some(else_schema) = schema.get("else") { - walk_and_validate_refs(instance, else_schema, cache, path_parts, type_validated, None, errors); - } - - if let Some(not_schema) = schema.get("not") { - walk_and_validate_refs(instance, not_schema, cache, path_parts, type_validated, None, errors); - } -} - -#[pg_extern(strict, parallel_safe)] -fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB { - let cache = SCHEMA_CACHE.read().unwrap(); - match cache.map.get(schema_id) { - None => JsonB(json!({ - "errors": [{ - "code": "SCHEMA_NOT_FOUND", - "message": format!("Schema '{}' not found in cache", schema_id), - "details": { - "schema": schema_id, - "cause": "Schema was not found in bulk cache - ensure cache_json_schemas was called" - } - }] - })), - Some(schema) => { - let instance_value: Value = instance.0; - match cache.schemas.validate(&instance_value, schema.index) { - Ok(_) => { - let mut custom_errors = Vec::new(); - if schema.t == SchemaType::Type || schema.t == SchemaType::PublicPunc || schema.t == 
SchemaType::PrivatePunc { - let mut path_parts = vec![]; - let top_level_id = if schema.t == SchemaType::Type { Some(schema_id) } else { None }; - walk_and_validate_refs(&instance_value, &schema.value, &cache, &mut path_parts, false, top_level_id, &mut custom_errors); - } - - if custom_errors.is_empty() { - JsonB(json!({ "response": "success" })) - } else { - JsonB(json!({ "errors": custom_errors })) - } - } - Err(validation_error) => { - let mut error_list = Vec::new(); - collect_errors(&validation_error, &mut error_list); - let errors = format_errors(error_list, &instance_value, schema_id); - if errors.is_empty() { - JsonB(json!({ "response": "success" })) - } else { - JsonB(json!({ "errors": errors })) - } - } - } - } - } -} - -// Recursively collects validation errors -fn collect_errors(error: &ValidationError, errors_list: &mut Vec) { - // Check if this is a structural error that we should skip - let is_structural = matches!( - &error.kind, - ErrorKind::Group | ErrorKind::AllOf | ErrorKind::AnyOf | ErrorKind::Not | ErrorKind::OneOf(_) - ); - - if error.causes.is_empty() && !is_structural { - let base_path = error.instance_location.to_string(); - - // Match on error kind and handle each type - let errors_to_add = match &error.kind { - ErrorKind::Type { got, want } => handle_type_error(&base_path, got, want), - ErrorKind::Required { want } => handle_required_error(&base_path, want), - ErrorKind::Dependency { prop, missing } => handle_dependency_error(&base_path, prop, missing, false), - ErrorKind::DependentRequired { prop, missing } => handle_dependency_error(&base_path, prop, missing, true), - ErrorKind::AdditionalProperties { got } => handle_additional_properties_error(&base_path, got), - ErrorKind::Enum { want } => handle_enum_error(&base_path, want), - ErrorKind::Const { want } => handle_const_error(&base_path, want), - ErrorKind::MinLength { got, want } => handle_min_length_error(&base_path, *got, *want), - ErrorKind::MaxLength { got, want } => 
handle_max_length_error(&base_path, *got, *want), - ErrorKind::Pattern { got, want } => handle_pattern_error(&base_path, got, want), - ErrorKind::Minimum { got, want } => handle_minimum_error(&base_path, got, want), - ErrorKind::Maximum { got, want } => handle_maximum_error(&base_path, got, want), - ErrorKind::ExclusiveMinimum { got, want } => handle_exclusive_minimum_error(&base_path, got, want), - ErrorKind::ExclusiveMaximum { got, want } => handle_exclusive_maximum_error(&base_path, got, want), - ErrorKind::MultipleOf { got, want } => handle_multiple_of_error(&base_path, got, want), - ErrorKind::MinItems { got, want } => handle_min_items_error(&base_path, *got, *want), - ErrorKind::MaxItems { got, want } => handle_max_items_error(&base_path, *got, *want), - ErrorKind::UniqueItems { got } => handle_unique_items_error(&base_path, got), - ErrorKind::MinProperties { got, want } => handle_min_properties_error(&base_path, *got, *want), - ErrorKind::MaxProperties { got, want } => handle_max_properties_error(&base_path, *got, *want), - ErrorKind::AdditionalItems { got } => handle_additional_items_error(&base_path, *got), - ErrorKind::Format { want, got, err } => handle_format_error(&base_path, want, got, err), - ErrorKind::PropertyName { prop } => handle_property_name_error(&base_path, prop), - ErrorKind::Contains => handle_contains_error(&base_path), - ErrorKind::MinContains { got, want } => handle_min_contains_error(&base_path, got, *want), - ErrorKind::MaxContains { got, want } => handle_max_contains_error(&base_path, got, *want), - ErrorKind::ContentEncoding { want, err } => handle_content_encoding_error(&base_path, want, err), - ErrorKind::ContentMediaType { want, err, .. 
} => handle_content_media_type_error(&base_path, want, err), - ErrorKind::FalseSchema => handle_false_schema_error(&base_path), - ErrorKind::Not => handle_not_error(&base_path), - ErrorKind::RefCycle { url, kw_loc1, kw_loc2 } => handle_ref_cycle_error(&base_path, url, kw_loc1, kw_loc2), - ErrorKind::Reference { kw, url } => handle_reference_error(&base_path, kw, url), - ErrorKind::Schema { url } => handle_schema_error(&base_path, url), - ErrorKind::ContentSchema => handle_content_schema_error(&base_path), - ErrorKind::Group => handle_group_error(&base_path), - ErrorKind::AllOf => handle_all_of_error(&base_path), - ErrorKind::AnyOf => handle_any_of_error(&base_path), - ErrorKind::OneOf(matched) => handle_one_of_error(&base_path, matched), - }; - - // Add all generated errors - for error in errors_to_add { - errors_list.push(error); - } - } else { - // Recurse into causes - for cause in &error.causes { - collect_errors(cause, errors_list); - } - } -} - -// Handler functions for each error kind -fn handle_type_error(base_path: &str, got: &Type, want: &Types) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "TYPE_MISMATCH".to_string(), - message: format!("Expected {} but got {}", - want.iter().map(|t| t.to_string()).collect::>().join(" or "), - got - ), - cause: json!({ - "got": got.to_string(), - "want": want.iter().map(|t| t.to_string()).collect::>() - }), - }] -} - -fn handle_required_error(base_path: &str, want: &[&str]) -> Vec { - // Create a separate error for each missing required field - want.iter().map(|missing_field| { - let field_path = if base_path.is_empty() { - format!("/{}", missing_field) - } else { - format!("{}/{}", base_path, missing_field) - }; - - Error { - path: field_path, - code: "REQUIRED_FIELD_MISSING".to_string(), - message: format!("Required field '{}' is missing", missing_field), - cause: json!({ "want": [missing_field] }), - } - }).collect() -} - -fn handle_dependency_error(base_path: &str, prop: &str, missing: &[&str], 
is_dependent_required: bool) -> Vec { - // Create a separate error for each missing field - missing.iter().map(|missing_field| { - let field_path = if base_path.is_empty() { - format!("/{}", missing_field) - } else { - format!("{}/{}", base_path, missing_field) - }; - - let (code, message) = if is_dependent_required { - ( - "DEPENDENT_REQUIRED_MISSING".to_string(), - format!("Field '{}' is required when '{}' is present", missing_field, prop), - ) - } else { - ( - "DEPENDENCY_FAILED".to_string(), - format!("Field '{}' is required when '{}' is present", missing_field, prop), - ) - }; - - Error { - path: field_path, - code, - message, - cause: json!({ "prop": prop, "missing": [missing_field] }), - } - }).collect() -} - -fn handle_additional_properties_error(base_path: &str, got: &[Cow]) -> Vec { - // Create a separate error for each additional property that's not allowed - got.iter().map(|extra_prop| { - let field_path = if base_path.is_empty() { - format!("/{}", extra_prop) - } else { - format!("{}/{}", base_path, extra_prop) - }; - - Error { - path: field_path, - code: "ADDITIONAL_PROPERTIES_NOT_ALLOWED".to_string(), - message: format!("Property '{}' is not allowed", extra_prop), - cause: json!({ "got": [extra_prop.to_string()] }), - } - }).collect() -} - -fn handle_enum_error(base_path: &str, want: &[Value]) -> Vec { - let message = if want.len() == 1 { - format!("Value must be {}", serde_json::to_string(&want[0]).unwrap_or_else(|_| "unknown".to_string())) - } else { - format!("Value must be one of: {}", - want.iter() - .map(|v| serde_json::to_string(v).unwrap_or_else(|_| "unknown".to_string())) - .collect::>() - .join(", ") - ) - }; - - vec![Error { - path: base_path.to_string(), - code: "ENUM_VIOLATED".to_string(), - message, - cause: json!({ "want": want }), - }] -} - -fn handle_const_error(base_path: &str, want: &Value) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "CONST_VIOLATED".to_string(), - message: format!("Value must be exactly {}", 
serde_json::to_string(want).unwrap_or_else(|_| "unknown".to_string())), - cause: json!({ "want": want }), - }] -} - -fn handle_min_length_error(base_path: &str, got: usize, want: usize) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "MIN_LENGTH_VIOLATED".to_string(), - message: format!("String length must be at least {} characters, but got {}", want, got), - cause: json!({ "got": got, "want": want }), - }] -} - -fn handle_max_length_error(base_path: &str, got: usize, want: usize) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "MAX_LENGTH_VIOLATED".to_string(), - message: format!("String length must be at most {} characters, but got {}", want, got), - cause: json!({ "got": got, "want": want }), - }] -} - -fn handle_pattern_error(base_path: &str, got: &Cow, want: &str) -> Vec { - let display_value = if got.len() > 50 { - format!("{}...", &got[..50]) - } else { - got.to_string() - }; - - vec![Error { - path: base_path.to_string(), - code: "PATTERN_VIOLATED".to_string(), - message: format!("Value '{}' does not match pattern '{}'", display_value, want), - cause: json!({ "got": got.to_string(), "want": want }), - }] -} - -fn handle_minimum_error(base_path: &str, got: &Cow, want: &Number) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "MINIMUM_VIOLATED".to_string(), - message: format!("Value must be at least {}, but got {}", want, got), - cause: json!({ "got": got, "want": want }), - }] -} - -fn handle_maximum_error(base_path: &str, got: &Cow, want: &Number) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "MAXIMUM_VIOLATED".to_string(), - message: format!("Value must be at most {}, but got {}", want, got), - cause: json!({ "got": got, "want": want }), - }] -} - -fn handle_exclusive_minimum_error(base_path: &str, got: &Cow, want: &Number) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "EXCLUSIVE_MINIMUM_VIOLATED".to_string(), - message: format!("Value must be greater than {}, but got {}", want, 
got), - cause: json!({ "got": got, "want": want }), - }] -} - -fn handle_exclusive_maximum_error(base_path: &str, got: &Cow, want: &Number) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "EXCLUSIVE_MAXIMUM_VIOLATED".to_string(), - message: format!("Value must be less than {}, but got {}", want, got), - cause: json!({ "got": got, "want": want }), - }] -} - -fn handle_multiple_of_error(base_path: &str, got: &Cow, want: &Number) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "MULTIPLE_OF_VIOLATED".to_string(), - message: format!("{} is not a multiple of {}", got, want), - cause: json!({ "got": got, "want": want }), - }] -} - -fn handle_min_items_error(base_path: &str, got: usize, want: usize) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "MIN_ITEMS_VIOLATED".to_string(), - message: format!("Array must have at least {} items, but has {}", want, got), - cause: json!({ "got": got, "want": want }), - }] -} - -fn handle_max_items_error(base_path: &str, got: usize, want: usize) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "MAX_ITEMS_VIOLATED".to_string(), - message: format!("Array must have at most {} items, but has {}", want, got), - cause: json!({ "got": got, "want": want }), - }] -} - -fn handle_unique_items_error(base_path: &str, got: &[usize; 2]) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "UNIQUE_ITEMS_VIOLATED".to_string(), - message: format!("Array items at positions {} and {} are duplicates", got[0], got[1]), - cause: json!({ "got": got }), - }] -} - -fn handle_min_properties_error(base_path: &str, got: usize, want: usize) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "MIN_PROPERTIES_VIOLATED".to_string(), - message: format!("Object must have at least {} properties, but has {}", want, got), - cause: json!({ "got": got, "want": want }), - }] -} - -fn handle_max_properties_error(base_path: &str, got: usize, want: usize) -> Vec { - vec![Error { - path: 
base_path.to_string(), - code: "MAX_PROPERTIES_VIOLATED".to_string(), - message: format!("Object must have at most {} properties, but has {}", want, got), - cause: json!({ "got": got, "want": want }), - }] -} - -fn handle_additional_items_error(base_path: &str, got: usize) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "ADDITIONAL_ITEMS_NOT_ALLOWED".to_string(), - message: format!("Last {} array items are not allowed", got), - cause: json!({ "got": got }), - }] -} - -fn handle_format_error(base_path: &str, want: &str, got: &Cow, err: &Box) -> Vec { - // If the value is an empty string, skip format validation. - if let Value::String(s) = got.as_ref() { - if s.is_empty() { - return vec![]; - } - } - - vec![Error { - path: base_path.to_string(), - code: "FORMAT_INVALID".to_string(), - message: format!("Value {} is not a valid {} format", - serde_json::to_string(got.as_ref()).unwrap_or_else(|_| "unknown".to_string()), - want - ), - cause: json!({ "got": got, "want": want, "err": err.to_string() }), - }] -} - -fn handle_property_name_error(base_path: &str, prop: &str) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "INVALID_PROPERTY_NAME".to_string(), - message: format!("Property name '{}' is invalid", prop), - cause: json!({ "prop": prop }), - }] -} - -fn handle_contains_error(base_path: &str) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "CONTAINS_FAILED".to_string(), - message: "No array items match the required schema".to_string(), - cause: json!({}), - }] -} - -fn handle_min_contains_error(base_path: &str, got: &[usize], want: usize) -> Vec { - let message = if got.is_empty() { - format!("At least {} array items must match the schema, but none do", want) - } else { - format!("At least {} array items must match the schema, but only {} do (at positions {})", - want, - got.len(), - got.iter().map(|i| i.to_string()).collect::>().join(", ") - ) - }; - - vec![Error { - path: base_path.to_string(), - code: 
"MIN_CONTAINS_VIOLATED".to_string(), - message, - cause: json!({ "got": got, "want": want }), - }] -} - -fn handle_max_contains_error(base_path: &str, got: &[usize], want: usize) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "MAX_CONTAINS_VIOLATED".to_string(), - message: format!("At most {} array items can match the schema, but {} do (at positions {})", - want, - got.len(), - got.iter().map(|i| i.to_string()).collect::>().join(", ") - ), - cause: json!({ "got": got, "want": want }), - }] -} - -fn handle_content_encoding_error(base_path: &str, want: &str, err: &Box) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "CONTENT_ENCODING_INVALID".to_string(), - message: format!("Content is not valid {} encoding: {}", want, err), - cause: json!({ "want": want, "err": err.to_string() }), - }] -} - -fn handle_content_media_type_error(base_path: &str, want: &str, err: &Box) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "CONTENT_MEDIA_TYPE_INVALID".to_string(), - message: format!("Content is not valid {} media type: {}", want, err), - cause: json!({ "want": want, "err": err.to_string() }), - }] -} - -fn handle_false_schema_error(base_path: &str) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "FALSE_SCHEMA".to_string(), - message: "This schema always fails validation".to_string(), - cause: json!({}), - }] -} - -fn handle_not_error(base_path: &str) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "NOT_VIOLATED".to_string(), - message: "Value matches a schema that it should not match".to_string(), - cause: json!({}), - }] -} - -fn handle_ref_cycle_error(base_path: &str, url: &str, kw_loc1: &str, kw_loc2: &str) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "REFERENCE_CYCLE".to_string(), - message: format!("Reference cycle detected: both '{}' and '{}' resolve to '{}'", kw_loc1, kw_loc2, url), - cause: json!({ "url": url, "kw_loc1": kw_loc1, "kw_loc2": kw_loc2 }), - }] -} - -fn 
handle_reference_error(base_path: &str, kw: &str, url: &str) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "REFERENCE_FAILED".to_string(), - message: format!("{} reference to '{}' failed validation", kw, url), - cause: json!({ "kw": kw, "url": url }), - }] -} - -fn handle_schema_error(base_path: &str, url: &str) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "SCHEMA_FAILED".to_string(), - message: format!("Schema '{}' validation failed", url), - cause: json!({ "url": url }), - }] -} - -fn handle_content_schema_error(base_path: &str) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "CONTENT_SCHEMA_FAILED".to_string(), - message: "Content schema validation failed".to_string(), - cause: json!({}), - }] -} - -fn handle_group_error(base_path: &str) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "VALIDATION_FAILED".to_string(), - message: "Validation failed".to_string(), - cause: json!({}), - }] -} - -fn handle_all_of_error(base_path: &str) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "ALL_OF_VIOLATED".to_string(), - message: "Value does not match all required schemas".to_string(), - cause: json!({}), - }] -} - -fn handle_any_of_error(base_path: &str) -> Vec { - vec![Error { - path: base_path.to_string(), - code: "ANY_OF_VIOLATED".to_string(), - message: "Value does not match any of the allowed schemas".to_string(), - cause: json!({}), - }] -} - -fn handle_one_of_error(base_path: &str, matched: &Option<(usize, usize)>) -> Vec { - let (message, cause) = match matched { - None => ( - "Value must match exactly one schema, but matches none".to_string(), - json!({ "matched_indices": null }) - ), - Some((i, j)) => ( - format!("Value must match exactly one schema, but matches schemas at positions {} and {}", i, j), - json!({ "matched_indices": [i, j] }) - ), - }; - - vec![Error { - path: base_path.to_string(), - code: "ONE_OF_VIOLATED".to_string(), - message, - cause, - }] -} - -// Formats errors 
according to DropError structure -fn format_errors(errors: Vec, instance: &Value, schema_id: &str) -> Vec { - // Deduplicate by instance_path and format as DropError - let mut unique_errors: HashMap = HashMap::new(); - for error in errors { - if let Entry::Vacant(entry) = unique_errors.entry(error.path.clone()) { - // Extract the failing value from the instance - let failing_value = extract_value_at_path(instance, &error.path); - entry.insert(json!({ - "code": error.code, - "message": error.message, - "details": { - "path": error.path, - "context": failing_value, - "cause": error.cause, - "schema": schema_id - } - })); - } - } - - unique_errors.into_values().collect() -} - -// Helper function to extract value at a JSON pointer path -fn extract_value_at_path(instance: &Value, path: &str) -> Value { - let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); - let mut current = instance; - - for part in parts { - match current { - Value::Object(map) => { - if let Some(value) = map.get(part) { - current = value; - } else { - return Value::Null; - } - } - Value::Array(arr) => { - if let Ok(index) = part.parse::() { - if let Some(value) = arr.get(index) { - current = value; - } else { - return Value::Null; - } - } else { - return Value::Null; - } - } - _ => return Value::Null, - } - } - - current.clone() -} - -#[pg_extern(strict, parallel_safe)] -fn json_schema_cached(schema_id: &str) -> bool { - let cache = SCHEMA_CACHE.read().unwrap(); - cache.map.contains_key(schema_id) -} - -#[pg_extern(strict)] -fn clear_json_schemas() -> JsonB { let mut cache = SCHEMA_CACHE.write().unwrap(); + let enums_value: Value = enums.0; + let types_value: Value = types.0; + let puncs_value: Value = puncs.0; + *cache = Cache { schemas: Schemas::new(), map: HashMap::new(), }; - JsonB(json!({ "response": "success" })) + + let mut compiler = Compiler::new(); + compiler.enable_format_assertions(); + + let mut errors = Vec::new(); + let mut schemas_to_compile = Vec::new(); + + 
// Phase 1: Enums + if let Some(enums_array) = enums_value.as_array() { + for enum_row in enums_array { + if let Some(schemas_raw) = enum_row.get("schemas") { + if let Some(schemas_array) = schemas_raw.as_array() { + for schema_def in schemas_array { + if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) { + schemas_to_compile.push((schema_id.to_string(), schema_def.clone(), SchemaType::Enum)); + } + } + } + } + } + } + + // Phase 2: Types + if let Some(types_array) = types_value.as_array() { + for type_row in types_array { + if let Some(schemas_raw) = type_row.get("schemas") { + if let Some(schemas_array) = schemas_raw.as_array() { + for schema_def in schemas_array { + if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) { + schemas_to_compile.push((schema_id.to_string(), schema_def.clone(), SchemaType::Type)); + } + } + } + } + } + } + + // Phase 3: Puncs + if let Some(puncs_array) = puncs_value.as_array() { + for punc_row in puncs_array { + if let Some(punc_obj) = punc_row.as_object() { + if let Some(punc_name) = punc_obj.get("name").and_then(|v| v.as_str()) { + let is_public = punc_obj.get("public").and_then(|v| v.as_bool()).unwrap_or(false); + let punc_schema_type = if is_public { SchemaType::PublicPunc } else { SchemaType::PrivatePunc }; + if let Some(schemas_raw) = punc_obj.get("schemas") { + if let Some(schemas_array) = schemas_raw.as_array() { + for schema_def in schemas_array { + if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) { + let request_schema_id = format!("{}.request", punc_name); + let response_schema_id = format!("{}.response", punc_name); + let schema_type_for_def = if schema_id == request_schema_id || schema_id == response_schema_id { + punc_schema_type + } else { + SchemaType::Type + }; + schemas_to_compile.push((schema_id.to_string(), schema_def.clone(), schema_type_for_def)); + } + } + } + } + } + } + } + } + + // Add all resources to compiler first + for (id, value, schema_type) 
in &schemas_to_compile { + add_schema_resource(&mut compiler, id, value.clone(), *schema_type, &mut errors); + } + + if !errors.is_empty() { + return JsonB(json!({ "errors": errors })); + } + + // Compile all schemas + compile_all_schemas(&mut compiler, &mut cache, &schemas_to_compile, &mut errors); + + if errors.is_empty() { + JsonB(json!({ "response": "success" })) + } else { + JsonB(json!({ "errors": errors })) + } +} + +// Helper function to add a schema resource (without compiling) +fn add_schema_resource( + compiler: &mut Compiler, + schema_id: &str, + schema_value: Value, + _schema_type: SchemaType, + errors: &mut Vec +) { + if let Err(e) = compiler.add_resource(schema_id, schema_value) { + errors.push(json!({ + "code": "SCHEMA_RESOURCE_FAILED", + "message": format!("Failed to add schema resource '{}'", schema_id), + "details": { "schema": schema_id, "cause": format!("{}", e) } + })); + } +} + +// Helper function to compile all added resources +fn compile_all_schemas( + compiler: &mut Compiler, + cache: &mut Cache, + schemas_to_compile: &[(String, Value, SchemaType)], + errors: &mut Vec, +) { + for (id, value, schema_type) in schemas_to_compile { + match compiler.compile(id, &mut cache.schemas) { + Ok(index) => { + cache.map.insert(id.clone(), Schema { index, t: *schema_type, value: value.clone() }); + } + Err(e) => { + match &e { + CompileError::ValidationError { src, .. 
} => { + let mut error_list = Vec::new(); + collect_errors(src, &mut error_list); + let formatted_errors = format_errors(error_list, value, id); + errors.extend(formatted_errors); + } + _ => { + errors.push(json!({ + "code": "SCHEMA_COMPILATION_FAILED", + "message": format!("Schema '{}' compilation failed", id), + "details": { "schema": id, "cause": format!("{:?}", e) } + })); + } + }; + } + } + } +} + +fn walk_and_validate_refs( + instance: &Value, + schema: &Value, + cache: &std::sync::RwLockReadGuard, + path_parts: &mut Vec, + type_validated: bool, + top_level_id: Option<&str>, + errors: &mut Vec, +) { + if let Some(ref_url) = schema.get("$ref").and_then(|v| v.as_str()) { + if let Some(s) = cache.map.get(ref_url) { + let mut new_type_validated = type_validated; + if !type_validated && s.t == SchemaType::Type { + let id_to_use = top_level_id.unwrap_or(ref_url); + let expected_type = id_to_use.split('.').next().unwrap_or(id_to_use); + if let Some(actual_type) = instance.get("type").and_then(|v| v.as_str()) { + if actual_type == expected_type { + new_type_validated = true; + } else { + path_parts.push("type".to_string()); + let path = format!("/{}", path_parts.join("/")); + path_parts.pop(); + errors.push(json!({ + "code": "TYPE_MISMATCH", + "message": format!("Instance type '{}' does not match expected type '{}' derived from schema $ref", actual_type, expected_type), + "details": { "path": path, "context": instance, "cause": { "expected": expected_type, "actual": actual_type }, "schema": ref_url } + })); + } + } else { + if top_level_id.is_some() { + let path = if path_parts.is_empty() { "".to_string() } else { format!("/{}", path_parts.join("/")) }; + errors.push(json!({ + "code": "TYPE_MISMATCH", + "message": "Instance is missing 'type' property required for schema validation", + "details": { "path": path, "context": instance, "cause": { "expected": expected_type }, "schema": ref_url } + })); + } + } + } + walk_and_validate_refs(instance, &s.value, cache, 
path_parts, new_type_validated, None, errors); + } + } + + if let Some(properties) = schema.get("properties").and_then(|v| v.as_object()) { + for (prop_name, prop_schema) in properties { + if let Some(prop_value) = instance.get(prop_name) { + path_parts.push(prop_name.clone()); + walk_and_validate_refs(prop_value, prop_schema, cache, path_parts, type_validated, None, errors); + path_parts.pop(); + } + } + } + + if let Some(items_schema) = schema.get("items") { + if let Some(instance_array) = instance.as_array() { + for (i, item) in instance_array.iter().enumerate() { + path_parts.push(i.to_string()); + walk_and_validate_refs(item, items_schema, cache, path_parts, false, None, errors); + path_parts.pop(); + } + } + } + + if let Some(all_of_array) = schema.get("allOf").and_then(|v| v.as_array()) { + for sub_schema in all_of_array { + walk_and_validate_refs(instance, sub_schema, cache, path_parts, type_validated, None, errors); + } + } + + if let Some(any_of_array) = schema.get("anyOf").and_then(|v| v.as_array()) { + for sub_schema in any_of_array { + walk_and_validate_refs(instance, sub_schema, cache, path_parts, type_validated, None, errors); + } + } + + if let Some(one_of_array) = schema.get("oneOf").and_then(|v| v.as_array()) { + for sub_schema in one_of_array { + walk_and_validate_refs(instance, sub_schema, cache, path_parts, type_validated, None, errors); + } + } + + if let Some(if_schema) = schema.get("if") { + walk_and_validate_refs(instance, if_schema, cache, path_parts, type_validated, None, errors); + } + + if let Some(then_schema) = schema.get("then") { + walk_and_validate_refs(instance, then_schema, cache, path_parts, type_validated, None, errors); + } + + if let Some(else_schema) = schema.get("else") { + walk_and_validate_refs(instance, else_schema, cache, path_parts, type_validated, None, errors); + } + + if let Some(not_schema) = schema.get("not") { + walk_and_validate_refs(instance, not_schema, cache, path_parts, type_validated, None, errors); + } +} 
+ +#[pg_extern(strict, parallel_safe)] +fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB { + let cache = SCHEMA_CACHE.read().unwrap(); + match cache.map.get(schema_id) { + None => JsonB(json!({ + "errors": [{ + "code": "SCHEMA_NOT_FOUND", + "message": format!("Schema '{}' not found in cache", schema_id), + "details": { + "schema": schema_id, + "cause": "Schema was not found in bulk cache - ensure cache_json_schemas was called" + } + }] + })), + Some(schema) => { + let instance_value: Value = instance.0; + let options = match schema.t { + SchemaType::Type | SchemaType::PublicPunc => Some(ValidationOptions { be_strict: true }), + _ => None, + }; + + match cache.schemas.validate(&instance_value, schema.index, options.as_ref()) { + Ok(_) => { + let mut custom_errors = Vec::new(); + if schema.t == SchemaType::Type || schema.t == SchemaType::PublicPunc || schema.t == SchemaType::PrivatePunc { + let mut path_parts = vec![]; + let top_level_id = if schema.t == SchemaType::Type { Some(schema_id) } else { None }; + walk_and_validate_refs(&instance_value, &schema.value, &cache, &mut path_parts, false, top_level_id, &mut custom_errors); + } + + if custom_errors.is_empty() { + JsonB(json!({ "response": "success" })) + } else { + JsonB(json!({ "errors": custom_errors })) + } + } + Err(validation_error) => { + let mut error_list = Vec::new(); + collect_errors(&validation_error, &mut error_list); + let errors = format_errors(error_list, &instance_value, schema_id); + if errors.is_empty() { + JsonB(json!({ "response": "success" })) + } else { + JsonB(json!({ "errors": errors })) + } + } + } + } + } +} + +// Recursively collects validation errors +fn collect_errors(error: &ValidationError, errors_list: &mut Vec) { + // Check if this is a structural error that we should skip + let is_structural = matches!( + &error.kind, + ErrorKind::Group | ErrorKind::AllOf | ErrorKind::AnyOf | ErrorKind::Not | ErrorKind::OneOf(_) + ); + + if !error.causes.is_empty() || 
is_structural { + for cause in &error.causes { + collect_errors(cause, errors_list); + } + return + } + + let base_path = error.instance_location.to_string(); + let errors_to_add = match &error.kind { + ErrorKind::Type { got, want } => handle_type_error(&base_path, got, want), + ErrorKind::Required { want } => handle_required_error(&base_path, want), + ErrorKind::Dependency { prop, missing } => handle_dependency_error(&base_path, prop, missing, false), + ErrorKind::DependentRequired { prop, missing } => handle_dependency_error(&base_path, prop, missing, true), + ErrorKind::AdditionalProperties { got } => handle_additional_properties_error(&base_path, got), + ErrorKind::Enum { want } => handle_enum_error(&base_path, want), + ErrorKind::Const { want } => handle_const_error(&base_path, want), + ErrorKind::MinLength { got, want } => handle_min_length_error(&base_path, *got, *want), + ErrorKind::MaxLength { got, want } => handle_max_length_error(&base_path, *got, *want), + ErrorKind::Pattern { got, want } => handle_pattern_error(&base_path, got, want), + ErrorKind::Minimum { got, want } => handle_minimum_error(&base_path, got, want), + ErrorKind::Maximum { got, want } => handle_maximum_error(&base_path, got, want), + ErrorKind::ExclusiveMinimum { got, want } => handle_exclusive_minimum_error(&base_path, got, want), + ErrorKind::ExclusiveMaximum { got, want } => handle_exclusive_maximum_error(&base_path, got, want), + ErrorKind::MultipleOf { got, want } => handle_multiple_of_error(&base_path, got, want), + ErrorKind::MinItems { got, want } => handle_min_items_error(&base_path, *got, *want), + ErrorKind::MaxItems { got, want } => handle_max_items_error(&base_path, *got, *want), + ErrorKind::UniqueItems { got } => handle_unique_items_error(&base_path, got), + ErrorKind::MinProperties { got, want } => handle_min_properties_error(&base_path, *got, *want), + ErrorKind::MaxProperties { got, want } => handle_max_properties_error(&base_path, *got, *want), + 
ErrorKind::AdditionalItems { got } => handle_additional_items_error(&base_path, *got), + ErrorKind::Format { want, got, err } => handle_format_error(&base_path, want, got, err), + ErrorKind::PropertyName { prop } => handle_property_name_error(&base_path, prop), + ErrorKind::Contains => handle_contains_error(&base_path), + ErrorKind::MinContains { got, want } => handle_min_contains_error(&base_path, got, *want), + ErrorKind::MaxContains { got, want } => handle_max_contains_error(&base_path, got, *want), + ErrorKind::ContentEncoding { want, err } => handle_content_encoding_error(&base_path, want, err), + ErrorKind::ContentMediaType { want, err, .. } => handle_content_media_type_error(&base_path, want, err), + ErrorKind::FalseSchema => handle_false_schema_error(&base_path), + ErrorKind::Not => handle_not_error(&base_path), + ErrorKind::RefCycle { url, kw_loc1, kw_loc2 } => handle_ref_cycle_error(&base_path, url, kw_loc1, kw_loc2), + ErrorKind::Reference { kw, url } => handle_reference_error(&base_path, kw, url), + ErrorKind::Schema { url } => handle_schema_error(&base_path, url), + ErrorKind::ContentSchema => handle_content_schema_error(&base_path), + ErrorKind::Group => handle_group_error(&base_path), + ErrorKind::AllOf => handle_all_of_error(&base_path), + ErrorKind::AnyOf => handle_any_of_error(&base_path), + ErrorKind::OneOf(matched) => handle_one_of_error(&base_path, matched), + }; + + errors_list.extend(errors_to_add); +} + +// Handler functions for each error kind +fn handle_type_error(base_path: &str, got: &Type, want: &Types) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "TYPE_MISMATCH".to_string(), + message: format!("Expected {} but got {}", + want.iter().map(|t| t.to_string()).collect::>().join(" or "), + got + ), + cause: json!({ + "got": got.to_string(), + "want": want.iter().map(|t| t.to_string()).collect::>() + }), + }] +} + +fn handle_required_error(base_path: &str, want: &[&str]) -> Vec { + // Create a separate error for each missing 
required field + want.iter().map(|missing_field| { + let field_path = if base_path.is_empty() { + format!("/{}", missing_field) + } else { + format!("{}/{}", base_path, missing_field) + }; + + Error { + path: field_path, + code: "REQUIRED_FIELD_MISSING".to_string(), + message: format!("Required field '{}' is missing", missing_field), + cause: json!({ "want": [missing_field] }), + } + }).collect() +} + +fn handle_dependency_error(base_path: &str, prop: &str, missing: &[&str], is_dependent_required: bool) -> Vec { + // Create a separate error for each missing field + missing.iter().map(|missing_field| { + let field_path = if base_path.is_empty() { + format!("/{}", missing_field) + } else { + format!("{}/{}", base_path, missing_field) + }; + + let (code, message) = if is_dependent_required { + ( + "DEPENDENT_REQUIRED_MISSING".to_string(), + format!("Field '{}' is required when '{}' is present", missing_field, prop), + ) + } else { + ( + "DEPENDENCY_FAILED".to_string(), + format!("Field '{}' is required when '{}' is present", missing_field, prop), + ) + }; + + Error { + path: field_path, + code, + message, + cause: json!({ "prop": prop, "missing": [missing_field] }), + } + }).collect() +} + +fn handle_additional_properties_error(base_path: &str, got: &[Cow]) -> Vec { + let mut errors = Vec::new(); + for extra_prop in got { + let field_path = if base_path.is_empty() { + format!("/{}", extra_prop) + } else { + format!("{}/{}", base_path, extra_prop) + }; + errors.push(Error { + path: field_path, + code: "ADDITIONAL_PROPERTIES_NOT_ALLOWED".to_string(), + message: format!("Property '{}' is not allowed", extra_prop), + cause: json!({ "got": [extra_prop.to_string()] }), + }); + } + errors +} + +fn handle_enum_error(base_path: &str, want: &[Value]) -> Vec { + let message = if want.len() == 1 { + format!("Value must be {}", serde_json::to_string(&want[0]).unwrap_or_else(|_| "unknown".to_string())) + } else { + format!("Value must be one of: {}", + want.iter() + .map(|v| 
serde_json::to_string(v).unwrap_or_else(|_| "unknown".to_string())) + .collect::>() + .join(", ") + ) + }; + + vec![Error { + path: base_path.to_string(), + code: "ENUM_VIOLATED".to_string(), + message, + cause: json!({ "want": want }), + }] +} + +fn handle_const_error(base_path: &str, want: &Value) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "CONST_VIOLATED".to_string(), + message: format!("Value must be exactly {}", serde_json::to_string(want).unwrap_or_else(|_| "unknown".to_string())), + cause: json!({ "want": want }), + }] +} + +fn handle_min_length_error(base_path: &str, got: usize, want: usize) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "MIN_LENGTH_VIOLATED".to_string(), + message: format!("String length must be at least {} characters, but got {}", want, got), + cause: json!({ "got": got, "want": want }), + }] +} + +fn handle_max_length_error(base_path: &str, got: usize, want: usize) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "MAX_LENGTH_VIOLATED".to_string(), + message: format!("String length must be at most {} characters, but got {}", want, got), + cause: json!({ "got": got, "want": want }), + }] +} + +fn handle_pattern_error(base_path: &str, got: &Cow, want: &str) -> Vec { + let display_value = if got.len() > 50 { + format!("{}...", &got[..50]) + } else { + got.to_string() + }; + + vec![Error { + path: base_path.to_string(), + code: "PATTERN_VIOLATED".to_string(), + message: format!("Value '{}' does not match pattern '{}'", display_value, want), + cause: json!({ "got": got.to_string(), "want": want }), + }] +} + +fn handle_minimum_error(base_path: &str, got: &Cow, want: &Number) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "MINIMUM_VIOLATED".to_string(), + message: format!("Value must be at least {}, but got {}", want, got), + cause: json!({ "got": got, "want": want }), + }] +} + +fn handle_maximum_error(base_path: &str, got: &Cow, want: &Number) -> Vec { + vec![Error { + path: 
base_path.to_string(), + code: "MAXIMUM_VIOLATED".to_string(), + message: format!("Value must be at most {}, but got {}", want, got), + cause: json!({ "got": got, "want": want }), + }] +} + +fn handle_exclusive_minimum_error(base_path: &str, got: &Cow, want: &Number) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "EXCLUSIVE_MINIMUM_VIOLATED".to_string(), + message: format!("Value must be greater than {}, but got {}", want, got), + cause: json!({ "got": got, "want": want }), + }] +} + +fn handle_exclusive_maximum_error(base_path: &str, got: &Cow, want: &Number) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "EXCLUSIVE_MAXIMUM_VIOLATED".to_string(), + message: format!("Value must be less than {}, but got {}", want, got), + cause: json!({ "got": got, "want": want }), + }] +} + +fn handle_multiple_of_error(base_path: &str, got: &Cow, want: &Number) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "MULTIPLE_OF_VIOLATED".to_string(), + message: format!("{} is not a multiple of {}", got, want), + cause: json!({ "got": got, "want": want }), + }] +} + +fn handle_min_items_error(base_path: &str, got: usize, want: usize) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "MIN_ITEMS_VIOLATED".to_string(), + message: format!("Array must have at least {} items, but has {}", want, got), + cause: json!({ "got": got, "want": want }), + }] +} + +fn handle_max_items_error(base_path: &str, got: usize, want: usize) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "MAX_ITEMS_VIOLATED".to_string(), + message: format!("Array must have at most {} items, but has {}", want, got), + cause: json!({ "got": got, "want": want }), + }] +} + +fn handle_unique_items_error(base_path: &str, got: &[usize; 2]) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "UNIQUE_ITEMS_VIOLATED".to_string(), + message: format!("Array items at positions {} and {} are duplicates", got[0], got[1]), + cause: json!({ "got": got }), + }] 
+} + +fn handle_min_properties_error(base_path: &str, got: usize, want: usize) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "MIN_PROPERTIES_VIOLATED".to_string(), + message: format!("Object must have at least {} properties, but has {}", want, got), + cause: json!({ "got": got, "want": want }), + }] +} + +fn handle_max_properties_error(base_path: &str, got: usize, want: usize) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "MAX_PROPERTIES_VIOLATED".to_string(), + message: format!("Object must have at most {} properties, but has {}", want, got), + cause: json!({ "got": got, "want": want }), + }] +} + +fn handle_additional_items_error(base_path: &str, got: usize) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "ADDITIONAL_ITEMS_NOT_ALLOWED".to_string(), + message: format!("Last {} array items are not allowed", got), + cause: json!({ "got": got }), + }] +} + +fn handle_format_error(base_path: &str, want: &str, got: &Cow, err: &Box) -> Vec { + // If the value is an empty string, skip format validation. 
+ if let Value::String(s) = got.as_ref() { + if s.is_empty() { + return vec![]; + } + } + + vec![Error { + path: base_path.to_string(), + code: "FORMAT_INVALID".to_string(), + message: format!("Value {} is not a valid {} format", + serde_json::to_string(got.as_ref()).unwrap_or_else(|_| "unknown".to_string()), + want + ), + cause: json!({ "got": got, "want": want, "err": err.to_string() }), + }] +} + +fn handle_property_name_error(base_path: &str, prop: &str) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "INVALID_PROPERTY_NAME".to_string(), + message: format!("Property name '{}' is invalid", prop), + cause: json!({ "prop": prop }), + }] +} + +fn handle_contains_error(base_path: &str) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "CONTAINS_FAILED".to_string(), + message: "No array items match the required schema".to_string(), + cause: json!({}), + }] +} + +fn handle_min_contains_error(base_path: &str, got: &[usize], want: usize) -> Vec { + let message = if got.is_empty() { + format!("At least {} array items must match the schema, but none do", want) + } else { + format!("At least {} array items must match the schema, but only {} do (at positions {})", + want, + got.len(), + got.iter().map(|i| i.to_string()).collect::>().join(", ") + ) + }; + + vec![Error { + path: base_path.to_string(), + code: "MIN_CONTAINS_VIOLATED".to_string(), + message, + cause: json!({ "got": got, "want": want }), + }] +} + +fn handle_max_contains_error(base_path: &str, got: &[usize], want: usize) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "MAX_CONTAINS_VIOLATED".to_string(), + message: format!("At most {} array items can match the schema, but {} do (at positions {})", + want, + got.len(), + got.iter().map(|i| i.to_string()).collect::>().join(", ") + ), + cause: json!({ "got": got, "want": want }), + }] +} + +fn handle_content_encoding_error(base_path: &str, want: &str, err: &Box) -> Vec { + vec![Error { + path: base_path.to_string(), + 
code: "CONTENT_ENCODING_INVALID".to_string(), + message: format!("Content is not valid {} encoding: {}", want, err), + cause: json!({ "want": want, "err": err.to_string() }), + }] +} + +fn handle_content_media_type_error(base_path: &str, want: &str, err: &Box) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "CONTENT_MEDIA_TYPE_INVALID".to_string(), + message: format!("Content is not valid {} media type: {}", want, err), + cause: json!({ "want": want, "err": err.to_string() }), + }] +} + +fn handle_false_schema_error(base_path: &str) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "FALSE_SCHEMA".to_string(), + message: "This schema always fails validation".to_string(), + cause: json!({}), + }] +} + +fn handle_not_error(base_path: &str) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "NOT_VIOLATED".to_string(), + message: "Value matches a schema that it should not match".to_string(), + cause: json!({}), + }] +} + +fn handle_ref_cycle_error(base_path: &str, url: &str, kw_loc1: &str, kw_loc2: &str) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "REFERENCE_CYCLE".to_string(), + message: format!("Reference cycle detected: both '{}' and '{}' resolve to '{}'", kw_loc1, kw_loc2, url), + cause: json!({ "url": url, "kw_loc1": kw_loc1, "kw_loc2": kw_loc2 }), + }] +} + +fn handle_reference_error(base_path: &str, kw: &str, url: &str) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "REFERENCE_FAILED".to_string(), + message: format!("{} reference to '{}' failed validation", kw, url), + cause: json!({ "kw": kw, "url": url }), + }] +} + +fn handle_schema_error(base_path: &str, url: &str) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "SCHEMA_FAILED".to_string(), + message: format!("Schema '{}' validation failed", url), + cause: json!({ "url": url }), + }] +} + +fn handle_content_schema_error(base_path: &str) -> Vec { + vec![Error { + path: base_path.to_string(), + code: 
"CONTENT_SCHEMA_FAILED".to_string(), + message: "Content schema validation failed".to_string(), + cause: json!({}), + }] +} + +fn handle_group_error(base_path: &str) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "VALIDATION_FAILED".to_string(), + message: "Validation failed".to_string(), + cause: json!({}), + }] +} + +fn handle_all_of_error(base_path: &str) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "ALL_OF_VIOLATED".to_string(), + message: "Value does not match all required schemas".to_string(), + cause: json!({}), + }] +} + +fn handle_any_of_error(base_path: &str) -> Vec { + vec![Error { + path: base_path.to_string(), + code: "ANY_OF_VIOLATED".to_string(), + message: "Value does not match any of the allowed schemas".to_string(), + cause: json!({}), + }] +} + +fn handle_one_of_error(base_path: &str, matched: &Option<(usize, usize)>) -> Vec { + let (message, cause) = match matched { + None => ( + "Value must match exactly one schema, but matches none".to_string(), + json!({ "matched_indices": null }) + ), + Some((i, j)) => ( + format!("Value must match exactly one schema, but matches schemas at positions {} and {}", i, j), + json!({ "matched_indices": [i, j] }) + ), + }; + + vec![Error { + path: base_path.to_string(), + code: "ONE_OF_VIOLATED".to_string(), + message, + cause, + }] +} + +// Formats errors according to DropError structure +fn format_errors(errors: Vec, instance: &Value, schema_id: &str) -> Vec { + let mut unique_errors: HashMap = HashMap::new(); + for error in errors { + let error_path = error.path.clone(); + if let Entry::Vacant(entry) = unique_errors.entry(error_path.clone()) { + let failing_value = extract_value_at_path(instance, &error.path); + entry.insert(json!({ + "code": error.code, + "message": error.message, + "details": { + "path": error.path, + "context": failing_value, + "cause": error.cause, + "schema": schema_id + } + })); + } + } + + unique_errors.into_values().collect::>() +} + +// Helper function 
to extract value at a JSON pointer path +fn extract_value_at_path(instance: &Value, path: &str) -> Value { + let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); + let mut current = instance; + + for part in parts { + match current { + Value::Object(map) => { + if let Some(value) = map.get(part) { + current = value; + } else { + return Value::Null; + } + } + Value::Array(arr) => { + if let Ok(index) = part.parse::() { + if let Some(value) = arr.get(index) { + current = value; + } else { + return Value::Null; + } + } else { + return Value::Null; + } + } + _ => return Value::Null, + } + } + + current.clone() +} + +#[pg_extern(strict, parallel_safe)] +fn json_schema_cached(schema_id: &str) -> bool { + let cache = SCHEMA_CACHE.read().unwrap(); + cache.map.contains_key(schema_id) +} + +#[pg_extern(strict)] +fn clear_json_schemas() -> JsonB { + let mut cache = SCHEMA_CACHE.write().unwrap(); + *cache = Cache { + schemas: Schemas::new(), + map: HashMap::new(), + }; + JsonB(json!({ "response": "success" })) } #[pg_extern(strict, parallel_safe)] fn show_json_schemas() -> JsonB { - let cache = SCHEMA_CACHE.read().unwrap(); - let ids: Vec = cache.map.keys().cloned().collect(); - JsonB(json!({ "response": ids })) + let cache = SCHEMA_CACHE.read().unwrap(); + let ids: Vec = cache.map.keys().cloned().collect(); + JsonB(json!({ "response": ids })) } /// This module is required by `cargo pgrx test` invocations. /// It must be visible at the root of your extension crate. 
#[cfg(test)] pub mod pg_test { - pub fn setup(_options: Vec<&str>) { - // perform one-off initialization when the pg_test framework starts - } + pub fn setup(_options: Vec<&str>) { + // perform one-off initialization when the pg_test framework starts + } - #[must_use] - pub fn postgresql_conf_options() -> Vec<&'static str> { - // return any postgresql.conf settings that are required for your tests - vec![] - } + #[must_use] + pub fn postgresql_conf_options() -> Vec<&'static str> { + // return any postgresql.conf settings that are required for your tests + vec![] + } } #[cfg(any(test, feature = "pg_test"))] mod helpers { - include!("helpers.rs"); + include!("helpers.rs"); } #[cfg(any(test, feature = "pg_test"))] mod schemas { - include!("schemas.rs"); + include!("schemas.rs"); } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { - include!("tests.rs"); -} + include!("tests.rs"); +} \ No newline at end of file diff --git a/src/schemas.rs b/src/schemas.rs index d467081..56a2aa3 100644 --- a/src/schemas.rs +++ b/src/schemas.rs @@ -432,7 +432,8 @@ pub fn property_merging_schemas() -> JsonB { "type": "object", "properties": { "id": { "type": "string" }, - "name": { "type": "string" } + "name": { "type": "string" }, + "type": { "type": "string" } }, "required": ["id"] }] @@ -744,7 +745,8 @@ pub fn title_override_schemas() -> JsonB { "type": "object", "title": "Base Title", "properties": { - "name": { "type": "string" } + "name": { "type": "string" }, + "type": { "type": "string" } }, "required": ["name"] }] diff --git a/src/tests.rs b/src/tests.rs index fc28daf..9d58566 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -169,7 +169,7 @@ fn test_validate_strict() { let result_basic_invalid = validate_json_schema("basic_strict_test.request", jsonb(invalid_basic.clone())); assert_error_count(&result_basic_invalid, 1); - assert_has_error(&result_basic_invalid, "FALSE_SCHEMA", "/extra"); + assert_has_error(&result_basic_invalid, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", 
"/extra"); // Test 2: Non-strict validation - extra properties should pass let result_non_strict = validate_json_schema("non_strict_test.request", jsonb(invalid_basic.clone())); @@ -190,8 +190,8 @@ fn test_validate_strict() { let result_nested_invalid = validate_json_schema("nested_strict_test.request", jsonb(invalid_nested)); assert_error_count(&result_nested_invalid, 2); - assert_has_error(&result_nested_invalid, "FALSE_SCHEMA", "/user/extra"); - assert_has_error(&result_nested_invalid, "FALSE_SCHEMA", "/items/0/extra"); + assert_has_error(&result_nested_invalid, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/user/extra"); + assert_has_error(&result_nested_invalid, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/items/0/extra"); // Test 4: Schema with unevaluatedProperties already set - should allow extras let result_already_unevaluated = validate_json_schema("already_unevaluated_test.request", jsonb(invalid_basic.clone())); @@ -218,7 +218,7 @@ fn test_validate_strict() { let result_conditional_invalid = validate_json_schema("conditional_strict_test.request", jsonb(invalid_conditional)); assert_error_count(&result_conditional_invalid, 1); - assert_has_error(&result_conditional_invalid, "FALSE_SCHEMA", "/extra"); + assert_has_error(&result_conditional_invalid, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/extra"); } #[pg_test] @@ -412,17 +412,17 @@ fn test_validate_unevaluated_properties() { let result = validate_json_schema("simple_unevaluated_test.request", jsonb(instance_uneval)); - // Should get 3 separate FALSE_SCHEMA errors, one for each unevaluated property + // Should get 3 separate ADDITIONAL_PROPERTIES_NOT_ALLOWED errors, one for each unevaluated property assert_error_count(&result, 3); - // Verify all errors are FALSE_SCHEMA and check paths - assert_has_error(&result, "FALSE_SCHEMA", "/extra1"); - assert_has_error(&result, "FALSE_SCHEMA", "/extra2"); - assert_has_error(&result, "FALSE_SCHEMA", "/extra3"); + // Verify all errors are ADDITIONAL_PROPERTIES_NOT_ALLOWED and check 
paths + assert_has_error(&result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/extra1"); + assert_has_error(&result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/extra2"); + assert_has_error(&result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/extra3"); // Verify error messages - let extra1_error = find_error_with_code_and_path(&result, "FALSE_SCHEMA", "/extra1"); - assert_error_message_contains(extra1_error, "This schema always fails validation"); + let extra1_error = find_error_with_code_and_path(&result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/extra1"); + assert_error_message_contains(extra1_error, "Property 'extra1' is not allowed"); // Test 2: Complex schema with allOf and unevaluatedProperties (already in comprehensive setup) @@ -437,10 +437,10 @@ fn test_validate_unevaluated_properties() { let complex_result = validate_json_schema("conditional_unevaluated_test.request", jsonb(complex_instance)); - // Should get 2 FALSE_SCHEMA errors for unevaluated properties + // Should get 2 ADDITIONAL_PROPERTIES_NOT_ALLOWED errors for unevaluated properties assert_error_count(&complex_result, 2); - assert_has_error(&complex_result, "FALSE_SCHEMA", "/nickname"); - assert_has_error(&complex_result, "FALSE_SCHEMA", "/title"); + assert_has_error(&complex_result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/nickname"); + assert_has_error(&complex_result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/title"); // Test 3: Valid instance with all properties evaluated let valid_instance = json!({ @@ -643,8 +643,8 @@ fn test_validate_punc_with_refs() { let result_public_root = validate_json_schema("public_ref_test.request", jsonb(public_root_extra)); assert_error_count(&result_public_root, 2); - assert_has_error(&result_public_root, "FALSE_SCHEMA", "/extra_field"); - assert_has_error(&result_public_root, "FALSE_SCHEMA", "/another_extra"); + assert_has_error(&result_public_root, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/extra_field"); + assert_has_error(&result_public_root, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", 
"/another_extra"); // Test 2: Private punc allows extra properties at root level let private_root_extra = json!({ @@ -678,24 +678,6 @@ fn test_validate_punc_with_refs() { let result_private_valid = validate_json_schema("private_ref_test.request", jsonb(valid_data_with_address)); assert_success(&result_private_valid); - - // Test 4: Extra properties in nested address should fail for BOTH puncs (types are always strict) - let address_with_extra = json!({ - "type": "person", - "id": "550e8400-e29b-41d4-a716-446655440000", - "name": "John Doe", - "first_name": "John", - "last_name": "Doe", - "address": { - "street": "123 Main St", - "city": "Boston", - "country": "USA" // Should fail - extra property in address - } - }); - - let result_private_address = validate_json_schema("private_ref_test.request", jsonb(address_with_extra)); - assert_error_count(&result_private_address, 1); - assert_has_error(&result_private_address, "FALSE_SCHEMA", "/address/country"); } #[pg_test] diff --git a/validator/Cargo.toml b/validator/Cargo.toml index 66d8bbb..dadff64 100644 --- a/validator/Cargo.toml +++ b/validator/Cargo.toml @@ -12,6 +12,7 @@ categories = ["web-programming"] exclude = [ "tests", ".github", ".gitmodules" ] [dependencies] +pgrx = "0.15.0" serde = "1" serde_json = "1" regex = "1.10.3" @@ -26,6 +27,7 @@ ahash = "0.8.3" appendlist = "1.4" [dev-dependencies] +pgrx-tests = "0.15.0" serde = { version = "1.0", features = ["derive"] } serde_yaml = "0.9" ureq = "2.12" diff --git a/validator/src/compiler.rs b/validator/src/compiler.rs index ca30eee..9bc3ac9 100644 --- a/validator/src/compiler.rs +++ b/validator/src/compiler.rs @@ -10,976 +10,976 @@ use crate::{content::*, draft::*, ecma, formats::*, root::*, roots::*, util::*, #[non_exhaustive] #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Draft { - /// Draft for `http://json-schema.org/draft-04/schema` - V4, - /// Draft for `http://json-schema.org/draft-06/schema` - V6, - /// Draft for 
`http://json-schema.org/draft-07/schema` - V7, - /// Draft for `https://json-schema.org/draft/2019-09/schema` - V2019_09, - /// Draft for `https://json-schema.org/draft/2020-12/schema` - V2020_12, + /// Draft for `http://json-schema.org/draft-04/schema` + V4, + /// Draft for `http://json-schema.org/draft-06/schema` + V6, + /// Draft for `http://json-schema.org/draft-07/schema` + V7, + /// Draft for `https://json-schema.org/draft/2019-09/schema` + V2019_09, + /// Draft for `https://json-schema.org/draft/2020-12/schema` + V2020_12, } impl Draft { - /** - Get [`Draft`] for given `url` + /** + Get [`Draft`] for given `url` - # Arguments + # Arguments - * `url` - accepts both `http` and `https` and ignores any fragments in url + * `url` - accepts both `http` and `https` and ignores any fragments in url - # Examples + # Examples - ``` - # use boon::*; - assert_eq!(Draft::from_url("https://json-schema.org/draft/2020-12/schema"), Some(Draft::V2020_12)); - assert_eq!(Draft::from_url("http://json-schema.org/draft-07/schema#"), Some(Draft::V7)); - ``` - */ - pub fn from_url(url: &str) -> Option { - match crate::draft::Draft::from_url(url) { - Some(draft) => match draft.version { - 4 => Some(Draft::V4), - 6 => Some(Draft::V6), - 7 => Some(Draft::V7), - 2019 => Some(Draft::V2019_09), - 2020 => Some(Draft::V2020_12), - _ => None, - }, - None => None, - } + ``` + # use boon::*; + assert_eq!(Draft::from_url("https://json-schema.org/draft/2020-12/schema"), Some(Draft::V2020_12)); + assert_eq!(Draft::from_url("http://json-schema.org/draft-07/schema#"), Some(Draft::V7)); + ``` + */ + pub fn from_url(url: &str) -> Option { + match crate::draft::Draft::from_url(url) { + Some(draft) => match draft.version { + 4 => Some(Draft::V4), + 6 => Some(Draft::V6), + 7 => Some(Draft::V7), + 2019 => Some(Draft::V2019_09), + 2020 => Some(Draft::V2020_12), + _ => None, + }, + None => None, } + } - pub(crate) fn internal(&self) -> &'static crate::draft::Draft { - match self { - Draft::V4 => &DRAFT4, - 
Draft::V6 => &DRAFT6, - Draft::V7 => &DRAFT7, - Draft::V2019_09 => &DRAFT2019, - Draft::V2020_12 => &DRAFT2020, - } + pub(crate) fn internal(&self) -> &'static crate::draft::Draft { + match self { + Draft::V4 => &DRAFT4, + Draft::V6 => &DRAFT6, + Draft::V7 => &DRAFT7, + Draft::V2019_09 => &DRAFT2019, + Draft::V2020_12 => &DRAFT2020, } + } } /// Returns latest draft supported impl Default for Draft { - fn default() -> Self { - Draft::V2020_12 - } + fn default() -> Self { + Draft::V2020_12 + } } /// JsonSchema compiler. #[derive(Default)] pub struct Compiler { - roots: Roots, - assert_format: bool, - assert_content: bool, - formats: HashMap<&'static str, Format>, - decoders: HashMap<&'static str, Decoder>, - media_types: HashMap<&'static str, MediaType>, + roots: Roots, + assert_format: bool, + assert_content: bool, + formats: HashMap<&'static str, Format>, + decoders: HashMap<&'static str, Decoder>, + media_types: HashMap<&'static str, MediaType>, } impl Compiler { - pub fn new() -> Self { - Self::default() + pub fn new() -> Self { + Self::default() + } + + /** + Overrides the draft used to compile schemas without + explicit `$schema` field. + + By default this library uses latest draft supported. + + The use of this option is HIGHLY encouraged to ensure + continued correct operation of your schema. The current + default value will not stay the same over time. + */ + pub fn set_default_draft(&mut self, d: Draft) { + self.roots.default_draft = d.internal() + } + + /** + Always enable format assertions. + + # Default Behavior + + - for draft-07 and earlier: enabled + - for draft/2019-09: disabled, unless + metaschema says `format` vocabulary is required + - for draft/2020-12: disabled, unless + metaschema says `format-assertion` vocabulary is required + */ + pub fn enable_format_assertions(&mut self) { + self.assert_format = true; + } + + /** + Always enable content assertions. 
+ + content assertions include keywords: + - contentEncoding + - contentMediaType + - contentSchema + + Default Behavior is always disabled. + */ + pub fn enable_content_assertions(&mut self) { + self.assert_content = true; + } + + /// Overrides default [`UrlLoader`] used to load schema resources + pub fn use_loader(&mut self, url_loader: Box) { + self.roots.loader.use_loader(url_loader); + } + + /** + Registers custom `format` + + # Note + + - `regex` format cannot be overridden + - format assertions are disabled for draft >= 2019-09. + see [`Compiler::enable_format_assertions`] + */ + pub fn register_format(&mut self, format: Format) { + if format.name != "regex" { + self.formats.insert(format.name, format); + } + } + + /** + Registers custom `contentEncoding` + + Note that content assertions are disabled by default. + see [`Compiler::enable_content_assertions`] + */ + pub fn register_content_encoding(&mut self, decoder: Decoder) { + self.decoders.insert(decoder.name, decoder); + } + + /** + Registers custom `contentMediaType` + + Note that content assertions are disabled by default. + see [`Compiler::enable_content_assertions`] + */ + pub fn register_content_media_type(&mut self, media_type: MediaType) { + self.media_types.insert(media_type.name, media_type); + } + + /** + Adds schema resource which used later in reference resoltion + If you do not know which schema resources required, then use [`UrlLoader`]. + + The argument `loc` can be file path or url. any fragment in `loc` is ignored. + + # Errors + + returns [`CompileError`] if url parsing failed. + */ + pub fn add_resource(&mut self, loc: &str, json: Value) -> Result<(), CompileError> { + let uf = UrlFrag::absolute(loc)?; + self.roots.loader.add_doc(uf.url, json); + Ok(()) + } + + /** + Compile given `loc` into `target` and return an identifier to the compiled + schema. + + the argument `loc` can be file path or url with optional fragment. 
+ examples: `http://example.com/schema.json#/defs/address`, + `samples/schema_file.json#defs/address` + + if `loc` is already compiled, it simply returns the same [`SchemaIndex`] + */ + pub fn compile( + &mut self, + loc: &str, + target: &mut Schemas, + ) -> Result { + let uf = UrlFrag::absolute(loc)?; + // resolve anchor + let up = self.roots.resolve_fragment(uf)?; + + let result = self.do_compile(up, target); + if let Err(bug @ CompileError::Bug(_)) = &result { + debug_assert!(false, "{bug}"); + } + result + } + + fn do_compile( + &mut self, + up: UrlPtr, + target: &mut Schemas, + ) -> Result { + let mut queue = Queue::new(); + let mut compiled = Vec::new(); + + let index = queue.enqueue_schema(target, up); + if queue.schemas.is_empty() { + // already got compiled + return Ok(index); } - /** - Overrides the draft used to compile schemas without - explicit `$schema` field. - - By default this library uses latest draft supported. - - The use of this option is HIGHLY encouraged to ensure - continued correct operation of your schema. The current - default value will not stay the same over time. - */ - pub fn set_default_draft(&mut self, d: Draft) { - self.roots.default_draft = d.internal() + while queue.schemas.len() > compiled.len() { + let up = &queue.schemas[compiled.len()]; + self.roots.ensure_subschema(up)?; + let Some(root) = self.roots.get(&up.url) else { + return Err(CompileError::Bug("or_load didn't add".into())); + }; + let doc = self.roots.loader.load(&root.url)?; + let v = up.lookup(doc)?; + let sch = self.compile_value(target, v, &up.clone(), root, &mut queue)?; + compiled.push(sch); + self.roots.insert(&mut queue.roots); } - /** - Always enable format assertions. 
+ target.insert(queue.schemas, compiled); + Ok(index) + } - # Default Behavior + fn compile_value( + &self, + schemas: &Schemas, + v: &Value, + up: &UrlPtr, + root: &Root, + queue: &mut Queue, + ) -> Result { + let mut s = Schema::new(up.to_string()); + s.draft_version = root.draft.version; - - for draft-07 and earlier: enabled - - for draft/2019-09: disabled, unless - metaschema says `format` vocabulary is required - - for draft/2020-12: disabled, unless - metaschema says `format-assertion` vocabulary is required - */ - pub fn enable_format_assertions(&mut self) { - self.assert_format = true; - } + // we know it is already in queue, we just want to get its index + let len = queue.schemas.len(); + s.idx = queue.enqueue_schema(schemas, up.to_owned()); + debug_assert_eq!(queue.schemas.len(), len, "{up} should already be in queue"); - /** - Always enable content assertions. + s.resource = { + let base = UrlPtr { + url: up.url.clone(), + ptr: root.resource(&up.ptr).ptr.clone(), + }; + queue.enqueue_schema(schemas, base) + }; - content assertions include keywords: - - contentEncoding - - contentMediaType - - contentSchema - - Default Behavior is always disabled. - */ - pub fn enable_content_assertions(&mut self) { - self.assert_content = true; - } - - /// Overrides default [`UrlLoader`] used to load schema resources - pub fn use_loader(&mut self, url_loader: Box) { - self.roots.loader.use_loader(url_loader); - } - - /** - Registers custom `format` - - # Note - - - `regex` format cannot be overridden - - format assertions are disabled for draft >= 2019-09. 
- see [`Compiler::enable_format_assertions`] - */ - pub fn register_format(&mut self, format: Format) { - if format.name != "regex" { - self.formats.insert(format.name, format); + // if resource, enqueue dynamicAnchors for compilation + if s.idx == s.resource && root.draft.version >= 2020 { + let res = root.resource(&up.ptr); + for (anchor, anchor_ptr) in &res.anchors { + if res.dynamic_anchors.contains(anchor) { + let up = UrlPtr { + url: up.url.clone(), + ptr: anchor_ptr.clone(), + }; + let danchor_sch = queue.enqueue_schema(schemas, up); + s.dynamic_anchors.insert(anchor.to_string(), danchor_sch); } + } } - /** - Registers custom `contentEncoding` - - Note that content assertions are disabled by default. - see [`Compiler::enable_content_assertions`] - */ - pub fn register_content_encoding(&mut self, decoder: Decoder) { - self.decoders.insert(decoder.name, decoder); - } - - /** - Registers custom `contentMediaType` - - Note that content assertions are disabled by default. - see [`Compiler::enable_content_assertions`] - */ - pub fn register_content_media_type(&mut self, media_type: MediaType) { - self.media_types.insert(media_type.name, media_type); - } - - /** - Adds schema resource which used later in reference resoltion - If you do not know which schema resources required, then use [`UrlLoader`]. - - The argument `loc` can be file path or url. any fragment in `loc` is ignored. - - # Errors - - returns [`CompileError`] if url parsing failed. - */ - pub fn add_resource(&mut self, loc: &str, json: Value) -> Result<(), CompileError> { - let uf = UrlFrag::absolute(loc)?; - self.roots.loader.add_doc(uf.url, json); - Ok(()) - } - - /** - Compile given `loc` into `target` and return an identifier to the compiled - schema. - - the argument `loc` can be file path or url with optional fragment. 
- examples: `http://example.com/schema.json#/defs/address`, - `samples/schema_file.json#defs/address` - - if `loc` is already compiled, it simply returns the same [`SchemaIndex`] - */ - pub fn compile( - &mut self, - loc: &str, - target: &mut Schemas, - ) -> Result { - let uf = UrlFrag::absolute(loc)?; - // resolve anchor - let up = self.roots.resolve_fragment(uf)?; - - let result = self.do_compile(up, target); - if let Err(bug @ CompileError::Bug(_)) = &result { - debug_assert!(false, "{bug}"); - } - result - } - - fn do_compile( - &mut self, - up: UrlPtr, - target: &mut Schemas, - ) -> Result { - let mut queue = Queue::new(); - let mut compiled = Vec::new(); - - let index = queue.enqueue_schema(target, up); - if queue.schemas.is_empty() { - // already got compiled - return Ok(index); - } - - while queue.schemas.len() > compiled.len() { - let up = &queue.schemas[compiled.len()]; - self.roots.ensure_subschema(up)?; - let Some(root) = self.roots.get(&up.url) else { - return Err(CompileError::Bug("or_load didn't add".into())); - }; - let doc = self.roots.loader.load(&root.url)?; - let v = up.lookup(doc)?; - let sch = self.compile_value(target, v, &up.clone(), root, &mut queue)?; - compiled.push(sch); - self.roots.insert(&mut queue.roots); - } - - target.insert(queue.schemas, compiled); - Ok(index) - } - - fn compile_value( - &self, - schemas: &Schemas, - v: &Value, - up: &UrlPtr, - root: &Root, - queue: &mut Queue, - ) -> Result { - let mut s = Schema::new(up.to_string()); - s.draft_version = root.draft.version; - - // we know it is already in queue, we just want to get its index - let len = queue.schemas.len(); - s.idx = queue.enqueue_schema(schemas, up.to_owned()); - debug_assert_eq!(queue.schemas.len(), len, "{up} should already be in queue"); - - s.resource = { - let base = UrlPtr { - url: up.url.clone(), - ptr: root.resource(&up.ptr).ptr.clone(), - }; - queue.enqueue_schema(schemas, base) - }; - - // if resource, enqueue dynamicAnchors for compilation - if s.idx 
== s.resource && root.draft.version >= 2020 { - let res = root.resource(&up.ptr); - for (anchor, anchor_ptr) in &res.anchors { - if res.dynamic_anchors.contains(anchor) { - let up = UrlPtr { - url: up.url.clone(), - ptr: anchor_ptr.clone(), - }; - let danchor_sch = queue.enqueue_schema(schemas, up); - s.dynamic_anchors.insert(anchor.to_string(), danchor_sch); - } - } - } - - match v { - Value::Object(obj) => { - if obj.is_empty() { - s.boolean = Some(true); - } else { - ObjCompiler { - c: self, - obj, - up, - schemas, - root, - queue, - } - .compile_obj(&mut s)?; - } - } - Value::Bool(b) => s.boolean = Some(*b), - _ => {} - } - - s.all_props_evaluated = s.additional_properties.is_some(); - s.all_items_evaluated = if s.draft_version < 2020 { - s.additional_items.is_some() || matches!(s.items, Some(Items::SchemaRef(_))) + match v { + Value::Object(obj) => { + if obj.is_empty() { + s.boolean = Some(true); } else { - s.items2020.is_some() - }; - s.num_items_evaluated = if let Some(Items::SchemaRefs(list)) = &s.items { - list.len() - } else { - s.prefix_items.len() - }; - - Ok(s) + ObjCompiler { + c: self, + obj, + up, + schemas, + root, + queue, + } + .compile_obj(&mut s)?; + } + } + Value::Bool(b) => s.boolean = Some(*b), + _ => {} } + + s.all_props_evaluated = s.additional_properties.is_some(); + s.all_items_evaluated = if s.draft_version < 2020 { + s.additional_items.is_some() || matches!(s.items, Some(Items::SchemaRef(_))) + } else { + s.items2020.is_some() + }; + s.num_items_evaluated = if let Some(Items::SchemaRefs(list)) = &s.items { + list.len() + } else { + s.prefix_items.len() + }; + + Ok(s) + } } struct ObjCompiler<'c, 'v, 'l, 's, 'r, 'q> { - c: &'c Compiler, - obj: &'v Map, - up: &'l UrlPtr, - schemas: &'s Schemas, - root: &'r Root, - queue: &'q mut Queue, + c: &'c Compiler, + obj: &'v Map, + up: &'l UrlPtr, + schemas: &'s Schemas, + root: &'r Root, + queue: &'q mut Queue, } // compile supported drafts impl ObjCompiler<'_, '_, '_, '_, '_, '_> { - fn 
compile_obj(&mut self, s: &mut Schema) -> Result<(), CompileError> { - self.compile_draft4(s)?; - if self.draft_version() >= 6 { - self.compile_draft6(s)?; - } - if self.draft_version() >= 7 { - self.compile_draft7(s)?; - } - if self.draft_version() >= 2019 { - self.compile_draft2019(s)?; - } - if self.draft_version() >= 2020 { - self.compile_draft2020(s)?; - } - Ok(()) + fn compile_obj(&mut self, s: &mut Schema) -> Result<(), CompileError> { + self.compile_draft4(s)?; + if self.draft_version() >= 6 { + self.compile_draft6(s)?; + } + if self.draft_version() >= 7 { + self.compile_draft7(s)?; + } + if self.draft_version() >= 2019 { + self.compile_draft2019(s)?; + } + if self.draft_version() >= 2020 { + self.compile_draft2020(s)?; + } + Ok(()) + } + + fn compile_draft4(&mut self, s: &mut Schema) -> Result<(), CompileError> { + if self.has_vocab("core") { + s.ref_ = self.enqueue_ref("$ref")?; + if s.ref_.is_some() && self.draft_version() < 2019 { + // All other properties in a "$ref" object MUST be ignored + return Ok(()); + } } - fn compile_draft4(&mut self, s: &mut Schema) -> Result<(), CompileError> { - if self.has_vocab("core") { - s.ref_ = self.enqueue_ref("$ref")?; - if s.ref_.is_some() && self.draft_version() < 2019 { - // All other properties in a "$ref" object MUST be ignored - return Ok(()); - } + if self.has_vocab("applicator") { + s.all_of = self.enqueue_arr("allOf"); + s.any_of = self.enqueue_arr("anyOf"); + s.one_of = self.enqueue_arr("oneOf"); + s.not = self.enqueue_prop("not"); + + if self.draft_version() < 2020 { + match self.value("items") { + Some(Value::Array(_)) => { + s.items = Some(Items::SchemaRefs(self.enqueue_arr("items"))); + s.additional_items = self.enquue_additional("additionalItems"); + } + _ => s.items = self.enqueue_prop("items").map(Items::SchemaRef), } + } - if self.has_vocab("applicator") { - s.all_of = self.enqueue_arr("allOf"); - s.any_of = self.enqueue_arr("anyOf"); - s.one_of = self.enqueue_arr("oneOf"); - s.not = 
self.enqueue_prop("not"); + s.properties = self.enqueue_map("properties"); + s.pattern_properties = { + let mut v = vec![]; + if let Some(Value::Object(obj)) = self.value("patternProperties") { + for pname in obj.keys() { + let ecma = + ecma::convert(pname).map_err(|src| CompileError::InvalidRegex { + url: self.up.format("patternProperties"), + regex: pname.to_owned(), + src, + })?; + let regex = + Regex::new(ecma.as_ref()).map_err(|e| CompileError::InvalidRegex { + url: self.up.format("patternProperties"), + regex: ecma.into_owned(), + src: e.into(), + })?; + let ptr = self.up.ptr.append2("patternProperties", pname); + let sch = self.enqueue_schema(ptr); + v.push((regex, sch)); + } + } + v + }; - if self.draft_version() < 2020 { - match self.value("items") { - Some(Value::Array(_)) => { - s.items = Some(Items::SchemaRefs(self.enqueue_arr("items"))); - s.additional_items = self.enquue_additional("additionalItems"); - } - _ => s.items = self.enqueue_prop("items").map(Items::SchemaRef), - } - } + s.additional_properties = self.enquue_additional("additionalProperties"); - s.properties = self.enqueue_map("properties"); - s.pattern_properties = { - let mut v = vec![]; - if let Some(Value::Object(obj)) = self.value("patternProperties") { - for pname in obj.keys() { - let ecma = - ecma::convert(pname).map_err(|src| CompileError::InvalidRegex { - url: self.up.format("patternProperties"), - regex: pname.to_owned(), - src, - })?; - let regex = - Regex::new(ecma.as_ref()).map_err(|e| CompileError::InvalidRegex { - url: self.up.format("patternProperties"), - regex: ecma.into_owned(), - src: e.into(), - })?; - let ptr = self.up.ptr.append2("patternProperties", pname); - let sch = self.enqueue_schema(ptr); - v.push((regex, sch)); - } - } - v + if let Some(Value::Object(deps)) = self.value("dependencies") { + s.dependencies = deps + .iter() + .filter_map(|(k, v)| { + let v = match v { + Value::Array(_) => Some(Dependency::Props(to_strings(v))), + _ => { + let ptr = 
self.up.ptr.append2("dependencies", k); + Some(Dependency::SchemaRef(self.enqueue_schema(ptr))) + } }; - - s.additional_properties = self.enquue_additional("additionalProperties"); - - if let Some(Value::Object(deps)) = self.value("dependencies") { - s.dependencies = deps - .iter() - .filter_map(|(k, v)| { - let v = match v { - Value::Array(_) => Some(Dependency::Props(to_strings(v))), - _ => { - let ptr = self.up.ptr.append2("dependencies", k); - Some(Dependency::SchemaRef(self.enqueue_schema(ptr))) - } - }; - v.map(|v| (k.clone(), v)) - }) - .collect(); - } - } - - if self.has_vocab("validation") { - match self.value("type") { - Some(Value::String(t)) => { - if let Some(t) = Type::from_str(t) { - s.types.add(t) - } - } - Some(Value::Array(arr)) => { - for t in arr { - if let Value::String(t) = t { - if let Some(t) = Type::from_str(t) { - s.types.add(t) - } - } - } - } - _ => {} - } - - if let Some(Value::Array(e)) = self.value("enum") { - let mut types = Types::default(); - for item in e { - types.add(Type::of(item)); - } - s.enum_ = Some(Enum { - types, - values: e.clone(), - }); - } - - s.multiple_of = self.num("multipleOf"); - - s.maximum = self.num("maximum"); - if let Some(Value::Bool(exclusive)) = self.value("exclusiveMaximum") { - if *exclusive { - s.exclusive_maximum = s.maximum.take(); - } - } else { - s.exclusive_maximum = self.num("exclusiveMaximum"); - } - - s.minimum = self.num("minimum"); - if let Some(Value::Bool(exclusive)) = self.value("exclusiveMinimum") { - if *exclusive { - s.exclusive_minimum = s.minimum.take(); - } - } else { - s.exclusive_minimum = self.num("exclusiveMinimum"); - } - - s.max_length = self.usize("maxLength"); - s.min_length = self.usize("minLength"); - - if let Some(Value::String(p)) = self.value("pattern") { - let p = ecma::convert(p).map_err(CompileError::Bug)?; - s.pattern = Some(Regex::new(p.as_ref()).map_err(|e| CompileError::Bug(e.into()))?); - } - - s.max_items = self.usize("maxItems"); - s.min_items = 
self.usize("minItems"); - s.unique_items = self.bool("uniqueItems"); - - s.max_properties = self.usize("maxProperties"); - s.min_properties = self.usize("minProperties"); - - if let Some(req) = self.value("required") { - s.required = to_strings(req); - } - } - - // format -- - if self.c.assert_format - || self.has_vocab(match self.draft_version().cmp(&2019) { - Ordering::Less => "core", - Ordering::Equal => "format", - Ordering::Greater => "format-assertion", - }) - { - if let Some(Value::String(format)) = self.value("format") { - s.format = self - .c - .formats - .get(format.as_str()) - .or_else(|| FORMATS.get(format.as_str())) - .cloned(); - } - } - - Ok(()) + v.map(|v| (k.clone(), v)) + }) + .collect(); + } } - fn compile_draft6(&mut self, s: &mut Schema) -> Result<(), CompileError> { - if self.has_vocab("applicator") { - s.contains = self.enqueue_prop("contains"); - s.property_names = self.enqueue_prop("propertyNames"); + if self.has_vocab("validation") { + match self.value("type") { + Some(Value::String(t)) => { + if let Some(t) = Type::from_str(t) { + s.types.add(t) + } } - - if self.has_vocab("validation") { - s.constant = self.value("const").cloned(); + Some(Value::Array(arr)) => { + for t in arr { + if let Value::String(t) = t { + if let Some(t) = Type::from_str(t) { + s.types.add(t) + } + } + } } + _ => {} + } - Ok(()) + if let Some(Value::Array(e)) = self.value("enum") { + let mut types = Types::default(); + for item in e { + types.add(Type::of(item)); + } + s.enum_ = Some(Enum { + types, + values: e.clone(), + }); + } + + s.multiple_of = self.num("multipleOf"); + + s.maximum = self.num("maximum"); + if let Some(Value::Bool(exclusive)) = self.value("exclusiveMaximum") { + if *exclusive { + s.exclusive_maximum = s.maximum.take(); + } + } else { + s.exclusive_maximum = self.num("exclusiveMaximum"); + } + + s.minimum = self.num("minimum"); + if let Some(Value::Bool(exclusive)) = self.value("exclusiveMinimum") { + if *exclusive { + s.exclusive_minimum = 
s.minimum.take(); + } + } else { + s.exclusive_minimum = self.num("exclusiveMinimum"); + } + + s.max_length = self.usize("maxLength"); + s.min_length = self.usize("minLength"); + + if let Some(Value::String(p)) = self.value("pattern") { + let p = ecma::convert(p).map_err(CompileError::Bug)?; + s.pattern = Some(Regex::new(p.as_ref()).map_err(|e| CompileError::Bug(e.into()))?); + } + + s.max_items = self.usize("maxItems"); + s.min_items = self.usize("minItems"); + s.unique_items = self.bool("uniqueItems"); + + s.max_properties = self.usize("maxProperties"); + s.min_properties = self.usize("minProperties"); + + if let Some(req) = self.value("required") { + s.required = to_strings(req); + } } - fn compile_draft7(&mut self, s: &mut Schema) -> Result<(), CompileError> { - if self.has_vocab("applicator") { - s.if_ = self.enqueue_prop("if"); - if s.if_.is_some() { - if !self.bool_schema("if", false) { - s.then = self.enqueue_prop("then"); - } - if !self.bool_schema("if", true) { - s.else_ = self.enqueue_prop("else"); - } - } - } - - if self.c.assert_content { - if let Some(Value::String(encoding)) = self.value("contentEncoding") { - s.content_encoding = self - .c - .decoders - .get(encoding.as_str()) - .or_else(|| DECODERS.get(encoding.as_str())) - .cloned(); - } - - if let Some(Value::String(media_type)) = self.value("contentMediaType") { - s.content_media_type = self - .c - .media_types - .get(media_type.as_str()) - .or_else(|| MEDIA_TYPES.get(media_type.as_str())) - .cloned(); - } - } - - Ok(()) + // format -- + if self.c.assert_format + || self.has_vocab(match self.draft_version().cmp(&2019) { + Ordering::Less => "core", + Ordering::Equal => "format", + Ordering::Greater => "format-assertion", + }) + { + if let Some(Value::String(format)) = self.value("format") { + s.format = self + .c + .formats + .get(format.as_str()) + .or_else(|| FORMATS.get(format.as_str())) + .cloned(); + } } - fn compile_draft2019(&mut self, s: &mut Schema) -> Result<(), CompileError> { - if 
self.has_vocab("core") { - s.recursive_ref = self.enqueue_ref("$recursiveRef")?; - s.recursive_anchor = self.bool("$recursiveAnchor"); - } + Ok(()) + } - if self.has_vocab("validation") { - if s.contains.is_some() { - s.max_contains = self.usize("maxContains"); - s.min_contains = self.usize("minContains"); - } - - if let Some(Value::Object(dep_req)) = self.value("dependentRequired") { - for (pname, pvalue) in dep_req { - s.dependent_required - .push((pname.clone(), to_strings(pvalue))); - } - } - } - - if self.has_vocab("applicator") { - s.dependent_schemas = self.enqueue_map("dependentSchemas"); - } - - if self.has_vocab(match self.draft_version() { - 2019 => "applicator", - _ => "unevaluated", - }) { - s.unevaluated_items = self.enqueue_prop("unevaluatedItems"); - s.unevaluated_properties = self.enqueue_prop("unevaluatedProperties"); - } - - if self.c.assert_content - && s.content_media_type - .map(|mt| mt.json_compatible) - .unwrap_or(false) - { - s.content_schema = self.enqueue_prop("contentSchema"); - } - - Ok(()) + fn compile_draft6(&mut self, s: &mut Schema) -> Result<(), CompileError> { + if self.has_vocab("applicator") { + s.contains = self.enqueue_prop("contains"); + s.property_names = self.enqueue_prop("propertyNames"); } - fn compile_draft2020(&mut self, s: &mut Schema) -> Result<(), CompileError> { - if self.has_vocab("core") { - if let Some(sch) = self.enqueue_ref("$dynamicRef")? 
{ - if let Some(Value::String(dref)) = self.value("$dynamicRef") { - let Ok((_, frag)) = Fragment::split(dref) else { - let loc = self.up.format("$dynamicRef"); - return Err(CompileError::ParseAnchorError { loc }); - }; - let anchor = match frag { - Fragment::Anchor(Anchor(s)) => Some(s), - Fragment::JsonPointer(_) => None, - }; - s.dynamic_ref = Some(DynamicRef { sch, anchor }); - } - }; - - if let Some(Value::String(anchor)) = self.value("$dynamicAnchor") { - s.dynamic_anchor = Some(anchor.to_owned()); - } - } - - if self.has_vocab("applicator") { - s.prefix_items = self.enqueue_arr("prefixItems"); - s.items2020 = self.enqueue_prop("items"); - } - - Ok(()) + if self.has_vocab("validation") { + s.constant = self.value("const").cloned(); } + + Ok(()) + } + + fn compile_draft7(&mut self, s: &mut Schema) -> Result<(), CompileError> { + if self.has_vocab("applicator") { + s.if_ = self.enqueue_prop("if"); + if s.if_.is_some() { + if !self.bool_schema("if", false) { + s.then = self.enqueue_prop("then"); + } + if !self.bool_schema("if", true) { + s.else_ = self.enqueue_prop("else"); + } + } + } + + if self.c.assert_content { + if let Some(Value::String(encoding)) = self.value("contentEncoding") { + s.content_encoding = self + .c + .decoders + .get(encoding.as_str()) + .or_else(|| DECODERS.get(encoding.as_str())) + .cloned(); + } + + if let Some(Value::String(media_type)) = self.value("contentMediaType") { + s.content_media_type = self + .c + .media_types + .get(media_type.as_str()) + .or_else(|| MEDIA_TYPES.get(media_type.as_str())) + .cloned(); + } + } + + Ok(()) + } + + fn compile_draft2019(&mut self, s: &mut Schema) -> Result<(), CompileError> { + if self.has_vocab("core") { + s.recursive_ref = self.enqueue_ref("$recursiveRef")?; + s.recursive_anchor = self.bool("$recursiveAnchor"); + } + + if self.has_vocab("validation") { + if s.contains.is_some() { + s.max_contains = self.usize("maxContains"); + s.min_contains = self.usize("minContains"); + } + + if let 
Some(Value::Object(dep_req)) = self.value("dependentRequired") { + for (pname, pvalue) in dep_req { + s.dependent_required + .push((pname.clone(), to_strings(pvalue))); + } + } + } + + if self.has_vocab("applicator") { + s.dependent_schemas = self.enqueue_map("dependentSchemas"); + } + + if self.has_vocab(match self.draft_version() { + 2019 => "applicator", + _ => "unevaluated", + }) { + s.unevaluated_items = self.enqueue_prop("unevaluatedItems"); + s.unevaluated_properties = self.enqueue_prop("unevaluatedProperties"); + } + + if self.c.assert_content + && s.content_media_type + .map(|mt| mt.json_compatible) + .unwrap_or(false) + { + s.content_schema = self.enqueue_prop("contentSchema"); + } + + Ok(()) + } + + fn compile_draft2020(&mut self, s: &mut Schema) -> Result<(), CompileError> { + if self.has_vocab("core") { + if let Some(sch) = self.enqueue_ref("$dynamicRef")? { + if let Some(Value::String(dref)) = self.value("$dynamicRef") { + let Ok((_, frag)) = Fragment::split(dref) else { + let loc = self.up.format("$dynamicRef"); + return Err(CompileError::ParseAnchorError { loc }); + }; + let anchor = match frag { + Fragment::Anchor(Anchor(s)) => Some(s), + Fragment::JsonPointer(_) => None, + }; + s.dynamic_ref = Some(DynamicRef { sch, anchor }); + } + }; + + if let Some(Value::String(anchor)) = self.value("$dynamicAnchor") { + s.dynamic_anchor = Some(anchor.to_owned()); + } + } + + if self.has_vocab("applicator") { + s.prefix_items = self.enqueue_arr("prefixItems"); + s.items2020 = self.enqueue_prop("items"); + } + + Ok(()) + } } // enqueue helpers impl ObjCompiler<'_, '_, '_, '_, '_, '_> { - fn enqueue_schema(&mut self, ptr: JsonPointer) -> SchemaIndex { - let up = UrlPtr { - url: self.up.url.clone(), - ptr, - }; - self.queue.enqueue_schema(self.schemas, up) - } + fn enqueue_schema(&mut self, ptr: JsonPointer) -> SchemaIndex { + let up = UrlPtr { + url: self.up.url.clone(), + ptr, + }; + self.queue.enqueue_schema(self.schemas, up) + } - fn enqueue_prop(&mut self, 
pname: &'static str) -> Option { - if self.obj.contains_key(pname) { - let ptr = self.up.ptr.append(pname); - Some(self.enqueue_schema(ptr)) - } else { - None - } + fn enqueue_prop(&mut self, pname: &'static str) -> Option { + if self.obj.contains_key(pname) { + let ptr = self.up.ptr.append(pname); + Some(self.enqueue_schema(ptr)) + } else { + None } + } - fn enqueue_arr(&mut self, pname: &'static str) -> Vec { - if let Some(Value::Array(arr)) = self.obj.get(pname) { - (0..arr.len()) - .map(|i| { - let ptr = self.up.ptr.append2(pname, &i.to_string()); - self.enqueue_schema(ptr) - }) - .collect() - } else { - Vec::new() - } + fn enqueue_arr(&mut self, pname: &'static str) -> Vec { + if let Some(Value::Array(arr)) = self.obj.get(pname) { + (0..arr.len()) + .map(|i| { + let ptr = self.up.ptr.append2(pname, &i.to_string()); + self.enqueue_schema(ptr) + }) + .collect() + } else { + Vec::new() } + } - fn enqueue_map(&mut self, pname: &'static str) -> T - where - T: Default, - T: FromIterator<(String, SchemaIndex)>, - { - if let Some(Value::Object(obj)) = self.obj.get(pname) { - obj.keys() - .map(|k| { - let ptr = self.up.ptr.append2(pname, k); - (k.clone(), self.enqueue_schema(ptr)) - }) - .collect() - } else { - T::default() - } + fn enqueue_map(&mut self, pname: &'static str) -> T + where + T: Default, + T: FromIterator<(String, SchemaIndex)>, + { + if let Some(Value::Object(obj)) = self.obj.get(pname) { + obj.keys() + .map(|k| { + let ptr = self.up.ptr.append2(pname, k); + (k.clone(), self.enqueue_schema(ptr)) + }) + .collect() + } else { + T::default() } + } - fn enqueue_ref(&mut self, pname: &str) -> Result, CompileError> { - let Some(Value::String(ref_)) = self.obj.get(pname) else { - return Ok(None); - }; - let base_url = self.root.base_url(&self.up.ptr); - let abs_ref = UrlFrag::join(base_url, ref_)?; - if let Some(resolved_ref) = self.root.resolve(&abs_ref)? 
{ - // local ref - return Ok(Some(self.enqueue_schema(resolved_ref.ptr))); - } - // remote ref - let up = self.queue.resolve_anchor(abs_ref, &self.c.roots)?; - Ok(Some(self.queue.enqueue_schema(self.schemas, up))) + fn enqueue_ref(&mut self, pname: &str) -> Result, CompileError> { + let Some(Value::String(ref_)) = self.obj.get(pname) else { + return Ok(None); + }; + let base_url = self.root.base_url(&self.up.ptr); + let abs_ref = UrlFrag::join(base_url, ref_)?; + if let Some(resolved_ref) = self.root.resolve(&abs_ref)? { + // local ref + return Ok(Some(self.enqueue_schema(resolved_ref.ptr))); } + // remote ref + let up = self.queue.resolve_anchor(abs_ref, &self.c.roots)?; + Ok(Some(self.queue.enqueue_schema(self.schemas, up))) + } - fn enquue_additional(&mut self, pname: &'static str) -> Option { - if let Some(Value::Bool(b)) = self.obj.get(pname) { - Some(Additional::Bool(*b)) - } else { - self.enqueue_prop(pname).map(Additional::SchemaRef) - } + fn enquue_additional(&mut self, pname: &'static str) -> Option { + if let Some(Value::Bool(b)) = self.obj.get(pname) { + Some(Additional::Bool(*b)) + } else { + self.enqueue_prop(pname).map(Additional::SchemaRef) } + } } // query helpers impl<'v> ObjCompiler<'_, 'v, '_, '_, '_, '_> { - fn draft_version(&self) -> usize { - self.root.draft.version - } + fn draft_version(&self) -> usize { + self.root.draft.version + } - fn has_vocab(&self, name: &str) -> bool { - self.root.has_vocab(name) - } + fn has_vocab(&self, name: &str) -> bool { + self.root.has_vocab(name) + } - fn value(&self, pname: &str) -> Option<&'v Value> { - self.obj.get(pname) - } + fn value(&self, pname: &str) -> Option<&'v Value> { + self.obj.get(pname) + } - fn bool(&self, pname: &str) -> bool { - matches!(self.obj.get(pname), Some(Value::Bool(true))) - } + fn bool(&self, pname: &str) -> bool { + matches!(self.obj.get(pname), Some(Value::Bool(true))) + } - fn usize(&self, pname: &str) -> Option { - let Some(Value::Number(n)) = self.obj.get(pname) else { - 
return None; - }; - if n.is_u64() { - n.as_u64().map(|n| n as usize) - } else { - n.as_f64() - .filter(|n| n.is_sign_positive() && n.fract() == 0.0) - .map(|n| n as usize) - } + fn usize(&self, pname: &str) -> Option { + let Some(Value::Number(n)) = self.obj.get(pname) else { + return None; + }; + if n.is_u64() { + n.as_u64().map(|n| n as usize) + } else { + n.as_f64() + .filter(|n| n.is_sign_positive() && n.fract() == 0.0) + .map(|n| n as usize) } + } - fn num(&self, pname: &str) -> Option { - if let Some(Value::Number(n)) = self.obj.get(pname) { - Some(n.clone()) - } else { - None - } + fn num(&self, pname: &str) -> Option { + if let Some(Value::Number(n)) = self.obj.get(pname) { + Some(n.clone()) + } else { + None } + } - fn bool_schema(&self, pname: &str, b: bool) -> bool { - if let Some(Value::Bool(v)) = self.obj.get(pname) { - return *v == b; - } - false + fn bool_schema(&self, pname: &str, b: bool) -> bool { + if let Some(Value::Bool(v)) = self.obj.get(pname) { + return *v == b; } + false + } } /// Error type for compilation failures. #[derive(Debug)] pub enum CompileError { - /// Error in parsing `url`. - ParseUrlError { url: String, src: Box }, + /// Error in parsing `url`. + ParseUrlError { url: String, src: Box }, - /// Failed loading `url`. - LoadUrlError { url: String, src: Box }, + /// Failed loading `url`. + LoadUrlError { url: String, src: Box }, - /// no [`UrlLoader`] registered for the `url` - UnsupportedUrlScheme { url: String }, + /// no [`UrlLoader`] registered for the `url` + UnsupportedUrlScheme { url: String }, - /// Error in parsing `$schema` url. - InvalidMetaSchemaUrl { url: String, src: Box }, + /// Error in parsing `$schema` url. + InvalidMetaSchemaUrl { url: String, src: Box }, - /// draft `url` is not supported - UnsupportedDraft { url: String }, + /// draft `url` is not supported + UnsupportedDraft { url: String }, - /// Cycle in resolving `$schema` in `url`. 
- MetaSchemaCycle { url: String }, + /// Cycle in resolving `$schema` in `url`. + MetaSchemaCycle { url: String }, - /// `url` is not valid against metaschema. - ValidationError { - url: String, - src: ValidationError<'static, 'static>, - }, + /// `url` is not valid against metaschema. + ValidationError { + url: String, + src: ValidationError<'static, 'static>, + }, - /// Error in parsing id at `loc` - ParseIdError { loc: String }, + /// Error in parsing id at `loc` + ParseIdError { loc: String }, - /// Error in parsing anchor at `loc` - ParseAnchorError { loc: String }, + /// Error in parsing anchor at `loc` + ParseAnchorError { loc: String }, - /// Duplicate id `id` in `url` at `ptr1` and `ptr2`. - DuplicateId { - url: String, - id: String, - ptr1: String, - ptr2: String, - }, + /// Duplicate id `id` in `url` at `ptr1` and `ptr2`. + DuplicateId { + url: String, + id: String, + ptr1: String, + ptr2: String, + }, - /// Duplicate anchor `anchor` in `url` at `ptr1` and `ptr2`. - DuplicateAnchor { - anchor: String, - url: String, - ptr1: String, - ptr2: String, - }, + /// Duplicate anchor `anchor` in `url` at `ptr1` and `ptr2`. + DuplicateAnchor { + anchor: String, + url: String, + ptr1: String, + ptr2: String, + }, - /// Not a valid json pointer. - InvalidJsonPointer(String), + /// Not a valid json pointer. + InvalidJsonPointer(String), - /// JsonPointer evaluated to nothing. - JsonPointerNotFound(String), + /// JsonPointer evaluated to nothing. + JsonPointerNotFound(String), - /// anchor in `reference` not found in `url`. - AnchorNotFound { url: String, reference: String }, + /// anchor in `reference` not found in `url`. + AnchorNotFound { url: String, reference: String }, - /// Unsupported vocabulary `vocabulary` in `url`. - UnsupportedVocabulary { url: String, vocabulary: String }, + /// Unsupported vocabulary `vocabulary` in `url`. + UnsupportedVocabulary { url: String, vocabulary: String }, - /// Invalid Regex `regex` at `url`. 
- InvalidRegex { - url: String, - regex: String, - src: Box, - }, + /// Invalid Regex `regex` at `url`. + InvalidRegex { + url: String, + regex: String, + src: Box, + }, - /// Encountered bug in compiler implementation. Please report - /// this as an issue for this crate. - Bug(Box), + /// Encountered bug in compiler implementation. Please report + /// this as an issue for this crate. + Bug(Box), } impl Error for CompileError { - fn source(&self) -> Option<&(dyn Error + 'static)> { - match self { - Self::ParseUrlError { src, .. } => Some(src.as_ref()), - Self::LoadUrlError { src, .. } => Some(src.as_ref()), - Self::InvalidMetaSchemaUrl { src, .. } => Some(src.as_ref()), - Self::ValidationError { src, .. } => Some(src), - Self::Bug(src) => Some(src.as_ref()), - _ => None, - } + fn source(&self) -> Option<&(dyn Error + 'static)> { + match self { + Self::ParseUrlError { src, .. } => Some(src.as_ref()), + Self::LoadUrlError { src, .. } => Some(src.as_ref()), + Self::InvalidMetaSchemaUrl { src, .. } => Some(src.as_ref()), + Self::ValidationError { src, .. 
} => Some(src), + Self::Bug(src) => Some(src.as_ref()), + _ => None, } + } } impl Display for CompileError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::ParseUrlError { url, src } => { - if f.alternate() { - write!(f, "error parsing url {url}: {src}") - } else { - write!(f, "error parsing {url}") - } - } - Self::LoadUrlError { url, src } => { - if f.alternate() { - write!(f, "error loading {url}: {src}") - } else { - write!(f, "error loading {url}") - } - } - Self::UnsupportedUrlScheme { url } => write!(f, "unsupported scheme in {url}"), - Self::InvalidMetaSchemaUrl { url, src } => { - if f.alternate() { - write!(f, "invalid $schema in {url}: {src}") - } else { - write!(f, "invalid $schema in {url}") - } - } - Self::UnsupportedDraft { url } => write!(f, "draft {url} is not supported"), - Self::MetaSchemaCycle { url } => { - write!(f, "cycle in resolving $schema in {url}") - } - Self::ValidationError { url, src } => { - if f.alternate() { - write!(f, "{url} is not valid against metaschema: {src}") - } else { - write!(f, "{url} is not valid against metaschema") - } - } - Self::ParseIdError { loc } => write!(f, "error in parsing id at {loc}"), - Self::ParseAnchorError { loc } => write!(f, "error in parsing anchor at {loc}"), - Self::DuplicateId { - url, - id, - ptr1, - ptr2, - } => write!(f, "duplicate $id {id} in {url} at {ptr1:?} and {ptr2:?}"), - Self::DuplicateAnchor { - anchor, - url, - ptr1, - ptr2, - } => { - write!( - f, - "duplicate anchor {anchor:?} in {url} at {ptr1:?} and {ptr2:?}" - ) - } - Self::InvalidJsonPointer(loc) => write!(f, "invalid json-pointer {loc}"), - Self::JsonPointerNotFound(loc) => write!(f, "json-pointer in {loc} not found"), - Self::AnchorNotFound { url, reference } => { - write!( - f, - "anchor in reference {reference} is not found in schema {url}" - ) - } - Self::UnsupportedVocabulary { url, vocabulary } => { - write!(f, "unsupported vocabulary {vocabulary} in {url}") - } - 
Self::InvalidRegex { url, regex, src } => { - if f.alternate() { - write!(f, "invalid regex {} at {url}: {src}", quote(regex)) - } else { - write!(f, "invalid regex {} at {url}", quote(regex)) - } - } - Self::Bug(src) => { - write!( - f, - "encountered bug in jsonschema compiler. please report: {src}" - ) - } + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::ParseUrlError { url, src } => { + if f.alternate() { + write!(f, "error parsing url {url}: {src}") + } else { + write!(f, "error parsing {url}") } + } + Self::LoadUrlError { url, src } => { + if f.alternate() { + write!(f, "error loading {url}: {src}") + } else { + write!(f, "error loading {url}") + } + } + Self::UnsupportedUrlScheme { url } => write!(f, "unsupported scheme in {url}"), + Self::InvalidMetaSchemaUrl { url, src } => { + if f.alternate() { + write!(f, "invalid $schema in {url}: {src}") + } else { + write!(f, "invalid $schema in {url}") + } + } + Self::UnsupportedDraft { url } => write!(f, "draft {url} is not supported"), + Self::MetaSchemaCycle { url } => { + write!(f, "cycle in resolving $schema in {url}") + } + Self::ValidationError { url, src } => { + if f.alternate() { + write!(f, "{url} is not valid against metaschema: {src}") + } else { + write!(f, "{url} is not valid against metaschema") + } + } + Self::ParseIdError { loc } => write!(f, "error in parsing id at {loc}"), + Self::ParseAnchorError { loc } => write!(f, "error in parsing anchor at {loc}"), + Self::DuplicateId { + url, + id, + ptr1, + ptr2, + } => write!(f, "duplicate $id {id} in {url} at {ptr1:?} and {ptr2:?}"), + Self::DuplicateAnchor { + anchor, + url, + ptr1, + ptr2, + } => { + write!( + f, + "duplicate anchor {anchor:?} in {url} at {ptr1:?} and {ptr2:?}" + ) + } + Self::InvalidJsonPointer(loc) => write!(f, "invalid json-pointer {loc}"), + Self::JsonPointerNotFound(loc) => write!(f, "json-pointer in {loc} not found"), + Self::AnchorNotFound { url, reference } => { + write!( + f, + 
"anchor in reference {reference} is not found in schema {url}" + ) + } + Self::UnsupportedVocabulary { url, vocabulary } => { + write!(f, "unsupported vocabulary {vocabulary} in {url}") + } + Self::InvalidRegex { url, regex, src } => { + if f.alternate() { + write!(f, "invalid regex {} at {url}: {src}", quote(regex)) + } else { + write!(f, "invalid regex {} at {url}", quote(regex)) + } + } + Self::Bug(src) => { + write!( + f, + "encountered bug in jsonschema compiler. please report: {src}" + ) + } } + } } // helpers -- fn to_strings(v: &Value) -> Vec { - if let Value::Array(a) = v { - a.iter() - .filter_map(|t| { - if let Value::String(t) = t { - Some(t.clone()) - } else { - None - } - }) - .collect() - } else { - vec![] - } + if let Value::Array(a) = v { + a.iter() + .filter_map(|t| { + if let Value::String(t) = t { + Some(t.clone()) + } else { + None + } + }) + .collect() + } else { + vec![] + } } pub(crate) struct Queue { - pub(crate) schemas: Vec, - pub(crate) roots: HashMap, + pub(crate) schemas: Vec, + pub(crate) roots: HashMap, } impl Queue { - fn new() -> Self { - Self { - schemas: vec![], - roots: HashMap::new(), - } + fn new() -> Self { + Self { + schemas: vec![], + roots: HashMap::new(), + } + } + + pub(crate) fn resolve_anchor( + &mut self, + uf: UrlFrag, + roots: &Roots, + ) -> Result { + match uf.frag { + Fragment::JsonPointer(ptr) => Ok(UrlPtr { url: uf.url, ptr }), + Fragment::Anchor(_) => { + let root = match roots.get(&uf.url).or_else(|| self.roots.get(&uf.url)) { + Some(root) => root, + None => { + let doc = roots.loader.load(&uf.url)?; + let r = roots.create_root(uf.url.clone(), doc)?; + self.roots.entry(uf.url).or_insert(r) + } + }; + root.resolve_fragment(&uf.frag) + } + } + } + + pub(crate) fn enqueue_schema(&mut self, schemas: &Schemas, up: UrlPtr) -> SchemaIndex { + if let Some(sch) = schemas.get_by_loc(&up) { + // already got compiled + return sch.idx; + } + if let Some(qindex) = self.schemas.iter().position(|e| *e == up) { + // already 
queued for compilation + return SchemaIndex(schemas.size() + qindex); } - pub(crate) fn resolve_anchor( - &mut self, - uf: UrlFrag, - roots: &Roots, - ) -> Result { - match uf.frag { - Fragment::JsonPointer(ptr) => Ok(UrlPtr { url: uf.url, ptr }), - Fragment::Anchor(_) => { - let root = match roots.get(&uf.url).or_else(|| self.roots.get(&uf.url)) { - Some(root) => root, - None => { - let doc = roots.loader.load(&uf.url)?; - let r = roots.create_root(uf.url.clone(), doc)?; - self.roots.entry(uf.url).or_insert(r) - } - }; - root.resolve_fragment(&uf.frag) - } - } - } - - pub(crate) fn enqueue_schema(&mut self, schemas: &Schemas, up: UrlPtr) -> SchemaIndex { - if let Some(sch) = schemas.get_by_loc(&up) { - // already got compiled - return sch.idx; - } - if let Some(qindex) = self.schemas.iter().position(|e| *e == up) { - // already queued for compilation - return SchemaIndex(schemas.size() + qindex); - } - - // new compilation request - self.schemas.push(up); - SchemaIndex(schemas.size() + self.schemas.len() - 1) - } + // new compilation request + self.schemas.push(up); + SchemaIndex(schemas.size() + self.schemas.len() - 1) + } } diff --git a/validator/src/content.rs b/validator/src/content.rs index 58ccdd7..3890d89 100644 --- a/validator/src/content.rs +++ b/validator/src/content.rs @@ -10,28 +10,28 @@ use serde_json::Value; /// Defines Decoder for `contentEncoding`. 
#[derive(Clone, Copy)] pub struct Decoder { - /// Name of the encoding - pub name: &'static str, + /// Name of the encoding + pub name: &'static str, - /// Decodes given string to bytes - #[allow(clippy::type_complexity)] - pub func: fn(s: &str) -> Result, Box>, + /// Decodes given string to bytes + #[allow(clippy::type_complexity)] + pub func: fn(s: &str) -> Result, Box>, } pub(crate) static DECODERS: Lazy> = Lazy::new(|| { - let mut m = HashMap::<&'static str, Decoder>::new(); - m.insert( - "base64", - Decoder { - name: "base64", - func: decode_base64, - }, - ); - m + let mut m = HashMap::<&'static str, Decoder>::new(); + m.insert( + "base64", + Decoder { + name: "base64", + func: decode_base64, + }, + ); + m }); fn decode_base64(s: &str) -> Result, Box> { - Ok(base64::engine::general_purpose::STANDARD.decode(s)?) + Ok(base64::engine::general_purpose::STANDARD.decode(s)?) } // mediatypes -- @@ -39,44 +39,44 @@ fn decode_base64(s: &str) -> Result, Box> { /// Defines Mediatype for `contentMediaType`. #[derive(Clone, Copy)] pub struct MediaType { - /// Name of this media-type as defined in RFC 2046. - /// Example: `application/json` - pub name: &'static str, + /// Name of this media-type as defined in RFC 2046. + /// Example: `application/json` + pub name: &'static str, - /// whether this media type can be deserialized to json. If so it can - /// be validated by `contentSchema` keyword. - pub json_compatible: bool, + /// whether this media type can be deserialized to json. If so it can + /// be validated by `contentSchema` keyword. + pub json_compatible: bool, - /** - Check whether `bytes` conforms to this media-type. + /** + Check whether `bytes` conforms to this media-type. - Should return `Ok(Some(Value))` if `deserialize` is `true`, otherwise it can return `Ok(None)`. - Ideally you could deserialize to `serde::de::IgnoredAny` if `deserialize` is `false` to gain - some performance. 
+ Should return `Ok(Some(Value))` if `deserialize` is `true`, otherwise it can return `Ok(None)`. + Ideally you could deserialize to `serde::de::IgnoredAny` if `deserialize` is `false` to gain + some performance. - `deserialize` is always `false` if `json_compatible` is `false`. - */ - #[allow(clippy::type_complexity)] - pub func: fn(bytes: &[u8], deserialize: bool) -> Result, Box>, + `deserialize` is always `false` if `json_compatible` is `false`. + */ + #[allow(clippy::type_complexity)] + pub func: fn(bytes: &[u8], deserialize: bool) -> Result, Box>, } pub(crate) static MEDIA_TYPES: Lazy> = Lazy::new(|| { - let mut m = HashMap::<&'static str, MediaType>::new(); - m.insert( - "application/json", - MediaType { - name: "application/json", - json_compatible: true, - func: check_json, - }, - ); - m + let mut m = HashMap::<&'static str, MediaType>::new(); + m.insert( + "application/json", + MediaType { + name: "application/json", + json_compatible: true, + func: check_json, + }, + ); + m }); fn check_json(bytes: &[u8], deserialize: bool) -> Result, Box> { - if deserialize { - return Ok(Some(serde_json::from_slice(bytes)?)); - } - serde_json::from_slice::(bytes)?; - Ok(None) + if deserialize { + return Ok(Some(serde_json::from_slice(bytes)?)); + } + serde_json::from_slice::(bytes)?; + Ok(None) } diff --git a/validator/src/draft.rs b/validator/src/draft.rs index 91b73d2..1ef87ec 100644 --- a/validator/src/draft.rs +++ b/validator/src/draft.rs @@ -1,6 +1,6 @@ use std::{ - collections::{hash_map::Entry, HashMap}, - str::FromStr, + collections::{hash_map::Entry, HashMap}, + str::FromStr, }; use once_cell::sync::Lazy; @@ -14,563 +14,563 @@ const POS_PROP: u8 = 1 << 1; const POS_ITEM: u8 = 1 << 2; pub(crate) static DRAFT4: Lazy = Lazy::new(|| Draft { - version: 4, - id: "id", - url: "http://json-schema.org/draft-04/schema", - subschemas: HashMap::from([ - // type agnostic - ("definitions", POS_PROP), - ("not", POS_SELF), - ("allOf", POS_ITEM), - ("anyOf", POS_ITEM), - 
("oneOf", POS_ITEM), - // object - ("properties", POS_PROP), - ("additionalProperties", POS_SELF), - ("patternProperties", POS_PROP), - // array - ("items", POS_SELF | POS_ITEM), - ("additionalItems", POS_SELF), - ("dependencies", POS_PROP), - ]), - vocab_prefix: "", - all_vocabs: vec![], - default_vocabs: vec![], + version: 4, + id: "id", + url: "http://json-schema.org/draft-04/schema", + subschemas: HashMap::from([ + // type agnostic + ("definitions", POS_PROP), + ("not", POS_SELF), + ("allOf", POS_ITEM), + ("anyOf", POS_ITEM), + ("oneOf", POS_ITEM), + // object + ("properties", POS_PROP), + ("additionalProperties", POS_SELF), + ("patternProperties", POS_PROP), + // array + ("items", POS_SELF | POS_ITEM), + ("additionalItems", POS_SELF), + ("dependencies", POS_PROP), + ]), + vocab_prefix: "", + all_vocabs: vec![], + default_vocabs: vec![], }); pub(crate) static DRAFT6: Lazy = Lazy::new(|| { - let mut subschemas = DRAFT4.subschemas.clone(); - subschemas.extend([("propertyNames", POS_SELF), ("contains", POS_SELF)]); - Draft { - version: 6, - id: "$id", - url: "http://json-schema.org/draft-06/schema", - subschemas, - vocab_prefix: "", - all_vocabs: vec![], - default_vocabs: vec![], - } + let mut subschemas = DRAFT4.subschemas.clone(); + subschemas.extend([("propertyNames", POS_SELF), ("contains", POS_SELF)]); + Draft { + version: 6, + id: "$id", + url: "http://json-schema.org/draft-06/schema", + subschemas, + vocab_prefix: "", + all_vocabs: vec![], + default_vocabs: vec![], + } }); pub(crate) static DRAFT7: Lazy = Lazy::new(|| { - let mut subschemas = DRAFT6.subschemas.clone(); - subschemas.extend([("if", POS_SELF), ("then", POS_SELF), ("else", POS_SELF)]); - Draft { - version: 7, - id: "$id", - url: "http://json-schema.org/draft-07/schema", - subschemas, - vocab_prefix: "", - all_vocabs: vec![], - default_vocabs: vec![], - } + let mut subschemas = DRAFT6.subschemas.clone(); + subschemas.extend([("if", POS_SELF), ("then", POS_SELF), ("else", POS_SELF)]); + Draft { + 
version: 7, + id: "$id", + url: "http://json-schema.org/draft-07/schema", + subschemas, + vocab_prefix: "", + all_vocabs: vec![], + default_vocabs: vec![], + } }); pub(crate) static DRAFT2019: Lazy = Lazy::new(|| { - let mut subschemas = DRAFT7.subschemas.clone(); - subschemas.extend([ - ("$defs", POS_PROP), - ("dependentSchemas", POS_PROP), - ("unevaluatedProperties", POS_SELF), - ("unevaluatedItems", POS_SELF), - ("contentSchema", POS_SELF), - ]); - Draft { - version: 2019, - id: "$id", - url: "https://json-schema.org/draft/2019-09/schema", - subschemas, - vocab_prefix: "https://json-schema.org/draft/2019-09/vocab/", - all_vocabs: vec![ - "core", - "applicator", - "validation", - "meta-data", - "format", - "content", - ], - default_vocabs: vec!["core", "applicator", "validation"], - } + let mut subschemas = DRAFT7.subschemas.clone(); + subschemas.extend([ + ("$defs", POS_PROP), + ("dependentSchemas", POS_PROP), + ("unevaluatedProperties", POS_SELF), + ("unevaluatedItems", POS_SELF), + ("contentSchema", POS_SELF), + ]); + Draft { + version: 2019, + id: "$id", + url: "https://json-schema.org/draft/2019-09/schema", + subschemas, + vocab_prefix: "https://json-schema.org/draft/2019-09/vocab/", + all_vocabs: vec![ + "core", + "applicator", + "validation", + "meta-data", + "format", + "content", + ], + default_vocabs: vec!["core", "applicator", "validation"], + } }); pub(crate) static DRAFT2020: Lazy = Lazy::new(|| { - let mut subschemas = DRAFT2019.subschemas.clone(); - subschemas.extend([("prefixItems", POS_ITEM)]); - Draft { - version: 2020, - id: "$id", - url: "https://json-schema.org/draft/2020-12/schema", - subschemas, - vocab_prefix: "https://json-schema.org/draft/2020-12/vocab/", - all_vocabs: vec![ - "core", - "applicator", - "unevaluated", - "validation", - "meta-data", - "format-annotation", - "format-assertion", - "content", - ], - default_vocabs: vec!["core", "applicator", "unevaluated", "validation"], - } + let mut subschemas = 
DRAFT2019.subschemas.clone(); + subschemas.extend([("prefixItems", POS_ITEM)]); + Draft { + version: 2020, + id: "$id", + url: "https://json-schema.org/draft/2020-12/schema", + subschemas, + vocab_prefix: "https://json-schema.org/draft/2020-12/vocab/", + all_vocabs: vec![ + "core", + "applicator", + "unevaluated", + "validation", + "meta-data", + "format-annotation", + "format-assertion", + "content", + ], + default_vocabs: vec!["core", "applicator", "unevaluated", "validation"], + } }); pub(crate) static STD_METASCHEMAS: Lazy = - Lazy::new(|| load_std_metaschemas().expect("std metaschemas must be compilable")); + Lazy::new(|| load_std_metaschemas().expect("std metaschemas must be compilable")); pub(crate) fn latest() -> &'static Draft { - crate::Draft::default().internal() + crate::Draft::default().internal() } // -- pub(crate) struct Draft { - pub(crate) version: usize, - pub(crate) url: &'static str, - id: &'static str, // property name used to represent id - subschemas: HashMap<&'static str, u8>, // location of subschemas - pub(crate) vocab_prefix: &'static str, // prefix used for vocabulary - pub(crate) all_vocabs: Vec<&'static str>, // names of supported vocabs - pub(crate) default_vocabs: Vec<&'static str>, // names of default vocabs + pub(crate) version: usize, + pub(crate) url: &'static str, + id: &'static str, // property name used to represent id + subschemas: HashMap<&'static str, u8>, // location of subschemas + pub(crate) vocab_prefix: &'static str, // prefix used for vocabulary + pub(crate) all_vocabs: Vec<&'static str>, // names of supported vocabs + pub(crate) default_vocabs: Vec<&'static str>, // names of default vocabs } impl Draft { - pub(crate) fn from_url(url: &str) -> Option<&'static Draft> { - let (mut url, frag) = split(url); - if !frag.is_empty() { - return None; - } - if let Some(s) = url.strip_prefix("http://") { - url = s; - } - if let Some(s) = url.strip_prefix("https://") { - url = s; - } - match url { - "json-schema.org/schema" => 
Some(latest()), - "json-schema.org/draft/2020-12/schema" => Some(&DRAFT2020), - "json-schema.org/draft/2019-09/schema" => Some(&DRAFT2019), - "json-schema.org/draft-07/schema" => Some(&DRAFT7), - "json-schema.org/draft-06/schema" => Some(&DRAFT6), - "json-schema.org/draft-04/schema" => Some(&DRAFT4), - _ => None, - } + pub(crate) fn from_url(url: &str) -> Option<&'static Draft> { + let (mut url, frag) = split(url); + if !frag.is_empty() { + return None; } - - fn get_schema(&self) -> Option { - let url = match self.version { - 2020 => "https://json-schema.org/draft/2020-12/schema", - 2019 => "https://json-schema.org/draft/2019-09/schema", - 7 => "http://json-schema.org/draft-07/schema", - 6 => "http://json-schema.org/draft-06/schema", - 4 => "http://json-schema.org/draft-04/schema", - _ => return None, - }; - let up = UrlPtr { - url: Url::parse(url).unwrap_or_else(|_| panic!("{url} should be valid url")), - ptr: "".into(), - }; - STD_METASCHEMAS.get_by_loc(&up).map(|s| s.idx) + if let Some(s) = url.strip_prefix("http://") { + url = s; } - - pub(crate) fn validate(&self, up: &UrlPtr, v: &Value) -> Result<(), CompileError> { - let Some(sch) = self.get_schema() else { - return Err(CompileError::Bug( - format!("no metaschema preloaded for draft {}", self.version).into(), - )); - }; - STD_METASCHEMAS - .validate(v, sch) - .map_err(|src| CompileError::ValidationError { - url: up.to_string(), - src: src.clone_static(), - }) + if let Some(s) = url.strip_prefix("https://") { + url = s; } - - fn get_id<'a>(&self, obj: &'a Map) -> Option<&'a str> { - if self.version < 2019 && obj.contains_key("$ref") { - return None; // All other properties in a "$ref" object MUST be ignored - } - let Some(Value::String(id)) = obj.get(self.id) else { - return None; - }; - let (id, _) = split(id); // ignore fragment - Some(id).filter(|id| !id.is_empty()) + match url { + "json-schema.org/schema" => Some(latest()), + "json-schema.org/draft/2020-12/schema" => Some(&DRAFT2020), + 
"json-schema.org/draft/2019-09/schema" => Some(&DRAFT2019), + "json-schema.org/draft-07/schema" => Some(&DRAFT7), + "json-schema.org/draft-06/schema" => Some(&DRAFT6), + "json-schema.org/draft-04/schema" => Some(&DRAFT4), + _ => None, } + } - pub(crate) fn get_vocabs( - &self, - url: &Url, - doc: &Value, - ) -> Result>, CompileError> { - if self.version < 2019 { - return Ok(None); - } - let Value::Object(obj) = doc else { - return Ok(None); - }; + fn get_schema(&self) -> Option { + let url = match self.version { + 2020 => "https://json-schema.org/draft/2020-12/schema", + 2019 => "https://json-schema.org/draft/2019-09/schema", + 7 => "http://json-schema.org/draft-07/schema", + 6 => "http://json-schema.org/draft-06/schema", + 4 => "http://json-schema.org/draft-04/schema", + _ => return None, + }; + let up = UrlPtr { + url: Url::parse(url).unwrap_or_else(|_| panic!("{url} should be valid url")), + ptr: "".into(), + }; + STD_METASCHEMAS.get_by_loc(&up).map(|s| s.idx) + } - let Some(Value::Object(obj)) = obj.get("$vocabulary") else { - return Ok(None); - }; + pub(crate) fn validate(&self, up: &UrlPtr, v: &Value) -> Result<(), CompileError> { + let Some(sch) = self.get_schema() else { + return Err(CompileError::Bug( + format!("no metaschema preloaded for draft {}", self.version).into(), + )); + }; + STD_METASCHEMAS + .validate(v, sch, None) + .map_err(|src| CompileError::ValidationError { + url: up.to_string(), + src: src.clone_static(), + }) + } - let mut vocabs = vec![]; - for (vocab, reqd) in obj { - if let Value::Bool(true) = reqd { - let name = vocab - .strip_prefix(self.vocab_prefix) - .filter(|name| self.all_vocabs.contains(name)); - if let Some(name) = name { - vocabs.push(name.to_owned()); // todo: avoid alloc - } else { - return Err(CompileError::UnsupportedVocabulary { - url: url.as_str().to_owned(), - vocabulary: vocab.to_owned(), - }); - } - } - } - Ok(Some(vocabs)) + fn get_id<'a>(&self, obj: &'a Map) -> Option<&'a str> { + if self.version < 2019 && 
obj.contains_key("$ref") { + return None; // All other properties in a "$ref" object MUST be ignored } + let Some(Value::String(id)) = obj.get(self.id) else { + return None; + }; + let (id, _) = split(id); // ignore fragment + Some(id).filter(|id| !id.is_empty()) + } - // collects anchors/dynamic_achors from `sch` into `res`. - // note this does not collect from subschemas in sch. - pub(crate) fn collect_anchors( - &self, - sch: &Value, - sch_ptr: &JsonPointer, - res: &mut Resource, - url: &Url, - ) -> Result<(), CompileError> { - let Value::Object(obj) = sch else { - return Ok(()); - }; - - let mut add_anchor = |anchor: Anchor| match res.anchors.entry(anchor) { - Entry::Occupied(entry) => { - if entry.get() == sch_ptr { - // anchor with same root_ptr already exists - return Ok(()); - } - Err(CompileError::DuplicateAnchor { - url: url.as_str().to_owned(), - anchor: entry.key().to_string(), - ptr1: entry.get().to_string(), - ptr2: sch_ptr.to_string(), - }) - } - entry => { - entry.or_insert(sch_ptr.to_owned()); - Ok(()) - } - }; - - if self.version < 2019 { - if obj.contains_key("$ref") { - return Ok(()); // All other properties in a "$ref" object MUST be ignored - } - // anchor is specified in id - if let Some(Value::String(id)) = obj.get(self.id) { - let Ok((_, frag)) = Fragment::split(id) else { - let loc = UrlFrag::format(url, sch_ptr.as_str()); - return Err(CompileError::ParseAnchorError { loc }); - }; - if let Fragment::Anchor(anchor) = frag { - add_anchor(anchor)?; - }; - return Ok(()); - } - } - if self.version >= 2019 { - if let Some(Value::String(anchor)) = obj.get("$anchor") { - add_anchor(anchor.as_str().into())?; - } - } - if self.version >= 2020 { - if let Some(Value::String(anchor)) = obj.get("$dynamicAnchor") { - add_anchor(anchor.as_str().into())?; - res.dynamic_anchors.insert(anchor.as_str().into()); - } - } - Ok(()) + pub(crate) fn get_vocabs( + &self, + url: &Url, + doc: &Value, + ) -> Result>, CompileError> { + if self.version < 2019 { + return 
Ok(None); } + let Value::Object(obj) = doc else { + return Ok(None); + }; - // error is json-ptr to invalid id - pub(crate) fn collect_resources( - &self, - sch: &Value, - base: &Url, // base of json - sch_ptr: JsonPointer, // ptr of json - url: &Url, - resources: &mut HashMap, - ) -> Result<(), CompileError> { - if resources.contains_key(&sch_ptr) { - // resources are already collected - return Ok(()); - } - if let Value::Bool(_) = sch { - if sch_ptr.is_empty() { - // root resource - resources.insert(sch_ptr.clone(), Resource::new(sch_ptr, base.clone())); - } - return Ok(()); - } + let Some(Value::Object(obj)) = obj.get("$vocabulary") else { + return Ok(None); + }; - let Value::Object(obj) = sch else { - return Ok(()); - }; - - let mut base = base; - let tmp; - let res = if let Some(id) = self.get_id(obj) { - let Ok(id) = UrlFrag::join(base, id) else { - let loc = UrlFrag::format(url, sch_ptr.as_str()); - return Err(CompileError::ParseIdError { loc }); - }; - tmp = id.url; - base = &tmp; - Some(Resource::new(sch_ptr.clone(), base.clone())) - } else if sch_ptr.is_empty() { - // root resource - Some(Resource::new(sch_ptr.clone(), base.clone())) + let mut vocabs = vec![]; + for (vocab, reqd) in obj { + if let Value::Bool(true) = reqd { + let name = vocab + .strip_prefix(self.vocab_prefix) + .filter(|name| self.all_vocabs.contains(name)); + if let Some(name) = name { + vocabs.push(name.to_owned()); // todo: avoid alloc } else { - None - }; - if let Some(res) = res { - if let Some(dup) = resources.values_mut().find(|res| res.id == *base) { - return Err(CompileError::DuplicateId { - url: url.to_string(), - id: base.to_string(), - ptr1: res.ptr.to_string(), - ptr2: dup.ptr.to_string(), - }); - } - resources.insert(sch_ptr.clone(), res); + return Err(CompileError::UnsupportedVocabulary { + url: url.as_str().to_owned(), + vocabulary: vocab.to_owned(), + }); } + } + } + Ok(Some(vocabs)) + } - // collect anchors into base resource - if let Some(res) = 
resources.values_mut().find(|res| res.id == *base) { - self.collect_anchors(sch, &sch_ptr, res, url)?; - } else { - debug_assert!(false, "base resource must exist"); - } + // collects anchors/dynamic_achors from `sch` into `res`. + // note this does not collect from subschemas in sch. + pub(crate) fn collect_anchors( + &self, + sch: &Value, + sch_ptr: &JsonPointer, + res: &mut Resource, + url: &Url, + ) -> Result<(), CompileError> { + let Value::Object(obj) = sch else { + return Ok(()); + }; - for (&kw, &pos) in &self.subschemas { - let Some(v) = obj.get(kw) else { - continue; - }; - if pos & POS_SELF != 0 { - let ptr = sch_ptr.append(kw); - self.collect_resources(v, base, ptr, url, resources)?; - } - if pos & POS_ITEM != 0 { - if let Value::Array(arr) = v { - for (i, item) in arr.iter().enumerate() { - let ptr = sch_ptr.append2(kw, &i.to_string()); - self.collect_resources(item, base, ptr, url, resources)?; - } - } - } - if pos & POS_PROP != 0 { - if let Value::Object(obj) = v { - for (pname, pvalue) in obj { - let ptr = sch_ptr.append2(kw, pname); - self.collect_resources(pvalue, base, ptr, url, resources)?; - } - } - } + let mut add_anchor = |anchor: Anchor| match res.anchors.entry(anchor) { + Entry::Occupied(entry) => { + if entry.get() == sch_ptr { + // anchor with same root_ptr already exists + return Ok(()); } + Err(CompileError::DuplicateAnchor { + url: url.as_str().to_owned(), + anchor: entry.key().to_string(), + ptr1: entry.get().to_string(), + ptr2: sch_ptr.to_string(), + }) + } + entry => { + entry.or_insert(sch_ptr.to_owned()); Ok(()) + } + }; + + if self.version < 2019 { + if obj.contains_key("$ref") { + return Ok(()); // All other properties in a "$ref" object MUST be ignored + } + // anchor is specified in id + if let Some(Value::String(id)) = obj.get(self.id) { + let Ok((_, frag)) = Fragment::split(id) else { + let loc = UrlFrag::format(url, sch_ptr.as_str()); + return Err(CompileError::ParseAnchorError { loc }); + }; + if let 
Fragment::Anchor(anchor) = frag { + add_anchor(anchor)?; + }; + return Ok(()); + } + } + if self.version >= 2019 { + if let Some(Value::String(anchor)) = obj.get("$anchor") { + add_anchor(anchor.as_str().into())?; + } + } + if self.version >= 2020 { + if let Some(Value::String(anchor)) = obj.get("$dynamicAnchor") { + add_anchor(anchor.as_str().into())?; + res.dynamic_anchors.insert(anchor.as_str().into()); + } + } + Ok(()) + } + + // error is json-ptr to invalid id + pub(crate) fn collect_resources( + &self, + sch: &Value, + base: &Url, // base of json + sch_ptr: JsonPointer, // ptr of json + url: &Url, + resources: &mut HashMap, + ) -> Result<(), CompileError> { + if resources.contains_key(&sch_ptr) { + // resources are already collected + return Ok(()); + } + if let Value::Bool(_) = sch { + if sch_ptr.is_empty() { + // root resource + resources.insert(sch_ptr.clone(), Resource::new(sch_ptr, base.clone())); + } + return Ok(()); } - pub(crate) fn is_subschema(&self, ptr: &str) -> bool { - if ptr.is_empty() { + let Value::Object(obj) = sch else { + return Ok(()); + }; + + let mut base = base; + let tmp; + let res = if let Some(id) = self.get_id(obj) { + let Ok(id) = UrlFrag::join(base, id) else { + let loc = UrlFrag::format(url, sch_ptr.as_str()); + return Err(CompileError::ParseIdError { loc }); + }; + tmp = id.url; + base = &tmp; + Some(Resource::new(sch_ptr.clone(), base.clone())) + } else if sch_ptr.is_empty() { + // root resource + Some(Resource::new(sch_ptr.clone(), base.clone())) + } else { + None + }; + if let Some(res) = res { + if let Some(dup) = resources.values_mut().find(|res| res.id == *base) { + return Err(CompileError::DuplicateId { + url: url.to_string(), + id: base.to_string(), + ptr1: res.ptr.to_string(), + ptr2: dup.ptr.to_string(), + }); + } + resources.insert(sch_ptr.clone(), res); + } + + // collect anchors into base resource + if let Some(res) = resources.values_mut().find(|res| res.id == *base) { + self.collect_anchors(sch, &sch_ptr, res, 
url)?; + } else { + debug_assert!(false, "base resource must exist"); + } + + for (&kw, &pos) in &self.subschemas { + let Some(v) = obj.get(kw) else { + continue; + }; + if pos & POS_SELF != 0 { + let ptr = sch_ptr.append(kw); + self.collect_resources(v, base, ptr, url, resources)?; + } + if pos & POS_ITEM != 0 { + if let Value::Array(arr) = v { + for (i, item) in arr.iter().enumerate() { + let ptr = sch_ptr.append2(kw, &i.to_string()); + self.collect_resources(item, base, ptr, url, resources)?; + } + } + } + if pos & POS_PROP != 0 { + if let Value::Object(obj) = v { + for (pname, pvalue) in obj { + let ptr = sch_ptr.append2(kw, pname); + self.collect_resources(pvalue, base, ptr, url, resources)?; + } + } + } + } + Ok(()) + } + + pub(crate) fn is_subschema(&self, ptr: &str) -> bool { + if ptr.is_empty() { + return true; + } + + fn split(mut ptr: &str) -> (&str, &str) { + ptr = &ptr[1..]; // rm `/` prefix + if let Some(i) = ptr.find('/') { + (&ptr[..i], &ptr[i..]) + } else { + (ptr, "") + } + } + + let (tok, ptr) = split(ptr); + + if let Some(&pos) = self.subschemas.get(tok) { + if pos & POS_SELF != 0 && self.is_subschema(ptr) { + return true; + } + if !ptr.is_empty() { + if pos & POS_PROP != 0 { + let (_, ptr) = split(ptr); + if self.is_subschema(ptr) { return true; + } } - - fn split(mut ptr: &str) -> (&str, &str) { - ptr = &ptr[1..]; // rm `/` prefix - if let Some(i) = ptr.find('/') { - (&ptr[..i], &ptr[i..]) - } else { - (ptr, "") - } + if pos & POS_ITEM != 0 { + let (tok, ptr) = split(ptr); + if usize::from_str(tok).is_ok() && self.is_subschema(ptr) { + return true; + } } - - let (tok, ptr) = split(ptr); - - if let Some(&pos) = self.subschemas.get(tok) { - if pos & POS_SELF != 0 && self.is_subschema(ptr) { - return true; - } - if !ptr.is_empty() { - if pos & POS_PROP != 0 { - let (_, ptr) = split(ptr); - if self.is_subschema(ptr) { - return true; - } - } - if pos & POS_ITEM != 0 { - let (tok, ptr) = split(ptr); - if usize::from_str(tok).is_ok() && 
self.is_subschema(ptr) { - return true; - } - } - } - } - - false + } } + + false + } } fn load_std_metaschemas() -> Result { - let mut schemas = Schemas::new(); - let mut compiler = Compiler::new(); - compiler.enable_format_assertions(); - compiler.compile("https://json-schema.org/draft/2020-12/schema", &mut schemas)?; - compiler.compile("https://json-schema.org/draft/2019-09/schema", &mut schemas)?; - compiler.compile("http://json-schema.org/draft-07/schema", &mut schemas)?; - compiler.compile("http://json-schema.org/draft-06/schema", &mut schemas)?; - compiler.compile("http://json-schema.org/draft-04/schema", &mut schemas)?; - Ok(schemas) + let mut schemas = Schemas::new(); + let mut compiler = Compiler::new(); + compiler.enable_format_assertions(); + compiler.compile("https://json-schema.org/draft/2020-12/schema", &mut schemas)?; + compiler.compile("https://json-schema.org/draft/2019-09/schema", &mut schemas)?; + compiler.compile("http://json-schema.org/draft-07/schema", &mut schemas)?; + compiler.compile("http://json-schema.org/draft-06/schema", &mut schemas)?; + compiler.compile("http://json-schema.org/draft-04/schema", &mut schemas)?; + Ok(schemas) } #[cfg(test)] mod tests { - use crate::{Compiler, Schemas}; + use crate::{Compiler, Schemas}; - use super::*; + use super::*; - #[test] - fn test_meta() { - let mut schemas = Schemas::default(); - let mut compiler = Compiler::default(); - let v: Value = serde_json::from_str(include_str!("metaschemas/draft-04/schema")).unwrap(); - let url = "https://json-schema.org/draft-04/schema"; - compiler.add_resource(url, v).unwrap(); - compiler.compile(url, &mut schemas).unwrap(); + #[test] + fn test_meta() { + let mut schemas = Schemas::default(); + let mut compiler = Compiler::default(); + let v: Value = serde_json::from_str(include_str!("metaschemas/draft-04/schema")).unwrap(); + let url = "https://json-schema.org/draft-04/schema"; + compiler.add_resource(url, v).unwrap(); + compiler.compile(url, &mut schemas).unwrap(); 
+ } + + #[test] + fn test_from_url() { + let tests = [ + ("http://json-schema.org/draft/2020-12/schema", Some(2020)), // http url + ("https://json-schema.org/draft/2020-12/schema", Some(2020)), // https url + ("https://json-schema.org/schema", Some(latest().version)), // latest + ("https://json-schema.org/draft-04/schema", Some(4)), + ]; + for (url, version) in tests { + let got = Draft::from_url(url).map(|d| d.version); + assert_eq!(got, version, "for {url}"); } + } - #[test] - fn test_from_url() { - let tests = [ - ("http://json-schema.org/draft/2020-12/schema", Some(2020)), // http url - ("https://json-schema.org/draft/2020-12/schema", Some(2020)), // https url - ("https://json-schema.org/schema", Some(latest().version)), // latest - ("https://json-schema.org/draft-04/schema", Some(4)), - ]; - for (url, version) in tests { - let got = Draft::from_url(url).map(|d| d.version); - assert_eq!(got, version, "for {url}"); - } - } - - #[test] - fn test_collect_ids() { - let url = Url::parse("http://a.com/schema.json").unwrap(); - let json: Value = serde_json::from_str( - r#"{ - "id": "http://a.com/schemas/schema.json", - "definitions": { - "s1": { "id": "http://a.com/definitions/s1" }, - "s2": { - "id": "../s2", - "items": [ - { "id": "http://c.com/item" }, - { "id": "http://d.com/item" } - ] - }, - "s3": { - "definitions": { - "s1": { - "id": "s3", - "items": { - "id": "http://b.com/item" - } - } - } - }, - "s4": { "id": "http://e.com/def#abcd" } + #[test] + fn test_collect_ids() { + let url = Url::parse("http://a.com/schema.json").unwrap(); + let json: Value = serde_json::from_str( + r#"{ + "id": "http://a.com/schemas/schema.json", + "definitions": { + "s1": { "id": "http://a.com/definitions/s1" }, + "s2": { + "id": "../s2", + "items": [ + { "id": "http://c.com/item" }, + { "id": "http://d.com/item" } + ] + }, + "s3": { + "definitions": { + "s1": { + "id": "s3", + "items": { + "id": "http://b.com/item" } - }"#, - ) - .unwrap(); - - let want = { - let mut m = 
HashMap::new(); - m.insert("", "http://a.com/schemas/schema.json"); // root with id - m.insert("/definitions/s1", "http://a.com/definitions/s1"); - m.insert("/definitions/s2", "http://a.com/s2"); // relative id - m.insert("/definitions/s3/definitions/s1", "http://a.com/schemas/s3"); - m.insert("/definitions/s3/definitions/s1/items", "http://b.com/item"); - m.insert("/definitions/s2/items/0", "http://c.com/item"); - m.insert("/definitions/s2/items/1", "http://d.com/item"); - m.insert("/definitions/s4", "http://e.com/def"); // id with fragments - m - }; - let mut got = HashMap::new(); - DRAFT4 - .collect_resources(&json, &url, "".into(), &url, &mut got) - .unwrap(); - let got = got - .iter() - .map(|(k, v)| (k.as_str(), v.id.as_str())) - .collect::>(); - assert_eq!(got, want); - } - - #[test] - fn test_collect_anchors() { - let url = Url::parse("http://a.com/schema.json").unwrap(); - let json: Value = serde_json::from_str( - r#"{ - "$defs": { - "s2": { - "$id": "http://b.com", - "$anchor": "b1", - "items": [ - { "$anchor": "b2" }, - { - "$id": "http//c.com", - "items": [ - {"$anchor": "c1"}, - {"$dynamicAnchor": "c2"} - ] - }, - { "$dynamicAnchor": "b3" } - ] - } - } - }"#, - ) - .unwrap(); - let mut resources = HashMap::new(); - DRAFT2020 - .collect_resources(&json, &url, "".into(), &url, &mut resources) - .unwrap(); - assert!(resources.get("").unwrap().anchors.is_empty()); - assert_eq!(resources.get("/$defs/s2").unwrap().anchors, { - let mut want = HashMap::new(); - want.insert("b1".into(), "/$defs/s2".into()); - want.insert("b2".into(), "/$defs/s2/items/0".into()); - want.insert("b3".into(), "/$defs/s2/items/2".into()); - want - }); - assert_eq!(resources.get("/$defs/s2/items/1").unwrap().anchors, { - let mut want = HashMap::new(); - want.insert("c1".into(), "/$defs/s2/items/1/items/0".into()); - want.insert("c2".into(), "/$defs/s2/items/1/items/1".into()); - want - }); - } - - #[test] - fn test_is_subschema() { - let tests = vec![("/allOf/0", true), 
("/allOf/$defs", false)]; - for test in tests { - let got = DRAFT2020.is_subschema(test.0); - assert_eq!(got, test.1, "{}", test.0); + } + } + }, + "s4": { "id": "http://e.com/def#abcd" } } + }"#, + ) + .unwrap(); + + let want = { + let mut m = HashMap::new(); + m.insert("", "http://a.com/schemas/schema.json"); // root with id + m.insert("/definitions/s1", "http://a.com/definitions/s1"); + m.insert("/definitions/s2", "http://a.com/s2"); // relative id + m.insert("/definitions/s3/definitions/s1", "http://a.com/schemas/s3"); + m.insert("/definitions/s3/definitions/s1/items", "http://b.com/item"); + m.insert("/definitions/s2/items/0", "http://c.com/item"); + m.insert("/definitions/s2/items/1", "http://d.com/item"); + m.insert("/definitions/s4", "http://e.com/def"); // id with fragments + m + }; + let mut got = HashMap::new(); + DRAFT4 + .collect_resources(&json, &url, "".into(), &url, &mut got) + .unwrap(); + let got = got + .iter() + .map(|(k, v)| (k.as_str(), v.id.as_str())) + .collect::>(); + assert_eq!(got, want); + } + + #[test] + fn test_collect_anchors() { + let url = Url::parse("http://a.com/schema.json").unwrap(); + let json: Value = serde_json::from_str( + r#"{ + "$defs": { + "s2": { + "$id": "http://b.com", + "$anchor": "b1", + "items": [ + { "$anchor": "b2" }, + { + "$id": "http//c.com", + "items": [ + {"$anchor": "c1"}, + {"$dynamicAnchor": "c2"} + ] + }, + { "$dynamicAnchor": "b3" } + ] + } + } + }"#, + ) + .unwrap(); + let mut resources = HashMap::new(); + DRAFT2020 + .collect_resources(&json, &url, "".into(), &url, &mut resources) + .unwrap(); + assert!(resources.get("").unwrap().anchors.is_empty()); + assert_eq!(resources.get("/$defs/s2").unwrap().anchors, { + let mut want = HashMap::new(); + want.insert("b1".into(), "/$defs/s2".into()); + want.insert("b2".into(), "/$defs/s2/items/0".into()); + want.insert("b3".into(), "/$defs/s2/items/2".into()); + want + }); + assert_eq!(resources.get("/$defs/s2/items/1").unwrap().anchors, { + let mut want = 
HashMap::new(); + want.insert("c1".into(), "/$defs/s2/items/1/items/0".into()); + want.insert("c2".into(), "/$defs/s2/items/1/items/1".into()); + want + }); + } + + #[test] + fn test_is_subschema() { + let tests = vec![("/allOf/0", true), ("/allOf/$defs", false)]; + for test in tests { + let got = DRAFT2020.is_subschema(test.0); + assert_eq!(got, test.1, "{}", test.0); } + } } diff --git a/validator/src/ecma.rs b/validator/src/ecma.rs index 78112f6..894eb62 100644 --- a/validator/src/ecma.rs +++ b/validator/src/ecma.rs @@ -6,192 +6,192 @@ use regex_syntax::ast::{self, *}; // covert ecma regex to rust regex if possible // see https://262.ecma-international.org/11.0/#sec-regexp-regular-expression-objects pub(crate) fn convert(pattern: &str) -> Result, Box> { - let mut pattern = Cow::Borrowed(pattern); + let mut pattern = Cow::Borrowed(pattern); - let mut ast = loop { - match Parser::new().parse(pattern.as_ref()) { - Ok(ast) => break ast, - Err(e) => { - if let Some(s) = fix_error(&e) { - pattern = Cow::Owned(s); - } else { - Err(e)?; - } - } - } - }; - - loop { - let translator = Translator { - pat: pattern.as_ref(), - out: None, - }; - if let Some(updated_pattern) = ast::visit(&ast, translator)? { - match Parser::new().parse(&updated_pattern) { - Ok(updated_ast) => { - pattern = Cow::Owned(updated_pattern); - ast = updated_ast; - } - Err(e) => { - debug_assert!( - false, - "ecma::translate changed {:?} to {:?}: {e}", - pattern, updated_pattern - ); - break; - } - } + let mut ast = loop { + match Parser::new().parse(pattern.as_ref()) { + Ok(ast) => break ast, + Err(e) => { + if let Some(s) = fix_error(&e) { + pattern = Cow::Owned(s); } else { - break; + Err(e)?; } + } } - Ok(pattern) + }; + + loop { + let translator = Translator { + pat: pattern.as_ref(), + out: None, + }; + if let Some(updated_pattern) = ast::visit(&ast, translator)? 
{ + match Parser::new().parse(&updated_pattern) { + Ok(updated_ast) => { + pattern = Cow::Owned(updated_pattern); + ast = updated_ast; + } + Err(e) => { + debug_assert!( + false, + "ecma::translate changed {:?} to {:?}: {e}", + pattern, updated_pattern + ); + break; + } + } + } else { + break; + } + } + Ok(pattern) } fn fix_error(e: &Error) -> Option { - if let ErrorKind::EscapeUnrecognized = e.kind() { - let (start, end) = (e.span().start.offset, e.span().end.offset); - let s = &e.pattern()[start..end]; - if let r"\c" = s { - // handle \c{control_letter} - if let Some(control_letter) = e.pattern()[end..].chars().next() { - if control_letter.is_ascii_alphabetic() { - return Some(format!( - "{}{}{}", - &e.pattern()[..start], - ((control_letter as u8) % 32) as char, - &e.pattern()[end + 1..], - )); - } - } + if let ErrorKind::EscapeUnrecognized = e.kind() { + let (start, end) = (e.span().start.offset, e.span().end.offset); + let s = &e.pattern()[start..end]; + if let r"\c" = s { + // handle \c{control_letter} + if let Some(control_letter) = e.pattern()[end..].chars().next() { + if control_letter.is_ascii_alphabetic() { + return Some(format!( + "{}{}{}", + &e.pattern()[..start], + ((control_letter as u8) % 32) as char, + &e.pattern()[end + 1..], + )); } + } } - None + } + None } /** handles following translations: -- \d should ascii digits only. so replace with [0-9] -- \D should match everything but ascii digits. so replace with [^0-9] -- \w should match ascii letters only. so replace with [a-zA-Z0-9_] -- \W should match everything but ascii letters. so replace with [^a-zA-Z0-9_] -- \s and \S differences -- \a is not an ECMA 262 control escape +- \d should ascii digits only. so replace with [0-9] +- \D should match everything but ascii digits. so replace with [^0-9] +- \w should match ascii letters only. so replace with [a-zA-Z0-9_] +- \W should match everything but ascii letters. 
so replace with [^a-zA-Z0-9_] +- \s and \S differences +- \a is not an ECMA 262 control escape */ struct Translator<'a> { - pat: &'a str, - out: Option, + pat: &'a str, + out: Option, } impl Translator<'_> { - fn replace(&mut self, span: &Span, with: &str) { - let (start, end) = (span.start.offset, span.end.offset); - self.out = Some(format!("{}{with}{}", &self.pat[..start], &self.pat[end..])); - } + fn replace(&mut self, span: &Span, with: &str) { + let (start, end) = (span.start.offset, span.end.offset); + self.out = Some(format!("{}{with}{}", &self.pat[..start], &self.pat[end..])); + } - fn replace_class_class(&mut self, perl: &ClassPerl) { - match perl.kind { - ClassPerlKind::Digit => { - self.replace(&perl.span, if perl.negated { "[^0-9]" } else { "[0-9]" }); - } - ClassPerlKind::Word => { - let with = &if perl.negated { - "[^A-Za-z0-9_]" - } else { - "[A-Za-z0-9_]" - }; - self.replace(&perl.span, with); - } - ClassPerlKind::Space => { - let with = &if perl.negated { - "[^ \t\n\r\u{000b}\u{000c}\u{00a0}\u{feff}\u{2003}\u{2029}]" - } else { - "[ \t\n\r\u{000b}\u{000c}\u{00a0}\u{feff}\u{2003}\u{2029}]" - }; - self.replace(&perl.span, with); - } - } + fn replace_class_class(&mut self, perl: &ClassPerl) { + match perl.kind { + ClassPerlKind::Digit => { + self.replace(&perl.span, if perl.negated { "[^0-9]" } else { "[0-9]" }); + } + ClassPerlKind::Word => { + let with = &if perl.negated { + "[^A-Za-z0-9_]" + } else { + "[A-Za-z0-9_]" + }; + self.replace(&perl.span, with); + } + ClassPerlKind::Space => { + let with = &if perl.negated { + "[^ \t\n\r\u{000b}\u{000c}\u{00a0}\u{feff}\u{2003}\u{2029}]" + } else { + "[ \t\n\r\u{000b}\u{000c}\u{00a0}\u{feff}\u{2003}\u{2029}]" + }; + self.replace(&perl.span, with); + } } + } } impl Visitor for Translator<'_> { - type Output = Option; - type Err = &'static str; + type Output = Option; + type Err = &'static str; - fn finish(self) -> Result { - Ok(self.out) - } + fn finish(self) -> Result { + Ok(self.out) + } - fn 
visit_class_set_item_pre(&mut self, ast: &ast::ClassSetItem) -> Result<(), Self::Err> { - if let ClassSetItem::Perl(perl) = ast { - self.replace_class_class(perl); - } - Ok(()) + fn visit_class_set_item_pre(&mut self, ast: &ast::ClassSetItem) -> Result<(), Self::Err> { + if let ClassSetItem::Perl(perl) = ast { + self.replace_class_class(perl); } + Ok(()) + } - fn visit_post(&mut self, ast: &Ast) -> Result<(), Self::Err> { - if self.out.is_some() { - return Ok(()); - } - match ast { - Ast::ClassPerl(perl) => { - self.replace_class_class(perl); - } - Ast::Literal(ref literal) => { - if let Literal { - kind: LiteralKind::Special(SpecialLiteralKind::Bell), - .. - } = literal.as_ref() - { - return Err("\\a is not an ECMA 262 control escape"); - } - } - _ => (), - } - Ok(()) + fn visit_post(&mut self, ast: &Ast) -> Result<(), Self::Err> { + if self.out.is_some() { + return Ok(()); } + match ast { + Ast::ClassPerl(perl) => { + self.replace_class_class(perl); + } + Ast::Literal(ref literal) => { + if let Literal { + kind: LiteralKind::Special(SpecialLiteralKind::Bell), + .. 
+ } = literal.as_ref() + { + return Err("\\a is not an ECMA 262 control escape"); + } + } + _ => (), + } + Ok(()) + } } #[cfg(test)] mod tests { - use super::*; + use super::*; - #[test] - fn test_ecma_compat_valid() { - // println!("{:#?}", Parser::new().parse(r#"a\a"#)); - let tests = [ - (r"ab\cAcde\cBfg", "ab\u{1}cde\u{2}fg"), // \c{control_letter} - (r"\\comment", r"\\comment"), // there is no \c - (r"ab\def", r#"ab[0-9]ef"#), // \d - (r"ab[a-z\d]ef", r#"ab[a-z[0-9]]ef"#), // \d inside classSet - (r"ab\Def", r#"ab[^0-9]ef"#), // \d - (r"ab[a-z\D]ef", r#"ab[a-z[^0-9]]ef"#), // \D inside classSet - ]; - for (input, want) in tests { - match convert(input) { - Ok(got) => { - if got.as_ref() != want { - panic!("convert({input:?}): got: {got:?}, want: {want:?}"); - } - } - Err(e) => { - panic!("convert({input:?}) failed: {e}"); - } - } + #[test] + fn test_ecma_compat_valid() { + // println!("{:#?}", Parser::new().parse(r#"a\a"#)); + let tests = [ + (r"ab\cAcde\cBfg", "ab\u{1}cde\u{2}fg"), // \c{control_letter} + (r"\\comment", r"\\comment"), // there is no \c + (r"ab\def", r#"ab[0-9]ef"#), // \d + (r"ab[a-z\d]ef", r#"ab[a-z[0-9]]ef"#), // \d inside classSet + (r"ab\Def", r#"ab[^0-9]ef"#), // \d + (r"ab[a-z\D]ef", r#"ab[a-z[^0-9]]ef"#), // \D inside classSet + ]; + for (input, want) in tests { + match convert(input) { + Ok(got) => { + if got.as_ref() != want { + panic!("convert({input:?}): got: {got:?}, want: {want:?}"); + } } + Err(e) => { + panic!("convert({input:?}) failed: {e}"); + } + } } + } - #[test] - fn test_ecma_compat_invalid() { - // println!("{:#?}", Parser::new().parse(r#"a\a"#)); - let tests = [ - r"\c\n", // \c{invalid_char} - r"abc\adef", // \a is not valid - ]; - for input in tests { - if convert(input).is_ok() { - panic!("convert({input:?}) mut fail"); - } - } + #[test] + fn test_ecma_compat_invalid() { + // println!("{:#?}", Parser::new().parse(r#"a\a"#)); + let tests = [ + r"\c\n", // \c{invalid_char} + r"abc\adef", // \a is not valid + ]; + for 
input in tests { + if convert(input).is_ok() { + panic!("convert({input:?}) mut fail"); + } } + } } diff --git a/validator/src/formats.rs b/validator/src/formats.rs index f6ac288..d3cf247 100644 --- a/validator/src/formats.rs +++ b/validator/src/formats.rs @@ -1,7 +1,7 @@ use std::{ - collections::HashMap, - error::Error, - net::{Ipv4Addr, Ipv6Addr}, + collections::HashMap, + error::Error, + net::{Ipv4Addr, Ipv6Addr}, }; use once_cell::sync::Lazy; @@ -14,825 +14,825 @@ use crate::ecma; /// Defines format for `format` keyword. #[derive(Clone, Copy)] pub struct Format { - /// Name of the format - pub name: &'static str, + /// Name of the format + pub name: &'static str, - /// validates given value. - pub func: fn(v: &Value) -> Result<(), Box>, + /// validates given value. + pub func: fn(v: &Value) -> Result<(), Box>, } pub(crate) static FORMATS: Lazy> = Lazy::new(|| { - let mut m = HashMap::<&'static str, Format>::new(); - let mut register = |name, func| m.insert(name, Format { name, func }); - register("regex", validate_regex); - register("ipv4", validate_ipv4); - register("ipv6", validate_ipv6); - register("hostname", validate_hostname); - register("idn-hostname", validate_idn_hostname); - register("email", validate_email); - register("idn-email", validate_idn_email); - register("date", validate_date); - register("time", validate_time); - register("date-time", validate_date_time); - register("duration", validate_duration); - register("period", validate_period); - register("json-pointer", validate_json_pointer); - register("relative-json-pointer", validate_relative_json_pointer); - register("uuid", validate_uuid); - register("uri", validate_uri); - register("iri", validate_iri); - register("uri-reference", validate_uri_reference); - register("iri-reference", validate_iri_reference); - register("uri-template", validate_uri_template); - m + let mut m = HashMap::<&'static str, Format>::new(); + let mut register = |name, func| m.insert(name, Format { name, func }); + 
register("regex", validate_regex); + register("ipv4", validate_ipv4); + register("ipv6", validate_ipv6); + register("hostname", validate_hostname); + register("idn-hostname", validate_idn_hostname); + register("email", validate_email); + register("idn-email", validate_idn_email); + register("date", validate_date); + register("time", validate_time); + register("date-time", validate_date_time); + register("duration", validate_duration); + register("period", validate_period); + register("json-pointer", validate_json_pointer); + register("relative-json-pointer", validate_relative_json_pointer); + register("uuid", validate_uuid); + register("uri", validate_uri); + register("iri", validate_iri); + register("uri-reference", validate_uri_reference); + register("iri-reference", validate_iri_reference); + register("uri-template", validate_uri_template); + m }); fn validate_regex(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; - ecma::convert(s).map(|_| ()) + let Value::String(s) = v else { + return Ok(()); + }; + ecma::convert(s).map(|_| ()) } fn validate_ipv4(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; - s.parse::()?; - Ok(()) + let Value::String(s) = v else { + return Ok(()); + }; + s.parse::()?; + Ok(()) } fn validate_ipv6(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; - s.parse::()?; - Ok(()) + let Value::String(s) = v else { + return Ok(()); + }; + s.parse::()?; + Ok(()) } fn validate_date(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; - check_date(s) + let Value::String(s) = v else { + return Ok(()); + }; + check_date(s) } fn matches_char(s: &str, index: usize, ch: char) -> bool { - s.is_char_boundary(index) && s[index..].starts_with(ch) + s.is_char_boundary(index) && s[index..].starts_with(ch) } // see https://datatracker.ietf.org/doc/html/rfc3339#section-5.6 fn check_date(s: &str) -> Result<(), Box> { - 
// yyyy-mm-dd - if s.len() != 10 { - Err("must be 10 characters long")?; - } - if !matches_char(s, 4, '-') || !matches_char(s, 7, '-') { - Err("missing hyphen in correct place")?; - } + // yyyy-mm-dd + if s.len() != 10 { + Err("must be 10 characters long")?; + } + if !matches_char(s, 4, '-') || !matches_char(s, 7, '-') { + Err("missing hyphen in correct place")?; + } - let mut ymd = s.splitn(3, '-').filter_map(|t| t.parse::().ok()); - let (Some(y), Some(m), Some(d)) = (ymd.next(), ymd.next(), ymd.next()) else { - Err("non-positive year/month/day")? - }; + let mut ymd = s.splitn(3, '-').filter_map(|t| t.parse::().ok()); + let (Some(y), Some(m), Some(d)) = (ymd.next(), ymd.next(), ymd.next()) else { + Err("non-positive year/month/day")? + }; - if !matches!(m, 1..=12) { - Err(format!("{m} months in year"))?; - } - if !matches!(d, 1..=31) { - Err(format!("{d} days in month"))?; - } + if !matches!(m, 1..=12) { + Err(format!("{m} months in year"))?; + } + if !matches!(d, 1..=31) { + Err(format!("{d} days in month"))?; + } - match m { - 2 => { - let mut feb_days = 28; - if y % 4 == 0 && (y % 100 != 0 || y % 400 == 0) { - feb_days += 1; // leap year - }; - if d > feb_days { - Err(format!("february has {feb_days} days only"))?; - } - } - 4 | 6 | 9 | 11 => { - if d > 30 { - Err("month has 30 days only")?; - } - } - _ => {} + match m { + 2 => { + let mut feb_days = 28; + if y % 4 == 0 && (y % 100 != 0 || y % 400 == 0) { + feb_days += 1; // leap year + }; + if d > feb_days { + Err(format!("february has {feb_days} days only"))?; + } } - Ok(()) + 4 | 6 | 9 | 11 => { + if d > 30 { + Err("month has 30 days only")?; + } + } + _ => {} + } + Ok(()) } fn validate_time(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; - check_time(s) + let Value::String(s) = v else { + return Ok(()); + }; + check_time(s) } fn check_time(mut str: &str) -> Result<(), Box> { - // min: hh:mm:ssZ - if str.len() < 9 { - Err("less than 9 characters long")? 
- } - if !matches_char(str, 2, ':') || !matches_char(str, 5, ':') { - Err("missing colon in correct place")? - } + // min: hh:mm:ssZ + if str.len() < 9 { + Err("less than 9 characters long")? + } + if !matches_char(str, 2, ':') || !matches_char(str, 5, ':') { + Err("missing colon in correct place")? + } - // parse hh:mm:ss - if !str.is_char_boundary(8) { - Err("contains non-ascii char")? + // parse hh:mm:ss + if !str.is_char_boundary(8) { + Err("contains non-ascii char")? + } + let mut hms = (str[..8]) + .splitn(3, ':') + .filter_map(|t| t.parse::().ok()); + let (Some(mut h), Some(mut m), Some(s)) = (hms.next(), hms.next(), hms.next()) else { + Err("non-positive hour/min/sec")? + }; + if h > 23 || m > 59 || s > 60 { + Err("hour/min/sec out of range")? + } + str = &str[8..]; + + // parse sec-frac if present + if let Some(rem) = str.strip_prefix('.') { + let n_digits = rem.chars().take_while(char::is_ascii_digit).count(); + if n_digits == 0 { + Err("no digits in second fraction")?; } - let mut hms = (str[..8]) - .splitn(3, ':') - .filter_map(|t| t.parse::().ok()); - let (Some(mut h), Some(mut m), Some(s)) = (hms.next(), hms.next(), hms.next()) else { - Err("non-positive hour/min/sec")? + str = &rem[n_digits..]; + } + + if str != "z" && str != "Z" { + // parse time-numoffset + if str.len() != 6 { + Err("offset must be 6 characters long")?; + } + let sign: isize = match str.chars().next() { + Some('+') => -1, + Some('-') => 1, + _ => return Err("offset must begin with plus/minus")?, }; - if h > 23 || m > 59 || s > 60 { - Err("hour/min/sec out of range")? - } - str = &str[8..]; - - // parse sec-frac if present - if let Some(rem) = str.strip_prefix('.') { - let n_digits = rem.chars().take_while(char::is_ascii_digit).count(); - if n_digits == 0 { - Err("no digits in second fraction")?; - } - str = &rem[n_digits..]; + str = &str[1..]; + if !matches_char(str, 2, ':') { + Err("missing colon in offset at correct place")? 
} - if str != "z" && str != "Z" { - // parse time-numoffset - if str.len() != 6 { - Err("offset must be 6 characters long")?; - } - let sign: isize = match str.chars().next() { - Some('+') => -1, - Some('-') => 1, - _ => return Err("offset must begin with plus/minus")?, - }; - str = &str[1..]; - if !matches_char(str, 2, ':') { - Err("missing colon in offset at correct place")? - } - - let mut zhm = str.splitn(2, ':').filter_map(|t| t.parse::().ok()); - let (Some(zh), Some(zm)) = (zhm.next(), zhm.next()) else { - Err("non-positive hour/min in offset")? - }; - if zh > 23 || zm > 59 { - Err("hour/min in offset out of range")? - } - - // apply timezone - let mut hm = (h * 60 + m) as isize + sign * (zh * 60 + zm) as isize; - if hm < 0 { - hm += 24 * 60; - debug_assert!(hm >= 0); - } - let hm = hm as usize; - (h, m) = (hm / 60, hm % 60); + let mut zhm = str.splitn(2, ':').filter_map(|t| t.parse::().ok()); + let (Some(zh), Some(zm)) = (zhm.next(), zhm.next()) else { + Err("non-positive hour/min in offset")? + }; + if zh > 23 || zm > 59 { + Err("hour/min in offset out of range")? } - // check leap second - if !(s < 60 || (h == 23 && m == 59)) { - Err("invalid leap second")? + // apply timezone + let mut hm = (h * 60 + m) as isize + sign * (zh * 60 + zm) as isize; + if hm < 0 { + hm += 24 * 60; + debug_assert!(hm >= 0); } - Ok(()) + let hm = hm as usize; + (h, m) = (hm / 60, hm % 60); + } + + // check leap second + if !(s < 60 || (h == 23 && m == 59)) { + Err("invalid leap second")? 
+ } + Ok(()) } fn validate_date_time(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; - check_date_time(s) + let Value::String(s) = v else { + return Ok(()); + }; + check_date_time(s) } fn check_date_time(s: &str) -> Result<(), Box> { - // min: yyyy-mm-ddThh:mm:ssZ - if s.len() < 20 { - Err("less than 20 characters long")?; - } - if !s.is_char_boundary(10) || !s[10..].starts_with(['t', 'T']) { - Err("11th character must be t or T")?; - } - if let Err(e) = check_date(&s[..10]) { - Err(format!("invalid date element: {e}"))?; - } - if let Err(e) = check_time(&s[11..]) { - Err(format!("invalid time element: {e}"))?; - } - Ok(()) + // min: yyyy-mm-ddThh:mm:ssZ + if s.len() < 20 { + Err("less than 20 characters long")?; + } + if !s.is_char_boundary(10) || !s[10..].starts_with(['t', 'T']) { + Err("11th character must be t or T")?; + } + if let Err(e) = check_date(&s[..10]) { + Err(format!("invalid date element: {e}"))?; + } + if let Err(e) = check_time(&s[11..]) { + Err(format!("invalid time element: {e}"))?; + } + Ok(()) } fn validate_duration(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; - check_duration(s) + let Value::String(s) = v else { + return Ok(()); + }; + check_duration(s) } // see https://datatracker.ietf.org/doc/html/rfc3339#appendix-A fn check_duration(s: &str) -> Result<(), Box> { - // must start with 'P' - let Some(s) = s.strip_prefix('P') else { - Err("must start with P")? - }; + // must start with 'P' + let Some(s) = s.strip_prefix('P') else { + Err("must start with P")? + }; + if s.is_empty() { + Err("nothing after P")? + } + + // dur-week + if let Some(s) = s.strip_suffix('W') { if s.is_empty() { - Err("nothing after P")? + Err("no number in week")? } - - // dur-week - if let Some(s) = s.strip_suffix('W') { - if s.is_empty() { - Err("no number in week")? - } - if !s.chars().all(|c| c.is_ascii_digit()) { - Err("invalid week")? 
- } - return Ok(()); + if !s.chars().all(|c| c.is_ascii_digit()) { + Err("invalid week")? } + return Ok(()); + } - static UNITS: [&str; 2] = ["YMD", "HMS"]; - for (i, s) in s.split('T').enumerate() { - let mut s = s; - if i != 0 && s.is_empty() { - Err("no time elements")? - } - let Some(mut units) = UNITS.get(i).cloned() else { - Err("more than one T")? - }; - while !s.is_empty() { - let digit_count = s.chars().take_while(char::is_ascii_digit).count(); - if digit_count == 0 { - Err("missing number")? - } - s = &s[digit_count..]; - let Some(unit) = s.chars().next() else { - Err("missing unit")? - }; - let Some(j) = units.find(unit) else { - if UNITS[i].contains(unit) { - Err(format!("unit {unit} out of order"))? - } - Err(format!("invalid unit {unit}"))? - }; - units = &units[j + 1..]; - s = &s[1..]; - } + static UNITS: [&str; 2] = ["YMD", "HMS"]; + for (i, s) in s.split('T').enumerate() { + let mut s = s; + if i != 0 && s.is_empty() { + Err("no time elements")? } + let Some(mut units) = UNITS.get(i).cloned() else { + Err("more than one T")? + }; + while !s.is_empty() { + let digit_count = s.chars().take_while(char::is_ascii_digit).count(); + if digit_count == 0 { + Err("missing number")? + } + s = &s[digit_count..]; + let Some(unit) = s.chars().next() else { + Err("missing unit")? + }; + let Some(j) = units.find(unit) else { + if UNITS[i].contains(unit) { + Err(format!("unit {unit} out of order"))? + } + Err(format!("invalid unit {unit}"))? + }; + units = &units[j + 1..]; + s = &s[1..]; + } + } - Ok(()) + Ok(()) } // see https://datatracker.ietf.org/doc/html/rfc3339#appendix-A fn validate_period(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; + let Value::String(s) = v else { + return Ok(()); + }; - let Some(slash) = s.find('/') else { - Err("missing slash")? - }; + let Some(slash) = s.find('/') else { + Err("missing slash")? 
+ }; - let (start, end) = (&s[..slash], &s[slash + 1..]); - if start.starts_with('P') { - if let Err(e) = check_duration(start) { - Err(format!("invalid start duration: {e}"))? - } - if let Err(e) = check_date_time(end) { - Err(format!("invalid end date-time: {e}"))? - } - } else { - if let Err(e) = check_date_time(start) { - Err(format!("invalid start date-time: {e}"))? - } - if end.starts_with('P') { - if let Err(e) = check_duration(end) { - Err(format!("invalid end duration: {e}"))?; - } - } else if let Err(e) = check_date_time(end) { - Err(format!("invalid end date-time: {e}"))?; - } + let (start, end) = (&s[..slash], &s[slash + 1..]); + if start.starts_with('P') { + if let Err(e) = check_duration(start) { + Err(format!("invalid start duration: {e}"))? } - Ok(()) + if let Err(e) = check_date_time(end) { + Err(format!("invalid end date-time: {e}"))? + } + } else { + if let Err(e) = check_date_time(start) { + Err(format!("invalid start date-time: {e}"))? + } + if end.starts_with('P') { + if let Err(e) = check_duration(end) { + Err(format!("invalid end duration: {e}"))?; + } + } else if let Err(e) = check_date_time(end) { + Err(format!("invalid end date-time: {e}"))?; + } + } + Ok(()) } fn validate_hostname(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; - check_hostname(s) + let Value::String(s) = v else { + return Ok(()); + }; + check_hostname(s) } // see https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names fn check_hostname(mut s: &str) -> Result<(), Box> { - // entire hostname (including the delimiting dots but not a trailing dot) has a maximum of 253 ASCII characters - s = s.strip_suffix('.').unwrap_or(s); - if s.len() > 253 { - Err("more than 253 characters long")? + // entire hostname (including the delimiting dots but not a trailing dot) has a maximum of 253 ASCII characters + s = s.strip_suffix('.').unwrap_or(s); + if s.len() > 253 { + Err("more than 253 characters long")? 
+ } + + // Hostnames are composed of series of labels concatenated with dots, as are all domain names + for label in s.split('.') { + // Each label must be from 1 to 63 characters long + if !matches!(label.len(), 1..=63) { + Err("label must be 1 to 63 characters long")?; } - // Hostnames are composed of series of labels concatenated with dots, as are all domain names - for label in s.split('.') { - // Each label must be from 1 to 63 characters long - if !matches!(label.len(), 1..=63) { - Err("label must be 1 to 63 characters long")?; - } - - // labels must not start or end with a hyphen - if label.starts_with('-') { - Err("label starts with hyphen")?; - } - - if label.ends_with('-') { - Err("label ends with hyphen")?; - } - - // labels may contain only the ASCII letters 'a' through 'z' (in a case-insensitive manner), - // the digits '0' through '9', and the hyphen ('-') - if let Some(ch) = label - .chars() - .find(|c| !matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-')) - { - Err(format!("invalid character {ch:?}"))?; - } + // labels must not start or end with a hyphen + if label.starts_with('-') { + Err("label starts with hyphen")?; } - Ok(()) + if label.ends_with('-') { + Err("label ends with hyphen")?; + } + + // labels may contain only the ASCII letters 'a' through 'z' (in a case-insensitive manner), + // the digits '0' through '9', and the hyphen ('-') + if let Some(ch) = label + .chars() + .find(|c| !matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-')) + { + Err(format!("invalid character {ch:?}"))?; + } + } + + Ok(()) } fn validate_idn_hostname(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; - check_idn_hostname(s) + let Value::String(s) = v else { + return Ok(()); + }; + check_idn_hostname(s) } fn check_idn_hostname(s: &str) -> Result<(), Box> { - let s = idna::domain_to_ascii_strict(s)?; - let unicode = idna::domain_to_unicode(&s).0; + let s = idna::domain_to_ascii_strict(s)?; + let unicode = 
idna::domain_to_unicode(&s).0; - // see https://www.rfc-editor.org/rfc/rfc5892#section-2.6 - { - static DISALLOWED: [char; 10] = [ - '\u{0640}', // ARABIC TATWEEL - '\u{07FA}', // NKO LAJANYALAN - '\u{302E}', // HANGUL SINGLE DOT TONE MARK - '\u{302F}', // HANGUL DOUBLE DOT TONE MARK - '\u{3031}', // VERTICAL KANA REPEAT MARK - '\u{3032}', // VERTICAL KANA REPEAT WITH VOICED SOUND MARK - '\u{3033}', // VERTICAL KANA REPEAT MARK UPPER HALF - '\u{3034}', // VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HA - '\u{3035}', // VERTICAL KANA REPEAT MARK LOWER HALF - '\u{303B}', // VERTICAL IDEOGRAPHIC ITERATION MARK - ]; - if unicode.contains(DISALLOWED) { - Err("contains disallowed character")?; - } + // see https://www.rfc-editor.org/rfc/rfc5892#section-2.6 + { + static DISALLOWED: [char; 10] = [ + '\u{0640}', // ARABIC TATWEEL + '\u{07FA}', // NKO LAJANYALAN + '\u{302E}', // HANGUL SINGLE DOT TONE MARK + '\u{302F}', // HANGUL DOUBLE DOT TONE MARK + '\u{3031}', // VERTICAL KANA REPEAT MARK + '\u{3032}', // VERTICAL KANA REPEAT WITH VOICED SOUND MARK + '\u{3033}', // VERTICAL KANA REPEAT MARK UPPER HALF + '\u{3034}', // VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HA + '\u{3035}', // VERTICAL KANA REPEAT MARK LOWER HALF + '\u{303B}', // VERTICAL IDEOGRAPHIC ITERATION MARK + ]; + if unicode.contains(DISALLOWED) { + Err("contains disallowed character")?; } + } - // unicode string must not contain "--" in 3rd and 4th position - // and must not start and end with a '-' - // see https://www.rfc-editor.org/rfc/rfc5891#section-4.2.3.1 - { - let count: usize = unicode - .chars() - .skip(2) - .take(2) - .map(|c| if c == '-' { 1 } else { 0 }) - .sum(); - if count == 2 { - Err("unicode string must not contain '--' in 3rd and 4th position")?; - } + // unicode string must not contain "--" in 3rd and 4th position + // and must not start and end with a '-' + // see https://www.rfc-editor.org/rfc/rfc5891#section-4.2.3.1 + { + let count: usize = unicode + .chars() + .skip(2) + 
.take(2) + .map(|c| if c == '-' { 1 } else { 0 }) + .sum(); + if count == 2 { + Err("unicode string must not contain '--' in 3rd and 4th position")?; } + } - // MIDDLE DOT is allowed between 'l' characters only - // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.3 - { - let middle_dot = '\u{00b7}'; - let mut s = unicode.as_str(); - while let Some(i) = s.find(middle_dot) { - let prefix = &s[..i]; - let suffix = &s[i + middle_dot.len_utf8()..]; - if !prefix.ends_with('l') || !suffix.ends_with('l') { - Err("MIDDLE DOT is allowed between 'l' characters only")?; - } - s = suffix; - } + // MIDDLE DOT is allowed between 'l' characters only + // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.3 + { + let middle_dot = '\u{00b7}'; + let mut s = unicode.as_str(); + while let Some(i) = s.find(middle_dot) { + let prefix = &s[..i]; + let suffix = &s[i + middle_dot.len_utf8()..]; + if !prefix.ends_with('l') || !suffix.ends_with('l') { + Err("MIDDLE DOT is allowed between 'l' characters only")?; + } + s = suffix; } + } - // Greek KERAIA must be followed by Greek character - // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.4 - { - let keralia = '\u{0375}'; - let greek = '\u{0370}'..='\u{03FF}'; - let mut s = unicode.as_str(); - while let Some(i) = s.find(keralia) { - let suffix = &s[i + keralia.len_utf8()..]; - if !suffix.starts_with(|c| greek.contains(&c)) { - Err("Greek KERAIA must be followed by Greek character")?; - } - s = suffix; - } + // Greek KERAIA must be followed by Greek character + // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.4 + { + let keralia = '\u{0375}'; + let greek = '\u{0370}'..='\u{03FF}'; + let mut s = unicode.as_str(); + while let Some(i) = s.find(keralia) { + let suffix = &s[i + keralia.len_utf8()..]; + if !suffix.starts_with(|c| greek.contains(&c)) { + Err("Greek KERAIA must be followed by Greek character")?; + } + s = suffix; } + } - // Hebrew GERESH must be preceded by Hebrew character - // see 
https://www.rfc-editor.org/rfc/rfc5892#appendix-A.5 - // - // Hebrew GERSHAYIM must be preceded by Hebrew character - // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.6 - { - let geresh = '\u{05F3}'; - let gereshayim = '\u{05F4}'; - let hebrew = '\u{0590}'..='\u{05FF}'; - for ch in [geresh, gereshayim] { - let mut s = unicode.as_str(); - while let Some(i) = s.find(ch) { - let prefix = &s[..i]; - let suffix = &s[i + ch.len_utf8()..]; - if !prefix.ends_with(|c| hebrew.contains(&c)) { - if i == 0 { - Err("Hebrew GERESH must be preceded by Hebrew character")?; - } else { - Err("Hebrew GERESHYIM must be preceded by Hebrew character")?; - } - } - s = suffix; - } + // Hebrew GERESH must be preceded by Hebrew character + // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.5 + // + // Hebrew GERSHAYIM must be preceded by Hebrew character + // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.6 + { + let geresh = '\u{05F3}'; + let gereshayim = '\u{05F4}'; + let hebrew = '\u{0590}'..='\u{05FF}'; + for ch in [geresh, gereshayim] { + let mut s = unicode.as_str(); + while let Some(i) = s.find(ch) { + let prefix = &s[..i]; + let suffix = &s[i + ch.len_utf8()..]; + if !prefix.ends_with(|c| hebrew.contains(&c)) { + if i == 0 { + Err("Hebrew GERESH must be preceded by Hebrew character")?; + } else { + Err("Hebrew GERESHYIM must be preceded by Hebrew character")?; + } } + s = suffix; + } } + } - // KATAKANA MIDDLE DOT must be with Hiragana, Katakana, or Han - // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.7 + // KATAKANA MIDDLE DOT must be with Hiragana, Katakana, or Han + // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.7 + { + let katakana_middle_dot = '\u{30FB}'; + let hiragana = '\u{3040}'..='\u{309F}'; + let katakana = '\u{30A0}'..='\u{30FF}'; + let han = '\u{4E00}'..='\u{9FFF}'; // https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block): is this range correct?? 
+ if unicode.contains(katakana_middle_dot) { + if unicode.contains(|c| hiragana.contains(&c)) + || unicode.contains(|c| c != katakana_middle_dot && katakana.contains(&c)) + || unicode.contains(|c| han.contains(&c)) + { + // ok + } else { + Err("KATAKANA MIDDLE DOT must be with Hiragana, Katakana, or Han")?; + } + } + } + + // ARABIC-INDIC DIGITS and Extended Arabic-Indic Digits cannot be mixed + // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.8 + // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.9 + { + let arabic_indic_digits = '\u{0660}'..='\u{0669}'; + let extended_arabic_indic_digits = '\u{06F0}'..='\u{06F9}'; + if unicode.contains(|c| arabic_indic_digits.contains(&c)) + && unicode.contains(|c| extended_arabic_indic_digits.contains(&c)) { - let katakana_middle_dot = '\u{30FB}'; - let hiragana = '\u{3040}'..='\u{309F}'; - let katakana = '\u{30A0}'..='\u{30FF}'; - let han = '\u{4E00}'..='\u{9FFF}'; // https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block): is this range correct?? 
- if unicode.contains(katakana_middle_dot) { - if unicode.contains(|c| hiragana.contains(&c)) - || unicode.contains(|c| c != katakana_middle_dot && katakana.contains(&c)) - || unicode.contains(|c| han.contains(&c)) - { - // ok - } else { - Err("KATAKANA MIDDLE DOT must be with Hiragana, Katakana, or Han")?; - } - } + Err("ARABIC-INDIC DIGITS and Extended Arabic-Indic Digits cannot be mixed")?; } + } - // ARABIC-INDIC DIGITS and Extended Arabic-Indic Digits cannot be mixed - // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.8 - // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.9 - { - let arabic_indic_digits = '\u{0660}'..='\u{0669}'; - let extended_arabic_indic_digits = '\u{06F0}'..='\u{06F9}'; - if unicode.contains(|c| arabic_indic_digits.contains(&c)) - && unicode.contains(|c| extended_arabic_indic_digits.contains(&c)) - { - Err("ARABIC-INDIC DIGITS and Extended Arabic-Indic Digits cannot be mixed")?; - } + // ZERO WIDTH JOINER must be preceded by Virama + // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.2 + { + let zero_width_jointer = '\u{200D}'; + static VIRAMA: [char; 61] = [ + '\u{094D}', + '\u{09CD}', + '\u{0A4D}', + '\u{0ACD}', + '\u{0B4D}', + '\u{0BCD}', + '\u{0C4D}', + '\u{0CCD}', + '\u{0D3B}', + '\u{0D3C}', + '\u{0D4D}', + '\u{0DCA}', + '\u{0E3A}', + '\u{0EBA}', + '\u{0F84}', + '\u{1039}', + '\u{103A}', + '\u{1714}', + '\u{1734}', + '\u{17D2}', + '\u{1A60}', + '\u{1B44}', + '\u{1BAA}', + '\u{1BAB}', + '\u{1BF2}', + '\u{1BF3}', + '\u{2D7F}', + '\u{A806}', + '\u{A82C}', + '\u{A8C4}', + '\u{A953}', + '\u{A9C0}', + '\u{AAF6}', + '\u{ABED}', + '\u{10A3F}', + '\u{11046}', + '\u{1107F}', + '\u{110B9}', + '\u{11133}', + '\u{11134}', + '\u{111C0}', + '\u{11235}', + '\u{112EA}', + '\u{1134D}', + '\u{11442}', + '\u{114C2}', + '\u{115BF}', + '\u{1163F}', + '\u{116B6}', + '\u{1172B}', + '\u{11839}', + '\u{1193D}', + '\u{1193E}', + '\u{119E0}', + '\u{11A34}', + '\u{11A47}', + '\u{11A99}', + '\u{11C3F}', + '\u{11D44}', + '\u{11D45}', + 
'\u{11D97}', + ]; // https://www.compart.com/en/unicode/combining/9 + let mut s = unicode.as_str(); + while let Some(i) = s.find(zero_width_jointer) { + let prefix = &s[..i]; + let suffix = &s[i + zero_width_jointer.len_utf8()..]; + if !prefix.ends_with(VIRAMA) { + Err("ZERO WIDTH JOINER must be preceded by Virama")?; + } + s = suffix; } + } - // ZERO WIDTH JOINER must be preceded by Virama - // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.2 - { - let zero_width_jointer = '\u{200D}'; - static VIRAMA: [char; 61] = [ - '\u{094D}', - '\u{09CD}', - '\u{0A4D}', - '\u{0ACD}', - '\u{0B4D}', - '\u{0BCD}', - '\u{0C4D}', - '\u{0CCD}', - '\u{0D3B}', - '\u{0D3C}', - '\u{0D4D}', - '\u{0DCA}', - '\u{0E3A}', - '\u{0EBA}', - '\u{0F84}', - '\u{1039}', - '\u{103A}', - '\u{1714}', - '\u{1734}', - '\u{17D2}', - '\u{1A60}', - '\u{1B44}', - '\u{1BAA}', - '\u{1BAB}', - '\u{1BF2}', - '\u{1BF3}', - '\u{2D7F}', - '\u{A806}', - '\u{A82C}', - '\u{A8C4}', - '\u{A953}', - '\u{A9C0}', - '\u{AAF6}', - '\u{ABED}', - '\u{10A3F}', - '\u{11046}', - '\u{1107F}', - '\u{110B9}', - '\u{11133}', - '\u{11134}', - '\u{111C0}', - '\u{11235}', - '\u{112EA}', - '\u{1134D}', - '\u{11442}', - '\u{114C2}', - '\u{115BF}', - '\u{1163F}', - '\u{116B6}', - '\u{1172B}', - '\u{11839}', - '\u{1193D}', - '\u{1193E}', - '\u{119E0}', - '\u{11A34}', - '\u{11A47}', - '\u{11A99}', - '\u{11C3F}', - '\u{11D44}', - '\u{11D45}', - '\u{11D97}', - ]; // https://www.compart.com/en/unicode/combining/9 - let mut s = unicode.as_str(); - while let Some(i) = s.find(zero_width_jointer) { - let prefix = &s[..i]; - let suffix = &s[i + zero_width_jointer.len_utf8()..]; - if !prefix.ends_with(VIRAMA) { - Err("ZERO WIDTH JOINER must be preceded by Virama")?; - } - s = suffix; - } - } - - check_hostname(&s) + check_hostname(&s) } fn validate_email(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; - check_email(s) + let Value::String(s) = v else { + return Ok(()); + }; + check_email(s) } // see 
https://en.wikipedia.org/wiki/Email_address fn check_email(s: &str) -> Result<(), Box> { - // entire email address to be no more than 254 characters long - if s.len() > 254 { - Err("more than 254 characters long")? + // entire email address to be no more than 254 characters long + if s.len() > 254 { + Err("more than 254 characters long")? + } + + // email address is generally recognized as having two parts joined with an at-sign + let Some(at) = s.rfind('@') else { + Err("missing @")? + }; + let (local, domain) = (&s[..at], &s[at + 1..]); + + // local part may be up to 64 characters long + if local.len() > 64 { + Err("local part more than 64 characters long")? + } + + if local.len() > 1 && local.starts_with('"') && local.ends_with('"') { + // quoted + let local = &local[1..local.len() - 1]; + if local.contains(['\\', '"']) { + Err("backslash and quote not allowed within quoted local part")? + } + } else { + // unquoted + + if local.starts_with('.') { + Err("starts with dot")? + } + if local.ends_with('.') { + Err("ends with dot")? } - // email address is generally recognized as having two parts joined with an at-sign - let Some(at) = s.rfind('@') else { - Err("missing @")? - }; - let (local, domain) = (&s[..at], &s[at + 1..]); - - // local part may be up to 64 characters long - if local.len() > 64 { - Err("local part more than 64 characters long")? + // consecutive dots not allowed + if local.contains("..") { + Err("consecutive dots")? } - if local.len() > 1 && local.starts_with('"') && local.ends_with('"') { - // quoted - let local = &local[1..local.len() - 1]; - if local.contains(['\\', '"']) { - Err("backslash and quote not allowed within quoted local part")? - } - } else { - // unquoted - - if local.starts_with('.') { - Err("starts with dot")? - } - if local.ends_with('.') { - Err("ends with dot")? - } - - // consecutive dots not allowed - if local.contains("..") { - Err("consecutive dots")? 
- } - - // check allowd chars - if let Some(ch) = local - .chars() - .find(|c| !(c.is_ascii_alphanumeric() || ".!#$%&'*+-/=?^_`{|}~".contains(*c))) - { - Err(format!("invalid character {ch:?}"))? - } + // check allowd chars + if let Some(ch) = local + .chars() + .find(|c| !(c.is_ascii_alphanumeric() || ".!#$%&'*+-/=?^_`{|}~".contains(*c))) + { + Err(format!("invalid character {ch:?}"))? } + } - // domain if enclosed in brackets, must match an IP address - if domain.starts_with('[') && domain.ends_with(']') { - let s = &domain[1..domain.len() - 1]; - if let Some(s) = s.strip_prefix("IPv6:") { - if let Err(e) = s.parse::() { - Err(format!("invalid ipv6 address: {e}"))? - } - return Ok(()); - } - if let Err(e) = s.parse::() { - Err(format!("invalid ipv4 address: {e}"))? - } - return Ok(()); + // domain if enclosed in brackets, must match an IP address + if domain.starts_with('[') && domain.ends_with(']') { + let s = &domain[1..domain.len() - 1]; + if let Some(s) = s.strip_prefix("IPv6:") { + if let Err(e) = s.parse::() { + Err(format!("invalid ipv6 address: {e}"))? + } + return Ok(()); } - - // domain must match the requirements for a hostname - if let Err(e) = check_hostname(domain) { - Err(format!("invalid domain: {e}"))? + if let Err(e) = s.parse::() { + Err(format!("invalid ipv4 address: {e}"))? } + return Ok(()); + } - Ok(()) + // domain must match the requirements for a hostname + if let Err(e) = check_hostname(domain) { + Err(format!("invalid domain: {e}"))? + } + + Ok(()) } fn validate_idn_email(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; + let Value::String(s) = v else { + return Ok(()); + }; - let Some(at) = s.rfind('@') else { - Err("missing @")? - }; - let (local, domain) = (&s[..at], &s[at + 1..]); + let Some(at) = s.rfind('@') else { + Err("missing @")? 
+ }; + let (local, domain) = (&s[..at], &s[at + 1..]); - let local = idna::domain_to_ascii_strict(local)?; - let domain = idna::domain_to_ascii_strict(domain)?; - if let Err(e) = check_idn_hostname(&domain) { - Err(format!("invalid domain: {e}"))? - } - check_email(&format!("{local}@{domain}")) + let local = idna::domain_to_ascii_strict(local)?; + let domain = idna::domain_to_ascii_strict(domain)?; + if let Err(e) = check_idn_hostname(&domain) { + Err(format!("invalid domain: {e}"))? + } + check_email(&format!("{local}@{domain}")) } fn validate_json_pointer(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; - check_json_pointer(s) + let Value::String(s) = v else { + return Ok(()); + }; + check_json_pointer(s) } // see https://www.rfc-editor.org/rfc/rfc6901#section-3 fn check_json_pointer(s: &str) -> Result<(), Box> { - if s.is_empty() { - return Ok(()); - } - if !s.starts_with('/') { - Err("not starting with slash")?; - } - for token in s.split('/').skip(1) { - let mut chars = token.chars(); - while let Some(ch) = chars.next() { - if ch == '~' { - if !matches!(chars.next(), Some('0' | '1')) { - Err("~ must be followed by 0 or 1")?; - } - } else if !matches!(ch, '\x00'..='\x2E' | '\x30'..='\x7D' | '\x7F'..='\u{10FFFF}') { - Err("contains disallowed character")?; - } + if s.is_empty() { + return Ok(()); + } + if !s.starts_with('/') { + Err("not starting with slash")?; + } + for token in s.split('/').skip(1) { + let mut chars = token.chars(); + while let Some(ch) = chars.next() { + if ch == '~' { + if !matches!(chars.next(), Some('0' | '1')) { + Err("~ must be followed by 0 or 1")?; } + } else if !matches!(ch, '\x00'..='\x2E' | '\x30'..='\x7D' | '\x7F'..='\u{10FFFF}') { + Err("contains disallowed character")?; + } } - Ok(()) + } + Ok(()) } // see https://tools.ietf.org/html/draft-handrews-relative-json-pointer-01#section-3 fn validate_relative_json_pointer(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return 
Ok(()); - }; + let Value::String(s) = v else { + return Ok(()); + }; - // start with non-negative-integer - let num_digits = s.chars().take_while(char::is_ascii_digit).count(); - if num_digits == 0 { - Err("must start with non-negative integer")?; - } - if num_digits > 1 && s.starts_with('0') { - Err("starts with zero")?; - } - let s = &s[num_digits..]; + // start with non-negative-integer + let num_digits = s.chars().take_while(char::is_ascii_digit).count(); + if num_digits == 0 { + Err("must start with non-negative integer")?; + } + if num_digits > 1 && s.starts_with('0') { + Err("starts with zero")?; + } + let s = &s[num_digits..]; - // followed by either json-pointer or '#' - if s == "#" { - return Ok(()); - } - if let Err(e) = check_json_pointer(s) { - Err(format!("invalid json-pointer element: {e}"))?; - } - Ok(()) + // followed by either json-pointer or '#' + if s == "#" { + return Ok(()); + } + if let Err(e) = check_json_pointer(s) { + Err(format!("invalid json-pointer element: {e}"))?; + } + Ok(()) } // see https://datatracker.ietf.org/doc/html/rfc4122#page-4 fn validate_uuid(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; + let Value::String(s) = v else { + return Ok(()); + }; - static HEX_GROUPS: [usize; 5] = [8, 4, 4, 4, 12]; - let mut i = 0; - for group in s.split('-') { - if i >= HEX_GROUPS.len() { - Err("more than 5 elements")?; - } - if group.len() != HEX_GROUPS[i] { - Err(format!( - "element {} must be {} characters long", - i + 1, - HEX_GROUPS[i] - ))?; - } - if let Some(ch) = group.chars().find(|c| !c.is_ascii_hexdigit()) { - Err(format!("non-hex character {ch:?}"))?; - } - i += 1; + static HEX_GROUPS: [usize; 5] = [8, 4, 4, 4, 12]; + let mut i = 0; + for group in s.split('-') { + if i >= HEX_GROUPS.len() { + Err("more than 5 elements")?; } - if i != HEX_GROUPS.len() { - Err("must have 5 elements")?; + if group.len() != HEX_GROUPS[i] { + Err(format!( + "element {} must be {} characters long", + i + 1, + 
HEX_GROUPS[i] + ))?; } - Ok(()) + if let Some(ch) = group.chars().find(|c| !c.is_ascii_hexdigit()) { + Err(format!("non-hex character {ch:?}"))?; + } + i += 1; + } + if i != HEX_GROUPS.len() { + Err("must have 5 elements")?; + } + Ok(()) } fn validate_uri(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; - if fluent_uri::UriRef::parse(s.as_str())?.scheme().is_none() { - Err("relative url")?; - }; - Ok(()) + let Value::String(s) = v else { + return Ok(()); + }; + if fluent_uri::UriRef::parse(s.as_str())?.scheme().is_none() { + Err("relative url")?; + }; + Ok(()) } fn validate_iri(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; - match Url::parse(s) { - Ok(_) => Ok(()), - Err(url::ParseError::RelativeUrlWithoutBase) => Err("relative url")?, - Err(e) => Err(e)?, - } + let Value::String(s) = v else { + return Ok(()); + }; + match Url::parse(s) { + Ok(_) => Ok(()), + Err(url::ParseError::RelativeUrlWithoutBase) => Err("relative url")?, + Err(e) => Err(e)?, + } } static TEMP_URL: Lazy = Lazy::new(|| Url::parse("http://temp.com").unwrap()); fn parse_uri_reference(s: &str) -> Result> { - if s.contains('\\') { - Err("contains \\\\")?; - } - Ok(TEMP_URL.join(s)?) + if s.contains('\\') { + Err("contains \\\\")?; + } + Ok(TEMP_URL.join(s)?) 
} fn validate_uri_reference(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; - fluent_uri::UriRef::parse(s.as_str())?; - Ok(()) + let Value::String(s) = v else { + return Ok(()); + }; + fluent_uri::UriRef::parse(s.as_str())?; + Ok(()) } fn validate_iri_reference(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; - parse_uri_reference(s)?; - Ok(()) + let Value::String(s) = v else { + return Ok(()); + }; + parse_uri_reference(s)?; + Ok(()) } fn validate_uri_template(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); - }; + let Value::String(s) = v else { + return Ok(()); + }; - let url = parse_uri_reference(s)?; + let url = parse_uri_reference(s)?; - let path = url.path(); - // path we got has curly bases percent encoded - let path = percent_decode_str(path).decode_utf8()?; + let path = url.path(); + // path we got has curly bases percent encoded + let path = percent_decode_str(path).decode_utf8()?; - // ensure curly brackets are not nested and balanced - for part in path.as_ref().split('/') { - let mut want = true; - for got in part - .chars() - .filter(|c| matches!(c, '{' | '}')) - .map(|c| c == '{') - { - if got != want { - Err("nested curly braces")?; - } - want = !want; - } - if !want { - Err("no matching closing brace")? - } + // ensure curly brackets are not nested and balanced + for part in path.as_ref().split('/') { + let mut want = true; + for got in part + .chars() + .filter(|c| matches!(c, '{' | '}')) + .map(|c| c == '{') + { + if got != want { + Err("nested curly braces")?; + } + want = !want; } - Ok(()) + if !want { + Err("no matching closing brace")? + } + } + Ok(()) } diff --git a/validator/src/lib.rs b/validator/src/lib.rs index a5d68b6..1fc59e9 100644 --- a/validator/src/lib.rs +++ b/validator/src/lib.rs @@ -55,17 +55,17 @@ in tree of errors. 
`println!("{validation_error}")` prints: ```no_compile jsonschema validation failed with file:///tmp/customer.json# - at '': missing properties 'age' - at '/billing_address': missing properties 'street_address', 'city', 'state' + at '': missing properties 'age' + at '/billing_address': missing properties 'street_address', 'city', 'state' ``` The alternate form `println!("{validation_error:#}")` prints: ```no_compile jsonschema validation failed with file:///tmp/customer.json# - [I#] [S#/required] missing properties 'age' - [I#/billing_address] [S#/properties/billing_address/$ref] validation failed with file:///tmp/address.json# - [I#/billing_address] [S#/required] missing properties 'street_address', 'city', 'state' + [I#] [S#/required] missing properties 'age' + [I#/billing_address] [S#/properties/billing_address/$ref] validation failed with file:///tmp/address.json# + [I#/billing_address] [S#/required] missing properties 'street_address', 'city', 'state' ``` here `I` refers to the instance document and `S` refers to last schema document. 
@@ -117,14 +117,14 @@ mod validator; #[cfg(not(target_arch = "wasm32"))] pub use loader::FileLoader; pub use { - compiler::{CompileError, Compiler, Draft}, - content::{Decoder, MediaType}, - formats::Format, - loader::{SchemeUrlLoader, UrlLoader}, - output::{ - AbsoluteKeywordLocation, FlagOutput, KeywordPath, OutputError, OutputUnit, SchemaToken, - }, - validator::{InstanceLocation, InstanceToken}, + compiler::{CompileError, Compiler, Draft}, + content::{Decoder, MediaType}, + formats::Format, + loader::{SchemeUrlLoader, UrlLoader}, + output::{ + AbsoluteKeywordLocation, FlagOutput, KeywordPath, OutputError, OutputUnit, SchemaToken, + }, + validator::{InstanceLocation, InstanceToken}, }; use std::{borrow::Cow, collections::HashMap, error::Error, fmt::Display}; @@ -134,6 +134,13 @@ use regex::Regex; use serde_json::{Number, Value}; use util::*; +/// Options for validation process +#[derive(Default, Debug, Clone, Copy)] +pub struct ValidationOptions { + /// treat unevaluated properties as an error + pub be_strict: bool, +} + /// Identifier to compiled schema. #[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct SchemaIndex(usize); @@ -141,226 +148,227 @@ pub struct SchemaIndex(usize); /// Collection of compiled schemas. 
#[derive(Default)] pub struct Schemas { - list: Vec, - map: HashMap, // loc => schema-index + list: Vec, + map: HashMap, // loc => schema-index } impl Schemas { - pub fn new() -> Self { - Self::default() + pub fn new() -> Self { + Self::default() + } + + fn insert(&mut self, locs: Vec, compiled: Vec) { + for (up, sch) in locs.into_iter().zip(compiled.into_iter()) { + let i = self.list.len(); + self.list.push(sch); + self.map.insert(up, i); } + } - fn insert(&mut self, locs: Vec, compiled: Vec) { - for (up, sch) in locs.into_iter().zip(compiled.into_iter()) { - let i = self.list.len(); - self.list.push(sch); - self.map.insert(up, i); - } - } + fn get(&self, idx: SchemaIndex) -> &Schema { + &self.list[idx.0] // todo: return bug + } - fn get(&self, idx: SchemaIndex) -> &Schema { - &self.list[idx.0] // todo: return bug - } + fn get_by_loc(&self, up: &UrlPtr) -> Option<&Schema> { + self.map.get(up).and_then(|&i| self.list.get(i)) + } - fn get_by_loc(&self, up: &UrlPtr) -> Option<&Schema> { - self.map.get(up).and_then(|&i| self.list.get(i)) - } + /// Returns true if `sch_index` is generated for this instance. + pub fn contains(&self, sch_index: SchemaIndex) -> bool { + self.list.get(sch_index.0).is_some() + } - /// Returns true if `sch_index` is generated for this instance. - pub fn contains(&self, sch_index: SchemaIndex) -> bool { - self.list.get(sch_index.0).is_some() - } + pub fn size(&self) -> usize { + self.list.len() + } - pub fn size(&self) -> usize { - self.list.len() - } + /** + Validates `v` with schema identified by `sch_index` - /** - Validates `v` with schema identified by `sch_index` + # Panics - # Panics - - Panics if `sch_index` is not generated for this instance. - [`Schemas::contains`] can be used too ensure that it does not panic. 
- */ - pub fn validate<'s, 'v>( - &'s self, - v: &'v Value, - sch_index: SchemaIndex, - ) -> Result<(), ValidationError<'s, 'v>> { - let Some(sch) = self.list.get(sch_index.0) else { - panic!("Schemas::validate: schema index out of bounds"); - }; - validator::validate(v, sch, self) - } + Panics if `sch_index` is not generated for this instance. + [`Schemas::contains`] can be used too ensure that it does not panic. + */ + pub fn validate<'s, 'v>( + &'s self, + v: &'v Value, + sch_index: SchemaIndex, + options: Option<&'s ValidationOptions>, + ) -> Result<(), ValidationError<'s, 'v>> { + let Some(sch) = self.list.get(sch_index.0) else { + panic!("Schemas::validate: schema index out of bounds"); + }; + validator::validate(v, sch, self, options) + } } #[derive(Default)] struct Schema { - draft_version: usize, - idx: SchemaIndex, - loc: String, - resource: SchemaIndex, - dynamic_anchors: HashMap, - all_props_evaluated: bool, - all_items_evaluated: bool, - num_items_evaluated: usize, + draft_version: usize, + idx: SchemaIndex, + loc: String, + resource: SchemaIndex, + dynamic_anchors: HashMap, + all_props_evaluated: bool, + all_items_evaluated: bool, + num_items_evaluated: usize, - // type agnostic -- - boolean: Option, // boolean schema - ref_: Option, - recursive_ref: Option, - recursive_anchor: bool, - dynamic_ref: Option, - dynamic_anchor: Option, - types: Types, - enum_: Option, - constant: Option, - not: Option, - all_of: Vec, - any_of: Vec, - one_of: Vec, - if_: Option, - then: Option, - else_: Option, - format: Option, + // type agnostic -- + boolean: Option, // boolean schema + ref_: Option, + recursive_ref: Option, + recursive_anchor: bool, + dynamic_ref: Option, + dynamic_anchor: Option, + types: Types, + enum_: Option, + constant: Option, + not: Option, + all_of: Vec, + any_of: Vec, + one_of: Vec, + if_: Option, + then: Option, + else_: Option, + format: Option, - // object -- - min_properties: Option, - max_properties: Option, - required: Vec, - properties: 
AHashMap, - pattern_properties: Vec<(Regex, SchemaIndex)>, - property_names: Option, - additional_properties: Option, - dependent_required: Vec<(String, Vec)>, - dependent_schemas: Vec<(String, SchemaIndex)>, - dependencies: Vec<(String, Dependency)>, - unevaluated_properties: Option, + // object -- + min_properties: Option, + max_properties: Option, + required: Vec, + properties: AHashMap, + pattern_properties: Vec<(Regex, SchemaIndex)>, + property_names: Option, + additional_properties: Option, + dependent_required: Vec<(String, Vec)>, + dependent_schemas: Vec<(String, SchemaIndex)>, + dependencies: Vec<(String, Dependency)>, + unevaluated_properties: Option, - // array -- - min_items: Option, - max_items: Option, - unique_items: bool, - min_contains: Option, - max_contains: Option, - contains: Option, - items: Option, - additional_items: Option, - prefix_items: Vec, - items2020: Option, - unevaluated_items: Option, + // array -- + min_items: Option, + max_items: Option, + unique_items: bool, + min_contains: Option, + max_contains: Option, + contains: Option, + items: Option, + additional_items: Option, + prefix_items: Vec, + items2020: Option, + unevaluated_items: Option, - // string -- - min_length: Option, - max_length: Option, - pattern: Option, - content_encoding: Option, - content_media_type: Option, - content_schema: Option, + // string -- + min_length: Option, + max_length: Option, + pattern: Option, + content_encoding: Option, + content_media_type: Option, + content_schema: Option, - // number -- - minimum: Option, - maximum: Option, - exclusive_minimum: Option, - exclusive_maximum: Option, - multiple_of: Option, + // number -- + minimum: Option, + maximum: Option, + exclusive_minimum: Option, + exclusive_maximum: Option, + multiple_of: Option, } #[derive(Debug)] struct Enum { - /// types that occur in enum - types: Types, - /// values in enum - values: Vec, + /// types that occur in enum + types: Types, + /// values in enum + values: Vec, } 
#[derive(Debug)] enum Items { - SchemaRef(SchemaIndex), - SchemaRefs(Vec), + SchemaRef(SchemaIndex), + SchemaRefs(Vec), } #[derive(Debug)] enum Additional { - Bool(bool), - SchemaRef(SchemaIndex), + Bool(bool), + SchemaRef(SchemaIndex), } #[derive(Debug)] enum Dependency { - Props(Vec), - SchemaRef(SchemaIndex), + Props(Vec), + SchemaRef(SchemaIndex), } struct DynamicRef { - sch: SchemaIndex, - anchor: Option, + sch: SchemaIndex, + anchor: Option, } impl Schema { - fn new(loc: String) -> Self { - Self { - loc, - ..Default::default() - } + fn new(loc: String) -> Self { + Self { + loc, + ..Default::default() } + } } /// JSON data types for JSONSchema #[derive(Debug, PartialEq, Clone, Copy)] pub enum Type { - Null = 1, - Boolean = 2, - Number = 4, - Integer = 8, - String = 16, - Array = 32, - Object = 64, + Null = 1, + Boolean = 2, + Number = 4, + Integer = 8, + String = 16, + Array = 32, + Object = 64, } impl Type { - fn of(v: &Value) -> Self { - match v { - Value::Null => Type::Null, - Value::Bool(_) => Type::Boolean, - Value::Number(_) => Type::Number, - Value::String(_) => Type::String, - Value::Array(_) => Type::Array, - Value::Object(_) => Type::Object, - } + fn of(v: &Value) -> Self { + match v { + Value::Null => Type::Null, + Value::Bool(_) => Type::Boolean, + Value::Number(_) => Type::Number, + Value::String(_) => Type::String, + Value::Array(_) => Type::Array, + Value::Object(_) => Type::Object, } + } - fn from_str(value: &str) -> Option { - match value { - "null" => Some(Self::Null), - "boolean" => Some(Self::Boolean), - "number" => Some(Self::Number), - "integer" => Some(Self::Integer), - "string" => Some(Self::String), - "array" => Some(Self::Array), - "object" => Some(Self::Object), - _ => None, - } + fn from_str(value: &str) -> Option { + match value { + "null" => Some(Self::Null), + "boolean" => Some(Self::Boolean), + "number" => Some(Self::Number), + "integer" => Some(Self::Integer), + "string" => Some(Self::String), + "array" => Some(Self::Array), + 
"object" => Some(Self::Object), + _ => None, } + } - fn primitive(v: &Value) -> bool { - !matches!(Self::of(v), Self::Array | Self::Object) - } + fn primitive(v: &Value) -> bool { + !matches!(Self::of(v), Self::Array | Self::Object) + } } impl Display for Type { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Type::Null => write!(f, "null"), - Type::Boolean => write!(f, "boolean"), - Type::Number => write!(f, "number"), - Type::Integer => write!(f, "integer"), - Type::String => write!(f, "string"), - Type::Array => write!(f, "array"), - Type::Object => write!(f, "object"), - } + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Type::Null => write!(f, "null"), + Type::Boolean => write!(f, "boolean"), + Type::Number => write!(f, "number"), + Type::Integer => write!(f, "integer"), + Type::String => write!(f, "string"), + Type::Array => write!(f, "array"), + Type::Object => write!(f, "object"), } + } } /// Set of [`Type`]s @@ -368,55 +376,55 @@ impl Display for Type { pub struct Types(u8); impl Types { - fn is_empty(self) -> bool { - self.0 == 0 - } + fn is_empty(self) -> bool { + self.0 == 0 + } - fn add(&mut self, t: Type) { - self.0 |= t as u8; - } + fn add(&mut self, t: Type) { + self.0 |= t as u8; + } - /// Returns `true` if this set contains given type. - pub fn contains(&self, t: Type) -> bool { - self.0 & t as u8 != 0 - } + /// Returns `true` if this set contains given type. + pub fn contains(&self, t: Type) -> bool { + self.0 & t as u8 != 0 + } - /// Returns an iterator over types. - pub fn iter(&self) -> impl Iterator + '_ { - static TYPES: [Type; 7] = [ - Type::Null, - Type::Boolean, - Type::Number, - Type::Integer, - Type::String, - Type::Array, - Type::Object, - ]; - TYPES.iter().cloned().filter(|t| self.contains(*t)) - } + /// Returns an iterator over types. 
+ pub fn iter(&self) -> impl Iterator + '_ { + static TYPES: [Type; 7] = [ + Type::Null, + Type::Boolean, + Type::Number, + Type::Integer, + Type::String, + Type::Array, + Type::Object, + ]; + TYPES.iter().cloned().filter(|t| self.contains(*t)) + } } impl FromIterator for Types { - fn from_iter>(iter: T) -> Self { - let mut types = Types::default(); - for t in iter { - types.add(t); - } - types + fn from_iter>(iter: T) -> Self { + let mut types = Types::default(); + for t in iter { + types.add(t); } + types + } } /// Error type for validation failures. #[derive(Debug)] pub struct ValidationError<'s, 'v> { - /// The absolute, dereferenced schema location. - pub schema_url: &'s str, - /// The location of the JSON value within the instance being validated - pub instance_location: InstanceLocation<'v>, - /// kind of error - pub kind: ErrorKind<'s, 'v>, - /// Holds nested errors - pub causes: Vec>, + /// The absolute, dereferenced schema location. + pub schema_url: &'s str, + /// The location of the JSON value within the instance being validated + pub instance_location: InstanceLocation<'v>, + /// kind of error + pub kind: ErrorKind<'s, 'v>, + /// Holds nested errors + pub causes: Vec>, } impl Error for ValidationError<'_, '_> {} @@ -424,293 +432,293 @@ impl Error for ValidationError<'_, '_> {} /// A list specifying general categories of validation errors. 
#[derive(Debug)] pub enum ErrorKind<'s, 'v> { - Group, - Schema { - url: &'s str, - }, - ContentSchema, - PropertyName { - prop: String, - }, - Reference { - kw: &'static str, - url: &'s str, - }, - RefCycle { - url: &'s str, - kw_loc1: String, - kw_loc2: String, - }, - FalseSchema, - Type { - got: Type, - want: Types, - }, - Enum { - want: &'s Vec, - }, - Const { - want: &'s Value, - }, - Format { - got: Cow<'v, Value>, - want: &'static str, - err: Box, - }, - MinProperties { - got: usize, - want: usize, - }, - MaxProperties { - got: usize, - want: usize, - }, - AdditionalProperties { - got: Vec>, - }, - Required { - want: Vec<&'s str>, - }, - Dependency { - /// dependency of prop that failed. - prop: &'s str, - /// missing props. - missing: Vec<&'s str>, - }, - DependentRequired { - /// dependency of prop that failed. - prop: &'s str, - /// missing props. - missing: Vec<&'s str>, - }, - MinItems { - got: usize, - want: usize, - }, - MaxItems { - got: usize, - want: usize, - }, - Contains, - MinContains { - got: Vec, - want: usize, - }, - MaxContains { - got: Vec, - want: usize, - }, - UniqueItems { - got: [usize; 2], - }, - AdditionalItems { - got: usize, - }, - MinLength { - got: usize, - want: usize, - }, - MaxLength { - got: usize, - want: usize, - }, - Pattern { - got: Cow<'v, str>, - want: &'s str, - }, - ContentEncoding { - want: &'static str, - err: Box, - }, - ContentMediaType { - got: Vec, - want: &'static str, - err: Box, - }, - Minimum { - got: Cow<'v, Number>, - want: &'s Number, - }, - Maximum { - got: Cow<'v, Number>, - want: &'s Number, - }, - ExclusiveMinimum { - got: Cow<'v, Number>, - want: &'s Number, - }, - ExclusiveMaximum { - got: Cow<'v, Number>, - want: &'s Number, - }, - MultipleOf { - got: Cow<'v, Number>, - want: &'s Number, - }, - Not, - /// none of the subschemas matched - AllOf, - /// none of the subschemas matched. - AnyOf, - /// - `None`: none of the schemas matched. 
- /// - Some(i, j): subschemas at i, j matched - OneOf(Option<(usize, usize)>), + Group, + Schema { + url: &'s str, + }, + ContentSchema, + PropertyName { + prop: String, + }, + Reference { + kw: &'static str, + url: &'s str, + }, + RefCycle { + url: &'s str, + kw_loc1: String, + kw_loc2: String, + }, + FalseSchema, + Type { + got: Type, + want: Types, + }, + Enum { + want: &'s Vec, + }, + Const { + want: &'s Value, + }, + Format { + got: Cow<'v, Value>, + want: &'static str, + err: Box, + }, + MinProperties { + got: usize, + want: usize, + }, + MaxProperties { + got: usize, + want: usize, + }, + AdditionalProperties { + got: Vec>, + }, + Required { + want: Vec<&'s str>, + }, + Dependency { + /// dependency of prop that failed. + prop: &'s str, + /// missing props. + missing: Vec<&'s str>, + }, + DependentRequired { + /// dependency of prop that failed. + prop: &'s str, + /// missing props. + missing: Vec<&'s str>, + }, + MinItems { + got: usize, + want: usize, + }, + MaxItems { + got: usize, + want: usize, + }, + Contains, + MinContains { + got: Vec, + want: usize, + }, + MaxContains { + got: Vec, + want: usize, + }, + UniqueItems { + got: [usize; 2], + }, + AdditionalItems { + got: usize, + }, + MinLength { + got: usize, + want: usize, + }, + MaxLength { + got: usize, + want: usize, + }, + Pattern { + got: Cow<'v, str>, + want: &'s str, + }, + ContentEncoding { + want: &'static str, + err: Box, + }, + ContentMediaType { + got: Vec, + want: &'static str, + err: Box, + }, + Minimum { + got: Cow<'v, Number>, + want: &'s Number, + }, + Maximum { + got: Cow<'v, Number>, + want: &'s Number, + }, + ExclusiveMinimum { + got: Cow<'v, Number>, + want: &'s Number, + }, + ExclusiveMaximum { + got: Cow<'v, Number>, + want: &'s Number, + }, + MultipleOf { + got: Cow<'v, Number>, + want: &'s Number, + }, + Not, + /// none of the subschemas matched + AllOf, + /// none of the subschemas matched. + AnyOf, + /// - `None`: none of the schemas matched. 
+ /// - Some(i, j): subschemas at i, j matched + OneOf(Option<(usize, usize)>), } impl Display for ErrorKind<'_, '_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Group => write!(f, "validation failed"), - Self::Schema { url } => write!(f, "validation failed with {url}"), - Self::ContentSchema => write!(f, "contentSchema failed"), - Self::PropertyName { prop } => write!(f, "invalid property {}", quote(prop)), - Self::Reference { .. } => { - write!(f, "validation failed") - } - Self::RefCycle { - url, - kw_loc1, - kw_loc2, - } => write!( - f, - "both {} and {} resolve to {url} causing reference cycle", - quote(&kw_loc1.to_string()), - quote(&kw_loc2.to_string()) - ), - Self::FalseSchema => write!(f, "false schema"), - Self::Type { got, want } => { - // todo: why join not working for Type struct ?? - let want = join_iter(want.iter(), " or "); - write!(f, "want {want}, but got {got}",) - } - Self::Enum { want } => { - if want.iter().all(Type::primitive) { - if want.len() == 1 { - write!(f, "value must be ")?; - display(f, &want[0]) - } else { - let want = join_iter(want.iter().map(string), ", "); - write!(f, "value must be one of {want}") - } - } else { - write!(f, "enum failed") - } - } - Self::Const { want } => { - if Type::primitive(want) { - write!(f, "value must be ")?; - display(f, want) - } else { - write!(f, "const failed") - } - } - Self::Format { got, want, err } => { - display(f, got)?; - write!(f, " is not valid {want}: {err}") - } - Self::MinProperties { got, want } => write!( - f, - "minimum {want} properties required, but got {got} properties" - ), - Self::MaxProperties { got, want } => write!( - f, - "maximum {want} properties required, but got {got} properties" - ), - Self::AdditionalProperties { got } => { - write!( - f, - "additionalProperties {} not allowed", - join_iter(got.iter().map(quote), ", ") - ) - } - Self::Required { want } => write!( - f, - "missing properties {}", - 
join_iter(want.iter().map(quote), ", ") - ), - Self::Dependency { prop, missing } => { - write!( - f, - "properties {} required, if {} property exists", - join_iter(missing.iter().map(quote), ", "), - quote(prop) - ) - } - Self::DependentRequired { prop, missing } => write!( - f, - "properties {} required, if {} property exists", - join_iter(missing.iter().map(quote), ", "), - quote(prop) - ), - Self::MinItems { got, want } => { - write!(f, "minimum {want} items required, but got {got} items") - } - Self::MaxItems { got, want } => { - write!(f, "maximum {want} items required, but got {got} items") - } - Self::MinContains { got, want } => { - if got.is_empty() { - write!( - f, - "minimum {want} items required to match contains schema, but found none", - ) - } else { - write!( - f, - "minimum {want} items required to match contains schema, but found {} items at {}", - got.len(), - join_iter(got, ", ") - ) - } - } - Self::Contains => write!(f, "no items match contains schema"), - Self::MaxContains { got, want } => { - write!( - f, - "maximum {want} items required to match contains schema, but found {} items at {}", - got.len(), - join_iter(got, ", ") - ) - } - Self::UniqueItems { got: [i, j] } => write!(f, "items at {i} and {j} are equal"), - Self::AdditionalItems { got } => write!(f, "last {got} additionalItems not allowed"), - Self::MinLength { got, want } => write!(f, "length must be >={want}, but got {got}"), - Self::MaxLength { got, want } => write!(f, "length must be <={want}, but got {got}"), - Self::Pattern { got, want } => { - write!(f, "{} does not match pattern {}", quote(got), quote(want)) - } - Self::ContentEncoding { want, err } => { - write!(f, "value is not {} encoded: {err}", quote(want)) - } - Self::ContentMediaType { want, err, .. 
} => { - write!(f, "value is not of mediatype {}: {err}", quote(want)) - } - Self::Minimum { got, want } => write!(f, "must be >={want}, but got {got}"), - Self::Maximum { got, want } => write!(f, "must be <={want}, but got {got}"), - Self::ExclusiveMinimum { got, want } => write!(f, "must be > {want} but got {got}"), - Self::ExclusiveMaximum { got, want } => write!(f, "must be < {want} but got {got}"), - Self::MultipleOf { got, want } => write!(f, "{got} is not multipleOf {want}"), - Self::Not => write!(f, "not failed"), - Self::AllOf => write!(f, "allOf failed",), - Self::AnyOf => write!(f, "anyOf failed"), - Self::OneOf(None) => write!(f, "oneOf failed, none matched"), - Self::OneOf(Some((i, j))) => write!(f, "oneOf failed, subschemas {i}, {j} matched"), + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Group => write!(f, "validation failed"), + Self::Schema { url } => write!(f, "validation failed with {url}"), + Self::ContentSchema => write!(f, "contentSchema failed"), + Self::PropertyName { prop } => write!(f, "invalid property {}", quote(prop)), + Self::Reference { .. } => { + write!(f, "validation failed") + } + Self::RefCycle { + url, + kw_loc1, + kw_loc2, + } => write!( + f, + "both {} and {} resolve to {url} causing reference cycle", + quote(&kw_loc1.to_string()), + quote(&kw_loc2.to_string()) + ), + Self::FalseSchema => write!(f, "false schema"), + Self::Type { got, want } => { + // todo: why join not working for Type struct ?? 
+ let want = join_iter(want.iter(), " or "); + write!(f, "want {want}, but got {got}",) + } + Self::Enum { want } => { + if want.iter().all(Type::primitive) { + if want.len() == 1 { + write!(f, "value must be ")?; + display(f, &want[0]) + } else { + let want = join_iter(want.iter().map(string), ", "); + write!(f, "value must be one of {want}") + } + } else { + write!(f, "enum failed") } + } + Self::Const { want } => { + if Type::primitive(want) { + write!(f, "value must be ")?; + display(f, want) + } else { + write!(f, "const failed") + } + } + Self::Format { got, want, err } => { + display(f, got)?; + write!(f, " is not valid {want}: {err}") + } + Self::MinProperties { got, want } => write!( + f, + "minimum {want} properties required, but got {got} properties" + ), + Self::MaxProperties { got, want } => write!( + f, + "maximum {want} properties required, but got {got} properties" + ), + Self::AdditionalProperties { got } => { + write!( + f, + "additionalProperties {} not allowed", + join_iter(got.iter().map(quote), ", ") + ) + } + Self::Required { want } => write!( + f, + "missing properties {}", + join_iter(want.iter().map(quote), ", ") + ), + Self::Dependency { prop, missing } => { + write!( + f, + "properties {} required, if {} property exists", + join_iter(missing.iter().map(quote), ", "), + quote(prop) + ) + } + Self::DependentRequired { prop, missing } => write!( + f, + "properties {} required, if {} property exists", + join_iter(missing.iter().map(quote), ", "), + quote(prop) + ), + Self::MinItems { got, want } => { + write!(f, "minimum {want} items required, but got {got} items") + } + Self::MaxItems { got, want } => { + write!(f, "maximum {want} items required, but got {got} items") + } + Self::MinContains { got, want } => { + if got.is_empty() { + write!( + f, + "minimum {want} items required to match contains schema, but found none", + ) + } else { + write!( + f, + "minimum {want} items required to match contains schema, but found {} items at {}", + 
got.len(), + join_iter(got, ", ") + ) + } + } + Self::Contains => write!(f, "no items match contains schema"), + Self::MaxContains { got, want } => { + write!( + f, + "maximum {want} items required to match contains schema, but found {} items at {}", + got.len(), + join_iter(got, ", ") + ) + } + Self::UniqueItems { got: [i, j] } => write!(f, "items at {i} and {j} are equal"), + Self::AdditionalItems { got } => write!(f, "last {got} additionalItems not allowed"), + Self::MinLength { got, want } => write!(f, "length must be >={want}, but got {got}"), + Self::MaxLength { got, want } => write!(f, "length must be <={want}, but got {got}"), + Self::Pattern { got, want } => { + write!(f, "{} does not match pattern {}", quote(got), quote(want)) + } + Self::ContentEncoding { want, err } => { + write!(f, "value is not {} encoded: {err}", quote(want)) + } + Self::ContentMediaType { want, err, .. } => { + write!(f, "value is not of mediatype {}: {err}", quote(want)) + } + Self::Minimum { got, want } => write!(f, "must be >={want}, but got {got}"), + Self::Maximum { got, want } => write!(f, "must be <={want}, but got {got}"), + Self::ExclusiveMinimum { got, want } => write!(f, "must be > {want} but got {got}"), + Self::ExclusiveMaximum { got, want } => write!(f, "must be < {want} but got {got}"), + Self::MultipleOf { got, want } => write!(f, "{got} is not multipleOf {want}"), + Self::Not => write!(f, "not failed"), + Self::AllOf => write!(f, "allOf failed",), + Self::AnyOf => write!(f, "anyOf failed"), + Self::OneOf(None) => write!(f, "oneOf failed, none matched"), + Self::OneOf(Some((i, j))) => write!(f, "oneOf failed, subschemas {i}, {j} matched"), } + } } fn display(f: &mut std::fmt::Formatter, v: &Value) -> std::fmt::Result { - match v { - Value::String(s) => write!(f, "{}", quote(s)), - Value::Array(_) | Value::Object(_) => write!(f, "value"), - _ => write!(f, "{v}"), - } + match v { + Value::String(s) => write!(f, "{}", quote(s)), + Value::Array(_) | Value::Object(_) => 
write!(f, "value"), + _ => write!(f, "{v}"), + } } fn string(primitive: &Value) -> String { - if let Value::String(s) = primitive { - quote(s) - } else { - format!("{primitive}") - } + if let Value::String(s) = primitive { + quote(s) + } else { + format!("{primitive}") + } } diff --git a/validator/src/loader.rs b/validator/src/loader.rs index 8871337..53b3b8b 100644 --- a/validator/src/loader.rs +++ b/validator/src/loader.rs @@ -1,7 +1,7 @@ use std::{ - cell::RefCell, - collections::{HashMap, HashSet}, - error::Error, + cell::RefCell, + collections::{HashMap, HashSet}, + error::Error, }; #[cfg(not(target_arch = "wasm32"))] @@ -13,16 +13,16 @@ use serde_json::Value; use url::Url; use crate::{ - compiler::CompileError, - draft::{latest, Draft}, - util::split, - UrlPtr, + compiler::CompileError, + draft::{latest, Draft}, + util::split, + UrlPtr, }; /// A trait for loading json from given `url` pub trait UrlLoader { - /// Loads json from given absolute `url`. - fn load(&self, url: &str) -> Result>; + /// Loads json from given absolute `url`. + fn load(&self, url: &str) -> Result>; } // -- @@ -32,212 +32,212 @@ pub struct FileLoader; #[cfg(not(target_arch = "wasm32"))] impl UrlLoader for FileLoader { - fn load(&self, url: &str) -> Result> { - let url = Url::parse(url)?; - let path = url.to_file_path().map_err(|_| "invalid file path")?; - let file = File::open(path)?; - Ok(serde_json::from_reader(file)?) - } + fn load(&self, url: &str) -> Result> { + let url = Url::parse(url)?; + let path = url.to_file_path().map_err(|_| "invalid file path")?; + let file = File::open(path)?; + Ok(serde_json::from_reader(file)?) 
+ } } // -- #[derive(Default)] pub struct SchemeUrlLoader { - loaders: HashMap<&'static str, Box>, + loaders: HashMap<&'static str, Box>, } impl SchemeUrlLoader { - pub fn new() -> Self { - Self::default() - } + pub fn new() -> Self { + Self::default() + } - /// Registers [`UrlLoader`] for given url `scheme` - pub fn register(&mut self, scheme: &'static str, url_loader: Box) { - self.loaders.insert(scheme, url_loader); - } + /// Registers [`UrlLoader`] for given url `scheme` + pub fn register(&mut self, scheme: &'static str, url_loader: Box) { + self.loaders.insert(scheme, url_loader); + } } impl UrlLoader for SchemeUrlLoader { - fn load(&self, url: &str) -> Result> { - let url = Url::parse(url)?; - let Some(loader) = self.loaders.get(url.scheme()) else { - return Err(CompileError::UnsupportedUrlScheme { - url: url.as_str().to_owned(), - } - .into()); - }; - loader.load(url.as_str()) - } + fn load(&self, url: &str) -> Result> { + let url = Url::parse(url)?; + let Some(loader) = self.loaders.get(url.scheme()) else { + return Err(CompileError::UnsupportedUrlScheme { + url: url.as_str().to_owned(), + } + .into()); + }; + loader.load(url.as_str()) + } } // -- pub(crate) struct DefaultUrlLoader { - doc_map: RefCell>, - doc_list: AppendList, - loader: Box, + doc_map: RefCell>, + doc_list: AppendList, + loader: Box, } impl DefaultUrlLoader { - #[cfg_attr(target_arch = "wasm32", allow(unused_mut))] - pub fn new() -> Self { - let mut loader = SchemeUrlLoader::new(); - #[cfg(not(target_arch = "wasm32"))] - loader.register("file", Box::new(FileLoader)); - Self { - doc_map: Default::default(), - doc_list: AppendList::new(), - loader: Box::new(loader), - } + #[cfg_attr(target_arch = "wasm32", allow(unused_mut))] + pub fn new() -> Self { + let mut loader = SchemeUrlLoader::new(); + #[cfg(not(target_arch = "wasm32"))] + loader.register("file", Box::new(FileLoader)); + Self { + doc_map: Default::default(), + doc_list: AppendList::new(), + loader: Box::new(loader), + } + } + + pub 
fn get_doc(&self, url: &Url) -> Option<&Value> { + self.doc_map + .borrow() + .get(url) + .and_then(|i| self.doc_list.get(*i)) + } + + pub fn add_doc(&self, url: Url, json: Value) { + if self.get_doc(&url).is_some() { + return; + } + self.doc_list.push(json); + self.doc_map + .borrow_mut() + .insert(url, self.doc_list.len() - 1); + } + + pub fn use_loader(&mut self, loader: Box) { + self.loader = loader; + } + + pub(crate) fn load(&self, url: &Url) -> Result<&Value, CompileError> { + if let Some(doc) = self.get_doc(url) { + return Ok(doc); } - pub fn get_doc(&self, url: &Url) -> Option<&Value> { - self.doc_map - .borrow() - .get(url) - .and_then(|i| self.doc_list.get(*i)) + // check in STD_METAFILES + let doc = if let Some(content) = load_std_meta(url.as_str()) { + serde_json::from_str::(content).map_err(|e| CompileError::LoadUrlError { + url: url.to_string(), + src: e.into(), + })? + } else { + self.loader + .load(url.as_str()) + .map_err(|src| CompileError::LoadUrlError { + url: url.as_str().to_owned(), + src, + })? 
+ }; + self.add_doc(url.clone(), doc); + self.get_doc(url) + .ok_or(CompileError::Bug("doc must exist".into())) + } + + pub(crate) fn get_draft( + &self, + up: &UrlPtr, + doc: &Value, + default_draft: &'static Draft, + mut cycle: HashSet, + ) -> Result<&'static Draft, CompileError> { + let Value::Object(obj) = &doc else { + return Ok(default_draft); + }; + let Some(Value::String(sch)) = obj.get("$schema") else { + return Ok(default_draft); + }; + if let Some(draft) = Draft::from_url(sch) { + return Ok(draft); + } + let (sch, _) = split(sch); + let sch = Url::parse(sch).map_err(|e| CompileError::InvalidMetaSchemaUrl { + url: up.to_string(), + src: e.into(), + })?; + if up.ptr.is_empty() && sch == up.url { + return Err(CompileError::UnsupportedDraft { url: sch.into() }); + } + if !cycle.insert(sch.clone()) { + return Err(CompileError::MetaSchemaCycle { url: sch.into() }); } - pub fn add_doc(&self, url: Url, json: Value) { - if self.get_doc(&url).is_some() { - return; - } - self.doc_list.push(json); - self.doc_map - .borrow_mut() - .insert(url, self.doc_list.len() - 1); - } - - pub fn use_loader(&mut self, loader: Box) { - self.loader = loader; - } - - pub(crate) fn load(&self, url: &Url) -> Result<&Value, CompileError> { - if let Some(doc) = self.get_doc(url) { - return Ok(doc); - } - - // check in STD_METAFILES - let doc = if let Some(content) = load_std_meta(url.as_str()) { - serde_json::from_str::(content).map_err(|e| CompileError::LoadUrlError { - url: url.to_string(), - src: e.into(), - })? - } else { - self.loader - .load(url.as_str()) - .map_err(|src| CompileError::LoadUrlError { - url: url.as_str().to_owned(), - src, - })? 
- }; - self.add_doc(url.clone(), doc); - self.get_doc(url) - .ok_or(CompileError::Bug("doc must exist".into())) - } - - pub(crate) fn get_draft( - &self, - up: &UrlPtr, - doc: &Value, - default_draft: &'static Draft, - mut cycle: HashSet, - ) -> Result<&'static Draft, CompileError> { - let Value::Object(obj) = &doc else { - return Ok(default_draft); - }; - let Some(Value::String(sch)) = obj.get("$schema") else { - return Ok(default_draft); - }; - if let Some(draft) = Draft::from_url(sch) { - return Ok(draft); - } - let (sch, _) = split(sch); - let sch = Url::parse(sch).map_err(|e| CompileError::InvalidMetaSchemaUrl { - url: up.to_string(), - src: e.into(), - })?; - if up.ptr.is_empty() && sch == up.url { - return Err(CompileError::UnsupportedDraft { url: sch.into() }); - } - if !cycle.insert(sch.clone()) { - return Err(CompileError::MetaSchemaCycle { url: sch.into() }); - } - - let doc = self.load(&sch)?; - let up = UrlPtr { - url: sch, - ptr: "".into(), - }; - self.get_draft(&up, doc, default_draft, cycle) - } - - pub(crate) fn get_meta_vocabs( - &self, - doc: &Value, - draft: &'static Draft, - ) -> Result>, CompileError> { - let Value::Object(obj) = &doc else { - return Ok(None); - }; - let Some(Value::String(sch)) = obj.get("$schema") else { - return Ok(None); - }; - if Draft::from_url(sch).is_some() { - return Ok(None); - } - let (sch, _) = split(sch); - let sch = Url::parse(sch).map_err(|e| CompileError::ParseUrlError { - url: sch.to_string(), - src: e.into(), - })?; - let doc = self.load(&sch)?; - draft.get_vocabs(&sch, doc) + let doc = self.load(&sch)?; + let up = UrlPtr { + url: sch, + ptr: "".into(), + }; + self.get_draft(&up, doc, default_draft, cycle) + } + + pub(crate) fn get_meta_vocabs( + &self, + doc: &Value, + draft: &'static Draft, + ) -> Result>, CompileError> { + let Value::Object(obj) = &doc else { + return Ok(None); + }; + let Some(Value::String(sch)) = obj.get("$schema") else { + return Ok(None); + }; + if Draft::from_url(sch).is_some() { + 
return Ok(None); } + let (sch, _) = split(sch); + let sch = Url::parse(sch).map_err(|e| CompileError::ParseUrlError { + url: sch.to_string(), + src: e.into(), + })?; + let doc = self.load(&sch)?; + draft.get_vocabs(&sch, doc) + } } pub(crate) static STD_METAFILES: Lazy> = Lazy::new(|| { - let mut files = HashMap::new(); - macro_rules! add { - ($path:expr) => { - files.insert( - $path["metaschemas/".len()..].to_owned(), - include_str!($path), - ); - }; - } - add!("metaschemas/draft-04/schema"); - add!("metaschemas/draft-06/schema"); - add!("metaschemas/draft-07/schema"); - add!("metaschemas/draft/2019-09/schema"); - add!("metaschemas/draft/2019-09/meta/core"); - add!("metaschemas/draft/2019-09/meta/applicator"); - add!("metaschemas/draft/2019-09/meta/validation"); - add!("metaschemas/draft/2019-09/meta/meta-data"); - add!("metaschemas/draft/2019-09/meta/format"); - add!("metaschemas/draft/2019-09/meta/content"); - add!("metaschemas/draft/2020-12/schema"); - add!("metaschemas/draft/2020-12/meta/core"); - add!("metaschemas/draft/2020-12/meta/applicator"); - add!("metaschemas/draft/2020-12/meta/unevaluated"); - add!("metaschemas/draft/2020-12/meta/validation"); - add!("metaschemas/draft/2020-12/meta/meta-data"); - add!("metaschemas/draft/2020-12/meta/content"); - add!("metaschemas/draft/2020-12/meta/format-annotation"); - add!("metaschemas/draft/2020-12/meta/format-assertion"); - files + let mut files = HashMap::new(); + macro_rules! 
add { + ($path:expr) => { + files.insert( + $path["metaschemas/".len()..].to_owned(), + include_str!($path), + ); + }; + } + add!("metaschemas/draft-04/schema"); + add!("metaschemas/draft-06/schema"); + add!("metaschemas/draft-07/schema"); + add!("metaschemas/draft/2019-09/schema"); + add!("metaschemas/draft/2019-09/meta/core"); + add!("metaschemas/draft/2019-09/meta/applicator"); + add!("metaschemas/draft/2019-09/meta/validation"); + add!("metaschemas/draft/2019-09/meta/meta-data"); + add!("metaschemas/draft/2019-09/meta/format"); + add!("metaschemas/draft/2019-09/meta/content"); + add!("metaschemas/draft/2020-12/schema"); + add!("metaschemas/draft/2020-12/meta/core"); + add!("metaschemas/draft/2020-12/meta/applicator"); + add!("metaschemas/draft/2020-12/meta/unevaluated"); + add!("metaschemas/draft/2020-12/meta/validation"); + add!("metaschemas/draft/2020-12/meta/meta-data"); + add!("metaschemas/draft/2020-12/meta/content"); + add!("metaschemas/draft/2020-12/meta/format-annotation"); + add!("metaschemas/draft/2020-12/meta/format-assertion"); + files }); fn load_std_meta(url: &str) -> Option<&'static str> { - let meta = url - .strip_prefix("http://json-schema.org/") - .or_else(|| url.strip_prefix("https://json-schema.org/")); - if let Some(meta) = meta { - if meta == "schema" { - return load_std_meta(latest().url); - } - return STD_METAFILES.get(meta).cloned(); + let meta = url + .strip_prefix("http://json-schema.org/") + .or_else(|| url.strip_prefix("https://json-schema.org/")); + if let Some(meta) = meta { + if meta == "schema" { + return load_std_meta(latest().url); } - None + return STD_METAFILES.get(meta).cloned(); + } + None } diff --git a/validator/src/output.rs b/validator/src/output.rs index 8da46f8..4665786 100644 --- a/validator/src/output.rs +++ b/validator/src/output.rs @@ -1,622 +1,622 @@ use std::{ - borrow::Cow, - fmt::{Display, Formatter, Write}, + borrow::Cow, + fmt::{Display, Formatter, Write}, }; use serde::{ - ser::{SerializeMap, 
SerializeSeq}, - Serialize, + ser::{SerializeMap, SerializeSeq}, + Serialize, }; use crate::{util::*, ErrorKind, InstanceLocation, ValidationError}; impl<'s> ValidationError<'s, '_> { - fn absolute_keyword_location(&self) -> AbsoluteKeywordLocation<'s> { - if let ErrorKind::Reference { url, .. } = &self.kind { - AbsoluteKeywordLocation { - schema_url: url, - keyword_path: None, - } - } else { - AbsoluteKeywordLocation { - schema_url: self.schema_url, - keyword_path: self.kind.keyword_path(), - } - } + fn absolute_keyword_location(&self) -> AbsoluteKeywordLocation<'s> { + if let ErrorKind::Reference { url, .. } = &self.kind { + AbsoluteKeywordLocation { + schema_url: url, + keyword_path: None, + } + } else { + AbsoluteKeywordLocation { + schema_url: self.schema_url, + keyword_path: self.kind.keyword_path(), + } } + } - fn skip(&self) -> bool { - self.causes.len() == 1 && matches!(self.kind, ErrorKind::Reference { .. }) - } + fn skip(&self) -> bool { + self.causes.len() == 1 && matches!(self.kind, ErrorKind::Reference { .. }) + } - /// The `Flag` output format, merely the boolean result. - pub fn flag_output(&self) -> FlagOutput { - FlagOutput { valid: false } - } + /// The `Flag` output format, merely the boolean result. + pub fn flag_output(&self) -> FlagOutput { + FlagOutput { valid: false } + } - /// The `Basic` structure, a flat list of output units. - pub fn basic_output(&self) -> OutputUnit<'_, '_, '_> { - let mut outputs = vec![]; + /// The `Basic` structure, a flat list of output units. + pub fn basic_output(&self) -> OutputUnit<'_, '_, '_> { + let mut outputs = vec![]; - let mut in_ref = InRef::default(); - let mut kw_loc = KeywordLocation::default(); - for node in DfsIterator::new(self) { - match node { - DfsItem::Pre(e) => { - in_ref.pre(e); - kw_loc.pre(e); - if e.skip() || matches!(e.kind, ErrorKind::Schema { .. 
}) { - continue; - } - let absolute_keyword_location = if in_ref.get() { - Some(e.absolute_keyword_location()) - } else { - None - }; - outputs.push(OutputUnit { - valid: false, - keyword_location: kw_loc.get(e), - absolute_keyword_location, - instance_location: &e.instance_location, - error: OutputError::Leaf(&e.kind), - }); - } - DfsItem::Post(e) => { - in_ref.post(); - kw_loc.post(); - if e.skip() || matches!(e.kind, ErrorKind::Schema { .. }) { - continue; - } - } - } - } - - let error = if outputs.is_empty() { - OutputError::Leaf(&self.kind) - } else { - OutputError::Branch(outputs) - }; - OutputUnit { + let mut in_ref = InRef::default(); + let mut kw_loc = KeywordLocation::default(); + for node in DfsIterator::new(self) { + match node { + DfsItem::Pre(e) => { + in_ref.pre(e); + kw_loc.pre(e); + if e.skip() || matches!(e.kind, ErrorKind::Schema { .. }) { + continue; + } + let absolute_keyword_location = if in_ref.get() { + Some(e.absolute_keyword_location()) + } else { + None + }; + outputs.push(OutputUnit { valid: false, - keyword_location: String::new(), - absolute_keyword_location: None, - instance_location: &self.instance_location, - error, + keyword_location: kw_loc.get(e), + absolute_keyword_location, + instance_location: &e.instance_location, + error: OutputError::Leaf(&e.kind), + }); } + DfsItem::Post(e) => { + in_ref.post(); + kw_loc.post(); + if e.skip() || matches!(e.kind, ErrorKind::Schema { .. }) { + continue; + } + } + } } - /// The `Detailed` structure, based on the schema. 
- pub fn detailed_output(&self) -> OutputUnit<'_, '_, '_> { - let mut root = None; - let mut stack: Vec = vec![]; + let error = if outputs.is_empty() { + OutputError::Leaf(&self.kind) + } else { + OutputError::Branch(outputs) + }; + OutputUnit { + valid: false, + keyword_location: String::new(), + absolute_keyword_location: None, + instance_location: &self.instance_location, + error, + } + } - let mut in_ref = InRef::default(); - let mut kw_loc = KeywordLocation::default(); - for node in DfsIterator::new(self) { - match node { - DfsItem::Pre(e) => { - in_ref.pre(e); - kw_loc.pre(e); - if e.skip() { - continue; - } - let absolute_keyword_location = if in_ref.get() { - Some(e.absolute_keyword_location()) - } else { - None - }; - stack.push(OutputUnit { - valid: false, - keyword_location: kw_loc.get(e), - absolute_keyword_location, - instance_location: &e.instance_location, - error: OutputError::Leaf(&e.kind), - }); - } - DfsItem::Post(e) => { - in_ref.post(); - kw_loc.post(); - if e.skip() { - continue; - } - let output = stack.pop().unwrap(); - if let Some(parent) = stack.last_mut() { - match &mut parent.error { - OutputError::Leaf(_) => { - parent.error = OutputError::Branch(vec![output]); - } - OutputError::Branch(v) => v.push(output), - } - } else { - root.replace(output); - } - } + /// The `Detailed` structure, based on the schema. 
+ pub fn detailed_output(&self) -> OutputUnit<'_, '_, '_> { + let mut root = None; + let mut stack: Vec = vec![]; + + let mut in_ref = InRef::default(); + let mut kw_loc = KeywordLocation::default(); + for node in DfsIterator::new(self) { + match node { + DfsItem::Pre(e) => { + in_ref.pre(e); + kw_loc.pre(e); + if e.skip() { + continue; + } + let absolute_keyword_location = if in_ref.get() { + Some(e.absolute_keyword_location()) + } else { + None + }; + stack.push(OutputUnit { + valid: false, + keyword_location: kw_loc.get(e), + absolute_keyword_location, + instance_location: &e.instance_location, + error: OutputError::Leaf(&e.kind), + }); + } + DfsItem::Post(e) => { + in_ref.post(); + kw_loc.post(); + if e.skip() { + continue; + } + let output = stack.pop().unwrap(); + if let Some(parent) = stack.last_mut() { + match &mut parent.error { + OutputError::Leaf(_) => { + parent.error = OutputError::Branch(vec![output]); + } + OutputError::Branch(v) => v.push(output), } + } else { + root.replace(output); + } } - root.unwrap() + } } + root.unwrap() + } } // DfsIterator -- impl Display for ValidationError<'_, '_> { - /// Formats error hierarchy. Use `#` to show the schema location. - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut indent = Indent::default(); - let mut sloc = SchemaLocation::default(); - // let mut kw_loc = KeywordLocation::default(); - for node in DfsIterator::new(self) { - match node { - DfsItem::Pre(e) => { - // kw_loc.pre(e); - if e.skip() { - continue; - } - indent.pre(f)?; - if f.alternate() { - sloc.pre(e); - } - if let ErrorKind::Schema { .. 
} = &e.kind { - write!(f, "jsonschema {}", e.kind)?; - } else { - write!(f, "at {}", quote(&e.instance_location.to_string()))?; - if f.alternate() { - write!(f, " [{}]", sloc)?; - // write!(f, " [{}]", kw_loc.get(e))?; - // write!(f, " [{}]", e.absolute_keyword_location())?; - } - write!(f, ": {}", e.kind)?; - } - } - DfsItem::Post(e) => { - // kw_loc.post(); - if e.skip() { - continue; - } - indent.post(); - sloc.post(); - } + /// Formats error hierarchy. Use `#` to show the schema location. + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut indent = Indent::default(); + let mut sloc = SchemaLocation::default(); + // let mut kw_loc = KeywordLocation::default(); + for node in DfsIterator::new(self) { + match node { + DfsItem::Pre(e) => { + // kw_loc.pre(e); + if e.skip() { + continue; + } + indent.pre(f)?; + if f.alternate() { + sloc.pre(e); + } + if let ErrorKind::Schema { .. } = &e.kind { + write!(f, "jsonschema {}", e.kind)?; + } else { + write!(f, "at {}", quote(&e.instance_location.to_string()))?; + if f.alternate() { + write!(f, " [{}]", sloc)?; + // write!(f, " [{}]", kw_loc.get(e))?; + // write!(f, " [{}]", e.absolute_keyword_location())?; } + write!(f, ": {}", e.kind)?; + } } - Ok(()) + DfsItem::Post(e) => { + // kw_loc.post(); + if e.skip() { + continue; + } + indent.post(); + sloc.post(); + } + } } + Ok(()) + } } struct DfsIterator<'a, 'v, 's> { - root: Option<&'a ValidationError<'v, 's>>, - stack: Vec>, + root: Option<&'a ValidationError<'v, 's>>, + stack: Vec>, } impl<'a, 'v, 's> DfsIterator<'a, 'v, 's> { - fn new(err: &'a ValidationError<'v, 's>) -> Self { - DfsIterator { - root: Some(err), - stack: vec![], - } + fn new(err: &'a ValidationError<'v, 's>) -> Self { + DfsIterator { + root: Some(err), + stack: vec![], } + } } impl<'a, 'v, 's> Iterator for DfsIterator<'a, 'v, 's> { - type Item = DfsItem<&'a ValidationError<'v, 's>>; + type Item = DfsItem<&'a ValidationError<'v, 's>>; - fn next(&mut self) -> Option { - let 
Some(mut frame) = self.stack.pop() else { - if let Some(err) = self.root.take() { - self.stack.push(Frame::from(err)); - return Some(DfsItem::Pre(err)); - } else { - return None; - } - }; - - if frame.causes.is_empty() { - return Some(DfsItem::Post(frame.err)); - } - - let err = &frame.causes[0]; - frame.causes = &frame.causes[1..]; - self.stack.push(frame); + fn next(&mut self) -> Option { + let Some(mut frame) = self.stack.pop() else { + if let Some(err) = self.root.take() { self.stack.push(Frame::from(err)); - Some(DfsItem::Pre(err)) + return Some(DfsItem::Pre(err)); + } else { + return None; + } + }; + + if frame.causes.is_empty() { + return Some(DfsItem::Post(frame.err)); } + + let err = &frame.causes[0]; + frame.causes = &frame.causes[1..]; + self.stack.push(frame); + self.stack.push(Frame::from(err)); + Some(DfsItem::Pre(err)) + } } struct Frame<'a, 'v, 's> { - err: &'a ValidationError<'v, 's>, - causes: &'a [ValidationError<'v, 's>], + err: &'a ValidationError<'v, 's>, + causes: &'a [ValidationError<'v, 's>], } impl<'a, 'v, 's> Frame<'a, 'v, 's> { - fn from(err: &'a ValidationError<'v, 's>) -> Self { - Self { - err, - causes: &err.causes, - } + fn from(err: &'a ValidationError<'v, 's>) -> Self { + Self { + err, + causes: &err.causes, } + } } enum DfsItem { - Pre(T), - Post(T), + Pre(T), + Post(T), } // Indent -- #[derive(Default)] struct Indent { - n: usize, + n: usize, } impl Indent { - fn pre(&mut self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - if self.n > 0 { - writeln!(f)?; - for _ in 0..self.n - 1 { - write!(f, " ")?; - } - write!(f, "- ")?; - } - self.n += 1; - Ok(()) + fn pre(&mut self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + if self.n > 0 { + writeln!(f)?; + for _ in 0..self.n - 1 { + write!(f, " ")?; + } + write!(f, "- ")?; } + self.n += 1; + Ok(()) + } - fn post(&mut self) { - self.n -= 1; - } + fn post(&mut self) { + self.n -= 1; + } } // SchemaLocation #[derive(Default)] struct SchemaLocation<'a, 's, 'v> { - stack: 
Vec<&'a ValidationError<'s, 'v>>, + stack: Vec<&'a ValidationError<'s, 'v>>, } impl<'a, 's, 'v> SchemaLocation<'a, 's, 'v> { - fn pre(&mut self, e: &'a ValidationError<'s, 'v>) { - self.stack.push(e); - } + fn pre(&mut self, e: &'a ValidationError<'s, 'v>) { + self.stack.push(e); + } - fn post(&mut self) { - self.stack.pop(); - } + fn post(&mut self) { + self.stack.pop(); + } } impl Display for SchemaLocation<'_, '_, '_> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let mut iter = self.stack.iter().cloned(); - let cur = iter.next_back().unwrap(); - let cur: Cow = match &cur.kind { - ErrorKind::Schema { url } => Cow::Borrowed(url), - ErrorKind::Reference { url, .. } => Cow::Borrowed(url), - _ => Cow::Owned(cur.absolute_keyword_location().to_string()), - }; + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let mut iter = self.stack.iter().cloned(); + let cur = iter.next_back().unwrap(); + let cur: Cow = match &cur.kind { + ErrorKind::Schema { url } => Cow::Borrowed(url), + ErrorKind::Reference { url, .. } => Cow::Borrowed(url), + _ => Cow::Owned(cur.absolute_keyword_location().to_string()), + }; - let Some(prev) = iter.next_back() else { - return write!(f, "{cur}"); - }; + let Some(prev) = iter.next_back() else { + return write!(f, "{cur}"); + }; - let p = match &prev.kind { - ErrorKind::Schema { url } => { - let (p, _) = split(url); - p - } - ErrorKind::Reference { url, .. } => { - let (p, _) = split(url); - p - } - _ => { - let (p, _) = split(prev.schema_url); - p - } - }; - let (c, frag) = split(cur.as_ref()); - if c == p { - write!(f, "S#{frag}") - } else { - write!(f, "{cur}") - } + let p = match &prev.kind { + ErrorKind::Schema { url } => { + let (p, _) = split(url); + p + } + ErrorKind::Reference { url, .. 
} => { + let (p, _) = split(url); + p + } + _ => { + let (p, _) = split(prev.schema_url); + p + } + }; + let (c, frag) = split(cur.as_ref()); + if c == p { + write!(f, "S#{frag}") + } else { + write!(f, "{cur}") } + } } // KeywordLocation -- #[derive(Default)] struct KeywordLocation<'a> { - loc: String, - stack: Vec<(&'a str, usize)>, // (schema_url, len) + loc: String, + stack: Vec<(&'a str, usize)>, // (schema_url, len) } impl<'a> KeywordLocation<'a> { - fn pre(&mut self, e: &'a ValidationError) { - let cur = match &e.kind { - ErrorKind::Schema { url } => url, - ErrorKind::Reference { url, .. } => url, - _ => e.schema_url, - }; + fn pre(&mut self, e: &'a ValidationError) { + let cur = match &e.kind { + ErrorKind::Schema { url } => url, + ErrorKind::Reference { url, .. } => url, + _ => e.schema_url, + }; - if let Some((prev, _)) = self.stack.last() { - self.loc.push_str(&e.schema_url[prev.len()..]); // todo: url-decode - if let ErrorKind::Reference { kw, .. } = &e.kind { - self.loc.push('/'); - self.loc.push_str(kw); - } - } - self.stack.push((cur, self.loc.len())); + if let Some((prev, _)) = self.stack.last() { + self.loc.push_str(&e.schema_url[prev.len()..]); // todo: url-decode + if let ErrorKind::Reference { kw, .. } = &e.kind { + self.loc.push('/'); + self.loc.push_str(kw); + } } + self.stack.push((cur, self.loc.len())); + } - fn post(&mut self) { - self.stack.pop(); - if let Some((_, len)) = self.stack.last() { - self.loc.truncate(*len); - } + fn post(&mut self) { + self.stack.pop(); + if let Some((_, len)) = self.stack.last() { + self.loc.truncate(*len); } + } - fn get(&mut self, cur: &'a ValidationError) -> String { - if let ErrorKind::Reference { .. 
} = &cur.kind { - self.loc.clone() - } else if let Some(kw_path) = &cur.kind.keyword_path() { - let len = self.loc.len(); - self.loc.push('/'); - write!(self.loc, "{}", kw_path).expect("write kw_path to String should not fail"); - let loc = self.loc.clone(); - self.loc.truncate(len); - loc - } else { - self.loc.clone() - } + fn get(&mut self, cur: &'a ValidationError) -> String { + if let ErrorKind::Reference { .. } = &cur.kind { + self.loc.clone() + } else if let Some(kw_path) = &cur.kind.keyword_path() { + let len = self.loc.len(); + self.loc.push('/'); + write!(self.loc, "{}", kw_path).expect("write kw_path to String should not fail"); + let loc = self.loc.clone(); + self.loc.truncate(len); + loc + } else { + self.loc.clone() } + } } #[derive(Default)] struct InRef { - stack: Vec, + stack: Vec, } impl InRef { - fn pre(&mut self, e: &ValidationError) { - let in_ref: bool = self.get() || matches!(e.kind, ErrorKind::Reference { .. }); - self.stack.push(in_ref); - } + fn pre(&mut self, e: &ValidationError) { + let in_ref: bool = self.get() || matches!(e.kind, ErrorKind::Reference { .. }); + self.stack.push(in_ref); + } - fn post(&mut self) { - self.stack.pop(); - } + fn post(&mut self) { + self.stack.pop(); + } - fn get(&self) -> bool { - self.stack.last().cloned().unwrap_or_default() - } + fn get(&self) -> bool { + self.stack.last().cloned().unwrap_or_default() + } } // output formats -- /// Simplest output format, merely the boolean result. 
pub struct FlagOutput { - pub valid: bool, + pub valid: bool, } impl Serialize for FlagOutput { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - let mut map = serializer.serialize_map(Some(1))?; - map.serialize_entry("valid", &self.valid)?; - map.end() - } + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let mut map = serializer.serialize_map(Some(1))?; + map.serialize_entry("valid", &self.valid)?; + map.end() + } } impl Display for FlagOutput { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write_json_to_fmt(f, self) - } + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write_json_to_fmt(f, self) + } } /// Single OutputUnit used in Basic/Detailed output formats. pub struct OutputUnit<'e, 's, 'v> { - pub valid: bool, - pub keyword_location: String, - /// The absolute, dereferenced location of the validating keyword - pub absolute_keyword_location: Option>, - /// The location of the JSON value within the instance being validated - pub instance_location: &'e InstanceLocation<'v>, - pub error: OutputError<'e, 's, 'v>, + pub valid: bool, + pub keyword_location: String, + /// The absolute, dereferenced location of the validating keyword + pub absolute_keyword_location: Option>, + /// The location of the JSON value within the instance being validated + pub instance_location: &'e InstanceLocation<'v>, + pub error: OutputError<'e, 's, 'v>, } impl Serialize for OutputUnit<'_, '_, '_> { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - let n = 4 + self.absolute_keyword_location.as_ref().map_or(0, |_| 1); - let mut map = serializer.serialize_map(Some(n))?; - map.serialize_entry("valid", &self.valid)?; - map.serialize_entry("keywordLocation", &self.keyword_location.to_string())?; - if let Some(s) = &self.absolute_keyword_location { - map.serialize_entry("absoluteKeywordLocation", &s.to_string())?; - } - map.serialize_entry("instanceLocation", 
&self.instance_location.to_string())?; - let pname = match self.error { - OutputError::Leaf(_) => "error", - OutputError::Branch(_) => "errors", - }; - map.serialize_entry(pname, &self.error)?; - map.end() + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let n = 4 + self.absolute_keyword_location.as_ref().map_or(0, |_| 1); + let mut map = serializer.serialize_map(Some(n))?; + map.serialize_entry("valid", &self.valid)?; + map.serialize_entry("keywordLocation", &self.keyword_location.to_string())?; + if let Some(s) = &self.absolute_keyword_location { + map.serialize_entry("absoluteKeywordLocation", &s.to_string())?; } + map.serialize_entry("instanceLocation", &self.instance_location.to_string())?; + let pname = match self.error { + OutputError::Leaf(_) => "error", + OutputError::Branch(_) => "errors", + }; + map.serialize_entry(pname, &self.error)?; + map.end() + } } impl Display for OutputUnit<'_, '_, '_> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write_json_to_fmt(f, self) - } + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write_json_to_fmt(f, self) + } } /// Error of [`OutputUnit`]. pub enum OutputError<'e, 's, 'v> { - /// Single. - Leaf(&'e ErrorKind<'s, 'v>), - /// Nested. - Branch(Vec>), + /// Single. + Leaf(&'e ErrorKind<'s, 'v>), + /// Nested. 
+ Branch(Vec>), } impl Serialize for OutputError<'_, '_, '_> { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - match self { - OutputError::Leaf(kind) => serializer.serialize_str(&kind.to_string()), - OutputError::Branch(units) => { - let mut seq = serializer.serialize_seq(Some(units.len()))?; - for unit in units { - seq.serialize_element(unit)?; - } - seq.end() - } + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + OutputError::Leaf(kind) => serializer.serialize_str(&kind.to_string()), + OutputError::Branch(units) => { + let mut seq = serializer.serialize_seq(Some(units.len()))?; + for unit in units { + seq.serialize_element(unit)?; } + seq.end() + } } + } } // AbsoluteKeywordLocation -- impl<'s> ErrorKind<'s, '_> { - pub fn keyword_path(&self) -> Option> { - #[inline(always)] - fn kw(kw: &'static str) -> Option> { - Some(KeywordPath { - keyword: kw, - token: None, - }) - } - - #[inline(always)] - fn kw_prop<'s>(kw: &'static str, prop: &'s str) -> Option> { - Some(KeywordPath { - keyword: kw, - token: Some(SchemaToken::Prop(prop)), - }) - } - - use ErrorKind::*; - match self { - Group => None, - Schema { .. } => None, - ContentSchema => kw("contentSchema"), - PropertyName { .. } => kw("propertyNames"), - Reference { kw: kword, .. } => kw(kword), - RefCycle { .. } => None, - FalseSchema => None, - Type { .. } => kw("type"), - Enum { .. } => kw("enum"), - Const { .. } => kw("const"), - Format { .. } => kw("format"), - MinProperties { .. } => kw("minProperties"), - MaxProperties { .. } => kw("maxProperties"), - AdditionalProperties { .. } => kw("additionalProperty"), - Required { .. } => kw("required"), - Dependency { prop, .. } => kw_prop("dependencies", prop), - DependentRequired { prop, .. } => kw_prop("dependentRequired", prop), - MinItems { .. } => kw("minItems"), - MaxItems { .. } => kw("maxItems"), - Contains => kw("contains"), - MinContains { .. 
} => kw("minContains"), - MaxContains { .. } => kw("maxContains"), - UniqueItems { .. } => kw("uniqueItems"), - AdditionalItems { .. } => kw("additionalItems"), - MinLength { .. } => kw("minLength"), - MaxLength { .. } => kw("maxLength"), - Pattern { .. } => kw("pattern"), - ContentEncoding { .. } => kw("contentEncoding"), - ContentMediaType { .. } => kw("contentMediaType"), - Minimum { .. } => kw("minimum"), - Maximum { .. } => kw("maximum"), - ExclusiveMinimum { .. } => kw("exclusiveMinimum"), - ExclusiveMaximum { .. } => kw("exclusiveMaximum"), - MultipleOf { .. } => kw("multipleOf"), - Not => kw("not"), - AllOf => kw("allOf"), - AnyOf => kw("anyOf"), - OneOf(_) => kw("oneOf"), - } + pub fn keyword_path(&self) -> Option> { + #[inline(always)] + fn kw(kw: &'static str) -> Option> { + Some(KeywordPath { + keyword: kw, + token: None, + }) } + + #[inline(always)] + fn kw_prop<'s>(kw: &'static str, prop: &'s str) -> Option> { + Some(KeywordPath { + keyword: kw, + token: Some(SchemaToken::Prop(prop)), + }) + } + + use ErrorKind::*; + match self { + Group => None, + Schema { .. } => None, + ContentSchema => kw("contentSchema"), + PropertyName { .. } => kw("propertyNames"), + Reference { kw: kword, .. } => kw(kword), + RefCycle { .. } => None, + FalseSchema => None, + Type { .. } => kw("type"), + Enum { .. } => kw("enum"), + Const { .. } => kw("const"), + Format { .. } => kw("format"), + MinProperties { .. } => kw("minProperties"), + MaxProperties { .. } => kw("maxProperties"), + AdditionalProperties { .. } => kw("additionalProperty"), + Required { .. } => kw("required"), + Dependency { prop, .. } => kw_prop("dependencies", prop), + DependentRequired { prop, .. } => kw_prop("dependentRequired", prop), + MinItems { .. } => kw("minItems"), + MaxItems { .. } => kw("maxItems"), + Contains => kw("contains"), + MinContains { .. } => kw("minContains"), + MaxContains { .. } => kw("maxContains"), + UniqueItems { .. } => kw("uniqueItems"), + AdditionalItems { .. 
} => kw("additionalItems"), + MinLength { .. } => kw("minLength"), + MaxLength { .. } => kw("maxLength"), + Pattern { .. } => kw("pattern"), + ContentEncoding { .. } => kw("contentEncoding"), + ContentMediaType { .. } => kw("contentMediaType"), + Minimum { .. } => kw("minimum"), + Maximum { .. } => kw("maximum"), + ExclusiveMinimum { .. } => kw("exclusiveMinimum"), + ExclusiveMaximum { .. } => kw("exclusiveMaximum"), + MultipleOf { .. } => kw("multipleOf"), + Not => kw("not"), + AllOf => kw("allOf"), + AnyOf => kw("anyOf"), + OneOf(_) => kw("oneOf"), + } + } } /// The absolute, dereferenced location of the validating keyword #[derive(Debug, Clone)] pub struct AbsoluteKeywordLocation<'s> { - /// The absolute, dereferenced schema location. - pub schema_url: &'s str, - /// Location within the `schema_url`. - pub keyword_path: Option>, + /// The absolute, dereferenced schema location. + pub schema_url: &'s str, + /// Location within the `schema_url`. + pub keyword_path: Option>, } impl Display for AbsoluteKeywordLocation<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.schema_url.fmt(f)?; - if let Some(path) = &self.keyword_path { - f.write_str("/")?; - path.keyword.fmt(f)?; - if let Some(token) = &path.token { - f.write_str("/")?; - match token { - SchemaToken::Prop(p) => write!(f, "{}", escape(p))?, // todo: url-encode - SchemaToken::Item(i) => write!(f, "{i}")?, - } - } + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.schema_url.fmt(f)?; + if let Some(path) = &self.keyword_path { + f.write_str("/")?; + path.keyword.fmt(f)?; + if let Some(token) = &path.token { + f.write_str("/")?; + match token { + SchemaToken::Prop(p) => write!(f, "{}", escape(p))?, // todo: url-encode + SchemaToken::Item(i) => write!(f, "{i}")?, } - Ok(()) + } } + Ok(()) + } } #[derive(Debug, Clone)] /// JsonPointer in schema. pub struct KeywordPath<'s> { - /// The first token. 
- pub keyword: &'static str, - /// Optinal token within keyword. - pub token: Option>, + /// The first token. + pub keyword: &'static str, + /// Optinal token within keyword. + pub token: Option>, } impl Display for KeywordPath<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.keyword.fmt(f)?; - if let Some(token) = &self.token { - f.write_str("/")?; - token.fmt(f)?; - } - Ok(()) + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.keyword.fmt(f)?; + if let Some(token) = &self.token { + f.write_str("/")?; + token.fmt(f)?; } + Ok(()) + } } /// Token for schema. #[derive(Debug, Clone)] pub enum SchemaToken<'s> { - /// Token for property. - Prop(&'s str), - /// Token for array item. - Item(usize), + /// Token for property. + Prop(&'s str), + /// Token for array item. + Item(usize), } impl Display for SchemaToken<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - SchemaToken::Prop(p) => write!(f, "{}", escape(p)), - SchemaToken::Item(i) => write!(f, "{i}"), - } + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SchemaToken::Prop(p) => write!(f, "{}", escape(p)), + SchemaToken::Item(i) => write!(f, "{i}"), } + } } // helpers -- fn write_json_to_fmt(f: &mut std::fmt::Formatter, value: &T) -> Result<(), std::fmt::Error> where - T: ?Sized + Serialize, + T: ?Sized + Serialize, { - let s = if f.alternate() { - serde_json::to_string_pretty(value) - } else { - serde_json::to_string(value) - }; - let s = s.map_err(|_| std::fmt::Error)?; - f.write_str(&s) + let s = if f.alternate() { + serde_json::to_string_pretty(value) + } else { + serde_json::to_string(value) + }; + let s = s.map_err(|_| std::fmt::Error)?; + f.write_str(&s) } diff --git a/validator/src/root.rs b/validator/src/root.rs index 9c6213a..9dd2e43 100644 --- a/validator/src/root.rs +++ b/validator/src/root.rs @@ -6,123 +6,123 @@ use serde_json::Value; use url::Url; pub(crate) 
struct Root { - pub(crate) draft: &'static Draft, - pub(crate) resources: HashMap, // ptr => _ - pub(crate) url: Url, - pub(crate) meta_vocabs: Option>, + pub(crate) draft: &'static Draft, + pub(crate) resources: HashMap, // ptr => _ + pub(crate) url: Url, + pub(crate) meta_vocabs: Option>, } impl Root { - pub(crate) fn has_vocab(&self, name: &str) -> bool { - if self.draft.version < 2019 || name == "core" { - return true; - } - if let Some(vocabs) = &self.meta_vocabs { - return vocabs.iter().any(|s| s == name); - } - self.draft.default_vocabs.contains(&name) + pub(crate) fn has_vocab(&self, name: &str) -> bool { + if self.draft.version < 2019 || name == "core" { + return true; } + if let Some(vocabs) = &self.meta_vocabs { + return vocabs.iter().any(|s| s == name); + } + self.draft.default_vocabs.contains(&name) + } - fn resolve_fragment_in(&self, frag: &Fragment, res: &Resource) -> Result { - let ptr = match frag { - Fragment::Anchor(anchor) => { - let Some(ptr) = res.anchors.get(anchor) else { - return Err(CompileError::AnchorNotFound { - url: self.url.to_string(), - reference: UrlFrag::format(&res.id, frag.as_str()), - }); - }; - ptr.clone() - } - Fragment::JsonPointer(ptr) => res.ptr.concat(ptr), + fn resolve_fragment_in(&self, frag: &Fragment, res: &Resource) -> Result { + let ptr = match frag { + Fragment::Anchor(anchor) => { + let Some(ptr) = res.anchors.get(anchor) else { + return Err(CompileError::AnchorNotFound { + url: self.url.to_string(), + reference: UrlFrag::format(&res.id, frag.as_str()), + }); }; - Ok(UrlPtr { - url: self.url.clone(), - ptr, - }) - } + ptr.clone() + } + Fragment::JsonPointer(ptr) => res.ptr.concat(ptr), + }; + Ok(UrlPtr { + url: self.url.clone(), + ptr, + }) + } - pub(crate) fn resolve_fragment(&self, frag: &Fragment) -> Result { - let res = self.resources.get("").ok_or(CompileError::Bug( - format!("no root resource found for {}", self.url).into(), - ))?; - self.resolve_fragment_in(frag, res) - } + pub(crate) fn 
resolve_fragment(&self, frag: &Fragment) -> Result { + let res = self.resources.get("").ok_or(CompileError::Bug( + format!("no root resource found for {}", self.url).into(), + ))?; + self.resolve_fragment_in(frag, res) + } - // resolves `UrlFrag` to `UrlPtr` from root. - // returns `None` if it is external. - pub(crate) fn resolve(&self, uf: &UrlFrag) -> Result, CompileError> { - let res = { - if uf.url == self.url { - self.resources.get("").ok_or(CompileError::Bug( - format!("no root resource found for {}", self.url).into(), - ))? - } else { - // look for resource with id==uf.url - let Some(res) = self.resources.values().find(|res| res.id == uf.url) else { - return Ok(None); // external url - }; - res - } + // resolves `UrlFrag` to `UrlPtr` from root. + // returns `None` if it is external. + pub(crate) fn resolve(&self, uf: &UrlFrag) -> Result, CompileError> { + let res = { + if uf.url == self.url { + self.resources.get("").ok_or(CompileError::Bug( + format!("no root resource found for {}", self.url).into(), + ))? 
+ } else { + // look for resource with id==uf.url + let Some(res) = self.resources.values().find(|res| res.id == uf.url) else { + return Ok(None); // external url }; + res + } + }; - self.resolve_fragment_in(&uf.frag, res).map(Some) + self.resolve_fragment_in(&uf.frag, res).map(Some) + } + + pub(crate) fn resource(&self, ptr: &JsonPointer) -> &Resource { + let mut ptr = ptr.as_str(); + loop { + if let Some(res) = self.resources.get(ptr) { + return res; + } + let Some((prefix, _)) = ptr.rsplit_once('/') else { + break; + }; + ptr = prefix; } + self.resources.get("").expect("root resource should exist") + } - pub(crate) fn resource(&self, ptr: &JsonPointer) -> &Resource { - let mut ptr = ptr.as_str(); - loop { - if let Some(res) = self.resources.get(ptr) { - return res; - } - let Some((prefix, _)) = ptr.rsplit_once('/') else { - break; - }; - ptr = prefix; - } - self.resources.get("").expect("root resource should exist") - } - - pub(crate) fn base_url(&self, ptr: &JsonPointer) -> &Url { - &self.resource(ptr).id - } - - pub(crate) fn add_subschema( - &mut self, - doc: &Value, - ptr: &JsonPointer, - ) -> Result<(), CompileError> { - let v = ptr.lookup(doc, &self.url)?; - let base_url = self.base_url(ptr).clone(); - self.draft - .collect_resources(v, &base_url, ptr.clone(), &self.url, &mut self.resources)?; - - // collect anchors - if !self.resources.contains_key(ptr) { - let res = self.resource(ptr); - if let Some(res) = self.resources.get_mut(&res.ptr.clone()) { - self.draft.collect_anchors(v, ptr, res, &self.url)?; - } - } - Ok(()) + pub(crate) fn base_url(&self, ptr: &JsonPointer) -> &Url { + &self.resource(ptr).id + } + + pub(crate) fn add_subschema( + &mut self, + doc: &Value, + ptr: &JsonPointer, + ) -> Result<(), CompileError> { + let v = ptr.lookup(doc, &self.url)?; + let base_url = self.base_url(ptr).clone(); + self.draft + .collect_resources(v, &base_url, ptr.clone(), &self.url, &mut self.resources)?; + + // collect anchors + if 
!self.resources.contains_key(ptr) { + let res = self.resource(ptr); + if let Some(res) = self.resources.get_mut(&res.ptr.clone()) { + self.draft.collect_anchors(v, ptr, res, &self.url)?; + } } + Ok(()) + } } #[derive(Debug)] pub(crate) struct Resource { - pub(crate) ptr: JsonPointer, // from root - pub(crate) id: Url, - pub(crate) anchors: HashMap, // anchor => ptr - pub(crate) dynamic_anchors: HashSet, + pub(crate) ptr: JsonPointer, // from root + pub(crate) id: Url, + pub(crate) anchors: HashMap, // anchor => ptr + pub(crate) dynamic_anchors: HashSet, } impl Resource { - pub(crate) fn new(ptr: JsonPointer, id: Url) -> Self { - Self { - ptr, - id, - anchors: HashMap::new(), - dynamic_anchors: HashSet::new(), - } + pub(crate) fn new(ptr: JsonPointer, id: Url) -> Self { + Self { + ptr, + id, + anchors: HashMap::new(), + dynamic_anchors: HashSet::new(), } + } } diff --git a/validator/src/roots.rs b/validator/src/roots.rs index fd64eca..c8ce26f 100644 --- a/validator/src/roots.rs +++ b/validator/src/roots.rs @@ -8,100 +8,100 @@ use url::Url; // -- pub(crate) struct Roots { - pub(crate) default_draft: &'static Draft, - map: HashMap, - pub(crate) loader: DefaultUrlLoader, + pub(crate) default_draft: &'static Draft, + map: HashMap, + pub(crate) loader: DefaultUrlLoader, } impl Roots { - fn new() -> Self { - Self { - default_draft: latest(), - map: Default::default(), - loader: DefaultUrlLoader::new(), - } + fn new() -> Self { + Self { + default_draft: latest(), + map: Default::default(), + loader: DefaultUrlLoader::new(), } + } } impl Default for Roots { - fn default() -> Self { - Self::new() - } + fn default() -> Self { + Self::new() + } } impl Roots { - pub(crate) fn get(&self, url: &Url) -> Option<&Root> { - self.map.get(url) + pub(crate) fn get(&self, url: &Url) -> Option<&Root> { + self.map.get(url) + } + + pub(crate) fn resolve_fragment(&mut self, uf: UrlFrag) -> Result { + self.or_load(uf.url.clone())?; + let Some(root) = self.map.get(&uf.url) else { + return 
Err(CompileError::Bug("or_load didn't add".into())); + }; + root.resolve_fragment(&uf.frag) + } + + pub(crate) fn ensure_subschema(&mut self, up: &UrlPtr) -> Result<(), CompileError> { + self.or_load(up.url.clone())?; + let Some(root) = self.map.get_mut(&up.url) else { + return Err(CompileError::Bug("or_load didn't add".into())); + }; + if !root.draft.is_subschema(up.ptr.as_str()) { + let doc = self.loader.load(&root.url)?; + let v = up.ptr.lookup(doc, &up.url)?; + root.draft.validate(up, v)?; + root.add_subschema(doc, &up.ptr)?; + } + Ok(()) + } + + pub(crate) fn or_load(&mut self, url: Url) -> Result<(), CompileError> { + debug_assert!(url.fragment().is_none(), "trying to add root with fragment"); + if self.map.contains_key(&url) { + return Ok(()); + } + let doc = self.loader.load(&url)?; + let r = self.create_root(url.clone(), doc)?; + self.map.insert(url, r); + Ok(()) + } + + pub(crate) fn create_root(&self, url: Url, doc: &Value) -> Result { + let draft = { + let up = UrlPtr { + url: url.clone(), + ptr: "".into(), + }; + self.loader + .get_draft(&up, doc, self.default_draft, HashSet::new())? 
+ }; + let vocabs = self.loader.get_meta_vocabs(doc, draft)?; + let resources = { + let mut m = HashMap::default(); + draft.collect_resources(doc, &url, "".into(), &url, &mut m)?; + m + }; + + if !matches!(url.host_str(), Some("json-schema.org")) { + draft.validate( + &UrlPtr { + url: url.clone(), + ptr: "".into(), + }, + doc, + )?; } - pub(crate) fn resolve_fragment(&mut self, uf: UrlFrag) -> Result { - self.or_load(uf.url.clone())?; - let Some(root) = self.map.get(&uf.url) else { - return Err(CompileError::Bug("or_load didn't add".into())); - }; - root.resolve_fragment(&uf.frag) - } + Ok(Root { + draft, + resources, + url: url.clone(), + meta_vocabs: vocabs, + }) + } - pub(crate) fn ensure_subschema(&mut self, up: &UrlPtr) -> Result<(), CompileError> { - self.or_load(up.url.clone())?; - let Some(root) = self.map.get_mut(&up.url) else { - return Err(CompileError::Bug("or_load didn't add".into())); - }; - if !root.draft.is_subschema(up.ptr.as_str()) { - let doc = self.loader.load(&root.url)?; - let v = up.ptr.lookup(doc, &up.url)?; - root.draft.validate(up, v)?; - root.add_subschema(doc, &up.ptr)?; - } - Ok(()) - } - - pub(crate) fn or_load(&mut self, url: Url) -> Result<(), CompileError> { - debug_assert!(url.fragment().is_none(), "trying to add root with fragment"); - if self.map.contains_key(&url) { - return Ok(()); - } - let doc = self.loader.load(&url)?; - let r = self.create_root(url.clone(), doc)?; - self.map.insert(url, r); - Ok(()) - } - - pub(crate) fn create_root(&self, url: Url, doc: &Value) -> Result { - let draft = { - let up = UrlPtr { - url: url.clone(), - ptr: "".into(), - }; - self.loader - .get_draft(&up, doc, self.default_draft, HashSet::new())? 
- }; - let vocabs = self.loader.get_meta_vocabs(doc, draft)?; - let resources = { - let mut m = HashMap::default(); - draft.collect_resources(doc, &url, "".into(), &url, &mut m)?; - m - }; - - if !matches!(url.host_str(), Some("json-schema.org")) { - draft.validate( - &UrlPtr { - url: url.clone(), - ptr: "".into(), - }, - doc, - )?; - } - - Ok(Root { - draft, - resources, - url: url.clone(), - meta_vocabs: vocabs, - }) - } - - pub(crate) fn insert(&mut self, roots: &mut HashMap) { - self.map.extend(roots.drain()); - } + pub(crate) fn insert(&mut self, roots: &mut HashMap) { + self.map.extend(roots.drain()); + } } diff --git a/validator/src/util.rs b/validator/src/util.rs index b064bad..1181fd5 100644 --- a/validator/src/util.rs +++ b/validator/src/util.rs @@ -1,8 +1,8 @@ use std::{ - borrow::{Borrow, Cow}, - fmt::Display, - hash::{Hash, Hasher}, - str::FromStr, + borrow::{Borrow, Cow}, + fmt::Display, + hash::{Hash, Hasher}, + str::FromStr, }; use ahash::{AHashMap, AHasher}; @@ -19,112 +19,112 @@ pub(crate) struct JsonPointer(pub(crate) String); impl JsonPointer { pub(crate) fn escape(token: &str) -> Cow<'_, str> { - const SPECIAL: [char; 2] = ['~', '/']; - if token.contains(SPECIAL) { - token.replace('~', "~0").replace('/', "~1").into() - } else { - token.into() - } + const SPECIAL: [char; 2] = ['~', '/']; + if token.contains(SPECIAL) { + token.replace('~', "~0").replace('/', "~1").into() + } else { + token.into() } + } - pub(crate) fn unescape(mut tok: &str) -> Result, ()> { - let Some(mut tilde) = tok.find('~') else { - return Ok(Cow::Borrowed(tok)); - }; - let mut s = String::with_capacity(tok.len()); - loop { - s.push_str(&tok[..tilde]); - tok = &tok[tilde + 1..]; - match tok.chars().next() { - Some('1') => s.push('/'), - Some('0') => s.push('~'), - _ => return Err(()), + pub(crate) fn unescape(mut tok: &str) -> Result, ()> { + let Some(mut tilde) = tok.find('~') else { + return Ok(Cow::Borrowed(tok)); + }; + let mut s = String::with_capacity(tok.len()); + 
loop { + s.push_str(&tok[..tilde]); + tok = &tok[tilde + 1..]; + match tok.chars().next() { + Some('1') => s.push('/'), + Some('0') => s.push('~'), + _ => return Err(()), + } + tok = &tok[1..]; + let Some(i) = tok.find('~') else { + s.push_str(tok); + break; + }; + tilde = i; + } + Ok(Cow::Owned(s)) + } + + pub(crate) fn lookup<'a>( + &self, + mut v: &'a Value, + v_url: &Url, + ) -> Result<&'a Value, CompileError> { + for tok in self.0.split('/').skip(1) { + let Ok(tok) = Self::unescape(tok) else { + let loc = UrlFrag::format(v_url, self.as_str()); + return Err(CompileError::InvalidJsonPointer(loc)); + }; + match v { + Value::Object(obj) => { + if let Some(pvalue) = obj.get(tok.as_ref()) { + v = pvalue; + continue; + } + } + Value::Array(arr) => { + if let Ok(i) = usize::from_str(tok.as_ref()) { + if let Some(item) = arr.get(i) { + v = item; + continue; } - tok = &tok[1..]; - let Some(i) = tok.find('~') else { - s.push_str(tok); - break; - }; - tilde = i; + }; } - Ok(Cow::Owned(s)) + _ => {} + } + let loc = UrlFrag::format(v_url, self.as_str()); + return Err(CompileError::JsonPointerNotFound(loc)); } + Ok(v) + } - pub(crate) fn lookup<'a>( - &self, - mut v: &'a Value, - v_url: &Url, - ) -> Result<&'a Value, CompileError> { - for tok in self.0.split('/').skip(1) { - let Ok(tok) = Self::unescape(tok) else { - let loc = UrlFrag::format(v_url, self.as_str()); - return Err(CompileError::InvalidJsonPointer(loc)); - }; - match v { - Value::Object(obj) => { - if let Some(pvalue) = obj.get(tok.as_ref()) { - v = pvalue; - continue; - } - } - Value::Array(arr) => { - if let Ok(i) = usize::from_str(tok.as_ref()) { - if let Some(item) = arr.get(i) { - v = item; - continue; - } - }; - } - _ => {} - } - let loc = UrlFrag::format(v_url, self.as_str()); - return Err(CompileError::JsonPointerNotFound(loc)); - } - Ok(v) - } + pub(crate) fn as_str(&self) -> &str { + &self.0 + } - pub(crate) fn as_str(&self) -> &str { - &self.0 - } + pub(crate) fn is_empty(&self) -> bool { + 
self.0.is_empty() + } - pub(crate) fn is_empty(&self) -> bool { - self.0.is_empty() - } + pub(crate) fn concat(&self, next: &Self) -> Self { + JsonPointer(format!("{}{}", self.0, next.0)) + } - pub(crate) fn concat(&self, next: &Self) -> Self { - JsonPointer(format!("{}{}", self.0, next.0)) - } + pub(crate) fn append(&self, tok: &str) -> Self { + Self(format!("{}/{}", self, Self::escape(tok))) + } - pub(crate) fn append(&self, tok: &str) -> Self { - Self(format!("{}/{}", self, Self::escape(tok))) - } - - pub(crate) fn append2(&self, tok1: &str, tok2: &str) -> Self { - Self(format!( - "{}/{}/{}", - self, - Self::escape(tok1), - Self::escape(tok2) - )) - } + pub(crate) fn append2(&self, tok1: &str, tok2: &str) -> Self { + Self(format!( + "{}/{}/{}", + self, + Self::escape(tok1), + Self::escape(tok2) + )) + } } impl Display for JsonPointer { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.0.fmt(f) - } + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } } impl Borrow for JsonPointer { - fn borrow(&self) -> &str { - &self.0 - } + fn borrow(&self) -> &str { + &self.0 + } } impl From<&str> for JsonPointer { - fn from(value: &str) -> Self { - Self(value.into()) - } + fn from(value: &str) -> Self { + Self(value.into()) + } } // -- @@ -133,297 +133,297 @@ impl From<&str> for JsonPointer { pub(crate) struct Anchor(pub(crate) String); impl Display for Anchor { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.0.fmt(f) - } + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } } impl Borrow for Anchor { - fn borrow(&self) -> &str { - &self.0 - } + fn borrow(&self) -> &str { + &self.0 + } } impl From<&str> for Anchor { - fn from(value: &str) -> Self { - Self(value.into()) - } + fn from(value: &str) -> Self { + Self(value.into()) + } } // -- #[derive(Debug, Clone, Eq, PartialEq)] pub(crate) enum Fragment { - Anchor(Anchor), - 
JsonPointer(JsonPointer), + Anchor(Anchor), + JsonPointer(JsonPointer), } impl Fragment { - pub(crate) fn split(s: &str) -> Result<(&str, Fragment), CompileError> { - let (u, frag) = split(s); - let frag = percent_decode_str(frag) - .decode_utf8() - .map_err(|src| CompileError::ParseUrlError { - url: s.to_string(), - src: src.into(), - })? - .to_string(); - let frag = if frag.is_empty() || frag.starts_with('/') { - Fragment::JsonPointer(JsonPointer(frag)) - } else { - Fragment::Anchor(Anchor(frag)) - }; - Ok((u, frag)) - } + pub(crate) fn split(s: &str) -> Result<(&str, Fragment), CompileError> { + let (u, frag) = split(s); + let frag = percent_decode_str(frag) + .decode_utf8() + .map_err(|src| CompileError::ParseUrlError { + url: s.to_string(), + src: src.into(), + })? + .to_string(); + let frag = if frag.is_empty() || frag.starts_with('/') { + Fragment::JsonPointer(JsonPointer(frag)) + } else { + Fragment::Anchor(Anchor(frag)) + }; + Ok((u, frag)) + } - pub(crate) fn encode(frag: &str) -> String { - // https://url.spec.whatwg.org/#fragment-percent-encode-set - const FRAGMENT: &AsciiSet = &CONTROLS - .add(b'%') - .add(b' ') - .add(b'"') - .add(b'<') - .add(b'>') - .add(b'`'); - percent_encoding::utf8_percent_encode(frag, FRAGMENT).to_string() - } + pub(crate) fn encode(frag: &str) -> String { + // https://url.spec.whatwg.org/#fragment-percent-encode-set + const FRAGMENT: &AsciiSet = &CONTROLS + .add(b'%') + .add(b' ') + .add(b'"') + .add(b'<') + .add(b'>') + .add(b'`'); + percent_encoding::utf8_percent_encode(frag, FRAGMENT).to_string() + } - pub(crate) fn as_str(&self) -> &str { - match self { - Fragment::Anchor(s) => &s.0, - Fragment::JsonPointer(s) => &s.0, - } + pub(crate) fn as_str(&self) -> &str { + match self { + Fragment::Anchor(s) => &s.0, + Fragment::JsonPointer(s) => &s.0, } + } } // -- #[derive(Clone)] pub(crate) struct UrlFrag { - pub(crate) url: Url, - pub(crate) frag: Fragment, + pub(crate) url: Url, + pub(crate) frag: Fragment, } impl UrlFrag { - 
pub(crate) fn absolute(input: &str) -> Result { - let (u, frag) = Fragment::split(input)?; + pub(crate) fn absolute(input: &str) -> Result { + let (u, frag) = Fragment::split(input)?; - // note: windows drive letter is treated as url scheme by url parser - #[cfg(not(target_arch = "wasm32"))] - if std::env::consts::OS == "windows" && starts_with_windows_drive(u) { - let url = Url::from_file_path(u) - .map_err(|_| CompileError::Bug(format!("failed to convert {u} into url").into()))?; - return Ok(UrlFrag { url, frag }); - } - - match Url::parse(u) { - Ok(url) => Ok(UrlFrag { url, frag }), - #[cfg(not(target_arch = "wasm32"))] - Err(url::ParseError::RelativeUrlWithoutBase) => { - let p = std::path::absolute(u).map_err(|e| CompileError::ParseUrlError { - url: u.to_owned(), - src: e.into(), - })?; - let url = Url::from_file_path(p).map_err(|_| { - CompileError::Bug(format!("failed to convert {u} into url").into()) - })?; - Ok(UrlFrag { url, frag }) - } - Err(e) => Err(CompileError::ParseUrlError { - url: u.to_owned(), - src: e.into(), - }), - } + // note: windows drive letter is treated as url scheme by url parser + #[cfg(not(target_arch = "wasm32"))] + if std::env::consts::OS == "windows" && starts_with_windows_drive(u) { + let url = Url::from_file_path(u) + .map_err(|_| CompileError::Bug(format!("failed to convert {u} into url").into()))?; + return Ok(UrlFrag { url, frag }); } - pub(crate) fn join(url: &Url, input: &str) -> Result { - let (input, frag) = Fragment::split(input)?; - if input.is_empty() { - return Ok(UrlFrag { - url: url.clone(), - frag, - }); - } - let url = url.join(input).map_err(|e| CompileError::ParseUrlError { - url: input.to_string(), - src: e.into(), + match Url::parse(u) { + Ok(url) => Ok(UrlFrag { url, frag }), + #[cfg(not(target_arch = "wasm32"))] + Err(url::ParseError::RelativeUrlWithoutBase) => { + let p = std::path::absolute(u).map_err(|e| CompileError::ParseUrlError { + url: u.to_owned(), + src: e.into(), + })?; + let url = 
Url::from_file_path(p).map_err(|_| { + CompileError::Bug(format!("failed to convert {u} into url").into()) })?; - Ok(UrlFrag { url, frag }) + } + Err(e) => Err(CompileError::ParseUrlError { + url: u.to_owned(), + src: e.into(), + }), } + } - pub(crate) fn format(url: &Url, frag: &str) -> String { - if frag.is_empty() { - url.to_string() - } else { - format!("{}#{}", url, Fragment::encode(frag)) - } + pub(crate) fn join(url: &Url, input: &str) -> Result { + let (input, frag) = Fragment::split(input)?; + if input.is_empty() { + return Ok(UrlFrag { + url: url.clone(), + frag, + }); } + let url = url.join(input).map_err(|e| CompileError::ParseUrlError { + url: input.to_string(), + src: e.into(), + })?; + + Ok(UrlFrag { url, frag }) + } + + pub(crate) fn format(url: &Url, frag: &str) -> String { + if frag.is_empty() { + url.to_string() + } else { + format!("{}#{}", url, Fragment::encode(frag)) + } + } } impl Display for UrlFrag { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}#{}", self.url, Fragment::encode(self.frag.as_str())) - } + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}#{}", self.url, Fragment::encode(self.frag.as_str())) + } } // -- #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub(crate) struct UrlPtr { - pub(crate) url: Url, - pub(crate) ptr: JsonPointer, + pub(crate) url: Url, + pub(crate) ptr: JsonPointer, } impl UrlPtr { - pub(crate) fn lookup<'a>(&self, doc: &'a Value) -> Result<&'a Value, CompileError> { - self.ptr.lookup(doc, &self.url) - } + pub(crate) fn lookup<'a>(&self, doc: &'a Value) -> Result<&'a Value, CompileError> { + self.ptr.lookup(doc, &self.url) + } - pub(crate) fn format(&self, tok: &str) -> String { - format!( - "{}#{}/{}", - self.url, - Fragment::encode(self.ptr.as_str()), - Fragment::encode(JsonPointer::escape(tok).as_ref()), - ) - } + pub(crate) fn format(&self, tok: &str) -> String { + format!( + "{}#{}/{}", + self.url, + 
Fragment::encode(self.ptr.as_str()), + Fragment::encode(JsonPointer::escape(tok).as_ref()), + ) + } } impl Display for UrlPtr { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}#{}", self.url, Fragment::encode(self.ptr.as_str())) - } + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}#{}", self.url, Fragment::encode(self.ptr.as_str())) + } } // -- pub(crate) fn is_integer(v: &Value) -> bool { - match v { - Value::Number(n) => { - n.is_i64() || n.is_u64() || n.as_f64().filter(|n| n.fract() == 0.0).is_some() - } - _ => false, + match v { + Value::Number(n) => { + n.is_i64() || n.is_u64() || n.as_f64().filter(|n| n.fract() == 0.0).is_some() } + _ => false, + } } #[cfg(not(target_arch = "wasm32"))] fn starts_with_windows_drive(p: &str) -> bool { - p.chars().next().filter(char::is_ascii_uppercase).is_some() && p[1..].starts_with(":\\") + p.chars().next().filter(char::is_ascii_uppercase).is_some() && p[1..].starts_with(":\\") } /// returns single-quoted string pub(crate) fn quote(s: &T) -> String where - T: AsRef + std::fmt::Debug + ?Sized, + T: AsRef + std::fmt::Debug + ?Sized, { - let s = format!("{s:?}").replace(r#"\""#, "\"").replace('\'', r"\'"); - format!("'{}'", &s[1..s.len() - 1]) + let s = format!("{s:?}").replace(r#"\""#, "\"").replace('\'', r"\'"); + format!("'{}'", &s[1..s.len() - 1]) } pub(crate) fn join_iter(iterable: T, sep: &str) -> String where - T: IntoIterator, - T::Item: Display, + T: IntoIterator, + T::Item: Display, { - iterable - .into_iter() - .map(|e| e.to_string()) - .collect::>() - .join(sep) + iterable + .into_iter() + .map(|e| e.to_string()) + .collect::>() + .join(sep) } pub(crate) fn escape(token: &str) -> Cow<'_, str> { - JsonPointer::escape(token) + JsonPointer::escape(token) } pub(crate) fn split(url: &str) -> (&str, &str) { - if let Some(i) = url.find('#') { - (&url[..i], &url[i + 1..]) - } else { - (url, "") - } + if let Some(i) = url.find('#') { + (&url[..i], 
&url[i + 1..]) + } else { + (url, "") + } } /// serde_json treats 0 and 0.0 not equal. so we cannot simply use v1==v2 pub(crate) fn equals(v1: &Value, v2: &Value) -> bool { - match (v1, v2) { - (Value::Null, Value::Null) => true, - (Value::Bool(b1), Value::Bool(b2)) => b1 == b2, - (Value::Number(n1), Value::Number(n2)) => { - if let (Some(n1), Some(n2)) = (n1.as_u64(), n2.as_u64()) { - return n1 == n2; - } - if let (Some(n1), Some(n2)) = (n1.as_i64(), n2.as_i64()) { - return n1 == n2; - } - if let (Some(n1), Some(n2)) = (n1.as_f64(), n2.as_f64()) { - return n1 == n2; - } - false - } - (Value::String(s1), Value::String(s2)) => s1 == s2, - (Value::Array(arr1), Value::Array(arr2)) => { - if arr1.len() != arr2.len() { - return false; - } - arr1.iter().zip(arr2).all(|(e1, e2)| equals(e1, e2)) - } - (Value::Object(obj1), Value::Object(obj2)) => { - if obj1.len() != obj2.len() { - return false; - } - for (k1, v1) in obj1 { - if let Some(v2) = obj2.get(k1) { - if !equals(v1, v2) { - return false; - } - } else { - return false; - } - } - true - } - _ => false, + match (v1, v2) { + (Value::Null, Value::Null) => true, + (Value::Bool(b1), Value::Bool(b2)) => b1 == b2, + (Value::Number(n1), Value::Number(n2)) => { + if let (Some(n1), Some(n2)) = (n1.as_u64(), n2.as_u64()) { + return n1 == n2; + } + if let (Some(n1), Some(n2)) = (n1.as_i64(), n2.as_i64()) { + return n1 == n2; + } + if let (Some(n1), Some(n2)) = (n1.as_f64(), n2.as_f64()) { + return n1 == n2; + } + false } + (Value::String(s1), Value::String(s2)) => s1 == s2, + (Value::Array(arr1), Value::Array(arr2)) => { + if arr1.len() != arr2.len() { + return false; + } + arr1.iter().zip(arr2).all(|(e1, e2)| equals(e1, e2)) + } + (Value::Object(obj1), Value::Object(obj2)) => { + if obj1.len() != obj2.len() { + return false; + } + for (k1, v1) in obj1 { + if let Some(v2) = obj2.get(k1) { + if !equals(v1, v2) { + return false; + } + } else { + return false; + } + } + true + } + _ => false, + } } pub(crate) fn duplicates(arr: 
&Vec) -> Option<(usize, usize)> { - match arr.as_slice() { - [e0, e1] => { - if equals(e0, e1) { - return Some((0, 1)); - } - } - [e0, e1, e2] => { - if equals(e0, e1) { - return Some((0, 1)); - } else if equals(e0, e2) { - return Some((0, 2)); - } else if equals(e1, e2) { - return Some((1, 2)); - } - } - _ => { - let len = arr.len(); - if len <= 20 { - for i in 0..len - 1 { - for j in i + 1..len { - if equals(&arr[i], &arr[j]) { - return Some((i, j)); - } - } - } - } else { - let mut seen = AHashMap::with_capacity(len); - for (i, item) in arr.iter().enumerate() { - if let Some(j) = seen.insert(HashedValue(item), i) { - return Some((j, i)); - } - } - } - } + match arr.as_slice() { + [e0, e1] => { + if equals(e0, e1) { + return Some((0, 1)); + } } - None + [e0, e1, e2] => { + if equals(e0, e1) { + return Some((0, 1)); + } else if equals(e0, e2) { + return Some((0, 2)); + } else if equals(e1, e2) { + return Some((1, 2)); + } + } + _ => { + let len = arr.len(); + if len <= 20 { + for i in 0..len - 1 { + for j in i + 1..len { + if equals(&arr[i], &arr[j]) { + return Some((i, j)); + } + } + } + } else { + let mut seen = AHashMap::with_capacity(len); + for (i, item) in arr.iter().enumerate() { + if let Some(j) = seen.insert(HashedValue(item), i) { + return Some((j, i)); + } + } + } + } + } + None } // HashedValue -- @@ -433,113 +433,113 @@ pub(crate) fn duplicates(arr: &Vec) -> Option<(usize, usize)> { pub(crate) struct HashedValue<'a>(pub(crate) &'a Value); impl PartialEq for HashedValue<'_> { - fn eq(&self, other: &Self) -> bool { - equals(self.0, other.0) - } + fn eq(&self, other: &Self) -> bool { + equals(self.0, other.0) + } } impl Eq for HashedValue<'_> {} impl Hash for HashedValue<'_> { - fn hash(&self, state: &mut H) { - match self.0 { - Value::Null => state.write_u32(3_221_225_473), // chosen randomly - Value::Bool(ref b) => b.hash(state), - Value::Number(ref num) => { - if let Some(num) = num.as_f64() { - num.to_bits().hash(state); - } else if let Some(num) = 
num.as_u64() { - num.hash(state); - } else if let Some(num) = num.as_i64() { - num.hash(state); - } - } - Value::String(ref str) => str.hash(state), - Value::Array(ref arr) => { - for item in arr { - HashedValue(item).hash(state); - } - } - Value::Object(ref obj) => { - let mut hash = 0; - for (pname, pvalue) in obj { - // We have no way of building a new hasher of type `H`, so we - // hardcode using the default hasher of a hash map. - let mut hasher = AHasher::default(); - pname.hash(&mut hasher); - HashedValue(pvalue).hash(&mut hasher); - hash ^= hasher.finish(); - } - state.write_u64(hash); - } + fn hash(&self, state: &mut H) { + match self.0 { + Value::Null => state.write_u32(3_221_225_473), // chosen randomly + Value::Bool(ref b) => b.hash(state), + Value::Number(ref num) => { + if let Some(num) = num.as_f64() { + num.to_bits().hash(state); + } else if let Some(num) = num.as_u64() { + num.hash(state); + } else if let Some(num) = num.as_i64() { + num.hash(state); } + } + Value::String(ref str) => str.hash(state), + Value::Array(ref arr) => { + for item in arr { + HashedValue(item).hash(state); + } + } + Value::Object(ref obj) => { + let mut hash = 0; + for (pname, pvalue) in obj { + // We have no way of building a new hasher of type `H`, so we + // hardcode using the default hasher of a hash map. 
+ let mut hasher = AHasher::default(); + pname.hash(&mut hasher); + HashedValue(pvalue).hash(&mut hasher); + hash ^= hasher.finish(); + } + state.write_u64(hash); + } } + } } #[cfg(test)] mod tests { - use ahash::AHashMap; - use serde_json::json; + use ahash::AHashMap; + use serde_json::json; - use super::*; + use super::*; - #[test] - fn test_quote() { - assert_eq!(quote(r#"abc"def'ghi"#), r#"'abc"def\'ghi'"#); + #[test] + fn test_quote() { + assert_eq!(quote(r#"abc"def'ghi"#), r#"'abc"def\'ghi'"#); + } + + #[test] + fn test_fragment_split() { + let tests = [ + ("#", Fragment::JsonPointer("".into())), + ("#/a/b", Fragment::JsonPointer("/a/b".into())), + ("#abcd", Fragment::Anchor("abcd".into())), + ("#%61%62%63%64", Fragment::Anchor("abcd".into())), + ( + "#%2F%61%62%63%64%2fef", + Fragment::JsonPointer("/abcd/ef".into()), + ), // '/' is encoded + ("#abcd+ef", Fragment::Anchor("abcd+ef".into())), // '+' should not traslate to space + ]; + for test in tests { + let (_, got) = Fragment::split(test.0).unwrap(); + assert_eq!(got, test.1, "Fragment::split({:?})", test.0); } + } - #[test] - fn test_fragment_split() { - let tests = [ - ("#", Fragment::JsonPointer("".into())), - ("#/a/b", Fragment::JsonPointer("/a/b".into())), - ("#abcd", Fragment::Anchor("abcd".into())), - ("#%61%62%63%64", Fragment::Anchor("abcd".into())), - ( - "#%2F%61%62%63%64%2fef", - Fragment::JsonPointer("/abcd/ef".into()), - ), // '/' is encoded - ("#abcd+ef", Fragment::Anchor("abcd+ef".into())), // '+' should not traslate to space - ]; - for test in tests { - let (_, got) = Fragment::split(test.0).unwrap(); - assert_eq!(got, test.1, "Fragment::split({:?})", test.0); - } + #[test] + fn test_unescape() { + let tests = [ + ("bar~0", Some("bar~")), + ("bar~1", Some("bar/")), + ("bar~01", Some("bar~1")), + ("bar~", None), + ("bar~~", None), + ]; + for (tok, want) in tests { + let res = JsonPointer::unescape(tok).ok(); + let got = res.as_ref().map(|c| c.as_ref()); + assert_eq!(got, want, 
"unescape({:?})", tok) } + } - #[test] - fn test_unescape() { - let tests = [ - ("bar~0", Some("bar~")), - ("bar~1", Some("bar/")), - ("bar~01", Some("bar~1")), - ("bar~", None), - ("bar~~", None), - ]; - for (tok, want) in tests { - let res = JsonPointer::unescape(tok).ok(); - let got = res.as_ref().map(|c| c.as_ref()); - assert_eq!(got, want, "unescape({:?})", tok) - } + #[test] + fn test_equals() { + let tests = [["1.0", "1"], ["-1.0", "-1"]]; + for [a, b] in tests { + let a = serde_json::from_str(a).unwrap(); + let b = serde_json::from_str(b).unwrap(); + assert!(equals(&a, &b)); } + } - #[test] - fn test_equals() { - let tests = [["1.0", "1"], ["-1.0", "-1"]]; - for [a, b] in tests { - let a = serde_json::from_str(a).unwrap(); - let b = serde_json::from_str(b).unwrap(); - assert!(equals(&a, &b)); - } - } - - #[test] - fn test_hashed_value() { - let mut seen = AHashMap::with_capacity(10); - let (v1, v2) = (json!(2), json!(2.0)); - assert!(equals(&v1, &v2)); - assert!(seen.insert(HashedValue(&v1), 1).is_none()); - assert!(seen.insert(HashedValue(&v2), 1).is_some()); - } + #[test] + fn test_hashed_value() { + let mut seen = AHashMap::with_capacity(10); + let (v1, v2) = (json!(2), json!(2.0)); + assert!(equals(&v1, &v2)); + assert!(seen.insert(HashedValue(&v1), 1).is_none()); + assert!(seen.insert(HashedValue(&v2), 1).is_some()); + } } diff --git a/validator/src/validator.rs b/validator/src/validator.rs index 2dfa804..63a3ef7 100644 --- a/validator/src/validator.rs +++ b/validator/src/validator.rs @@ -5,1165 +5,1195 @@ use serde_json::{Map, Value}; use crate::{util::*, *}; macro_rules! prop { - ($prop:expr) => { - InstanceToken::Prop(Cow::Borrowed($prop)) - }; + ($prop:expr) => { + InstanceToken::Prop(Cow::Borrowed($prop)) + }; } macro_rules! 
item { - ($item:expr) => { - InstanceToken::Item($item) - }; + ($item:expr) => { + InstanceToken::Item($item) + }; } pub(crate) fn validate<'s, 'v>( - v: &'v Value, - schema: &'s Schema, - schemas: &'s Schemas, + v: &'v Value, + schema: &'s Schema, + schemas: &'s Schemas, + options: Option<&'s ValidationOptions>, ) -> Result<(), ValidationError<'s, 'v>> { - let scope = Scope { - sch: schema.idx, - ref_kw: None, - vid: 0, - parent: None, - }; - let mut vloc = Vec::with_capacity(8); - let (result, _) = Validator { - v, - vloc: &mut vloc, - schema, - schemas, - scope, - uneval: Uneval::from(v, schema, false), - errors: vec![], - bool_result: false, - } - .validate(); - match result { - Err(err) => { - let mut e = ValidationError { - schema_url: &schema.loc, - instance_location: InstanceLocation::new(), - kind: ErrorKind::Schema { url: &schema.loc }, - causes: vec![], - }; - if let ErrorKind::Group = err.kind { - e.causes = err.causes; - } else { - e.causes.push(err); - } - Err(e) - } - Ok(_) => Ok(()), + let scope = Scope { + sch: schema.idx, + ref_kw: None, + vid: 0, + parent: None, + }; + let mut vloc = Vec::with_capacity(8); + let be_strict = options.map_or(false, |o| o.be_strict); + let (result, _) = Validator { + v, + vloc: &mut vloc, + schema, + schemas, + scope, + options, + uneval: Uneval::from(v, schema, be_strict), + errors: vec![], + bool_result: false, + } + .validate(); + match result { + Err(err) => { + let mut e = ValidationError { + schema_url: &schema.loc, + instance_location: InstanceLocation::new(), + kind: ErrorKind::Schema { url: &schema.loc }, + causes: vec![], + }; + if let ErrorKind::Group = err.kind { + e.causes = err.causes; + } else { + e.causes.push(err); + } + Err(e) } + Ok(_) => Ok(()), + } } macro_rules! 
kind { - ($kind:ident, $name:ident: $value:expr) => { - ErrorKind::$kind { $name: $value } - }; - ($kind:ident, $got:expr, $want:expr) => { - ErrorKind::$kind { - got: $got, - want: $want, - } - }; - ($kind:ident, $got:expr, $want:expr, $err:expr) => { - ErrorKind::$kind { - got: $got, - want: $want, - err: $err, - } - }; - ($kind: ident) => { - ErrorKind::$kind - }; + ($kind:ident, $name:ident: $value:expr) => { + ErrorKind::$kind { $name: $value } + }; + ($kind:ident, $got:expr, $want:expr) => { + ErrorKind::$kind { + got: $got, + want: $want, + } + }; + ($kind:ident, $got:expr, $want:expr, $err:expr) => { + ErrorKind::$kind { + got: $got, + want: $want, + err: $err, + } + }; + ($kind: ident) => { + ErrorKind::$kind + }; } struct Validator<'v, 's, 'd, 'e> { - v: &'v Value, - vloc: &'e mut Vec>, - schema: &'s Schema, - schemas: &'s Schemas, - scope: Scope<'d>, - uneval: Uneval<'v>, - errors: Vec>, - bool_result: bool, // is interested to know valid or not (but not actuall error) + v: &'v Value, + vloc: &'e mut Vec>, + schema: &'s Schema, + schemas: &'s Schemas, + scope: Scope<'d>, + options: Option<&'s ValidationOptions>, + uneval: Uneval<'v>, + errors: Vec>, + bool_result: bool, // is interested to know valid or not (but not actuall error) } impl<'v, 's> Validator<'v, 's, '_, '_> { - fn validate(mut self) -> (Result<(), ValidationError<'s, 'v>>, Uneval<'v>) { - let s = self.schema; - let v = self.v; + fn validate(mut self) -> (Result<(), ValidationError<'s, 'v>>, Uneval<'v>) { + let s = self.schema; + let v = self.v; - // boolean -- - if let Some(b) = s.boolean { - return match b { - false => (Err(self.error(kind!(FalseSchema))), self.uneval), - true => (Ok(()), self.uneval), - }; - } - - // check cycle -- - if let Some(scp) = self.scope.check_cycle() { - let kind = ErrorKind::RefCycle { - url: &self.schema.loc, - kw_loc1: self.kw_loc(&self.scope), - kw_loc2: self.kw_loc(scp), - }; - return (Err(self.error(kind)), self.uneval); - } - - // type -- - if 
!s.types.is_empty() { - let v_type = Type::of(v); - let matched = - s.types.contains(v_type) || (s.types.contains(Type::Integer) && is_integer(v)); - if !matched { - return (Err(self.error(kind!(Type, v_type, s.types))), self.uneval); - } - } - - // constant -- - if let Some(c) = &s.constant { - if !equals(v, c) { - return (Err(self.error(kind!(Const, want: c))), self.uneval); - } - } - - // enum -- - if let Some(Enum { types, values }) = &s.enum_ { - if !types.contains(Type::of(v)) || !values.iter().any(|e| equals(e, v)) { - return (Err(self.error(kind!(Enum, want: values))), self.uneval); - } - } - - // format -- - if let Some(format) = &s.format { - if let Err(e) = (format.func)(v) { - self.add_error(kind!(Format, Cow::Borrowed(v), format.name, e)); - } - } - - // $ref -- - if let Some(ref_) = s.ref_ { - let result = self.validate_ref(ref_, "$ref"); - if s.draft_version < 2019 { - return (result, self.uneval); - } - self.errors.extend(result.err()); - } - - // type specific validations -- - match v { - Value::Object(obj) => self.obj_validate(obj), - Value::Array(arr) => self.arr_validate(arr), - Value::String(str) => self.str_validate(str), - Value::Number(num) => self.num_validate(num), - _ => {} - } - - if self.errors.is_empty() || !self.bool_result { - if s.draft_version >= 2019 { - self.refs_validate(); - } - self.cond_validate(); - if s.draft_version >= 2019 { - self.uneval_validate(); - } - } - - match self.errors.len() { - 0 => (Ok(()), self.uneval), - 1 => (Err(self.errors.remove(0)), self.uneval), - _ => { - let mut e = self.error(kind!(Group)); - e.causes = self.errors; - (Err(e), self.uneval) - } - } + // boolean -- + if let Some(b) = s.boolean { + return match b { + false => (Err(self.error(kind!(FalseSchema))), self.uneval), + true => (Ok(()), self.uneval), + }; } + + // check cycle -- + if let Some(scp) = self.scope.check_cycle() { + let kind = ErrorKind::RefCycle { + url: &self.schema.loc, + kw_loc1: self.kw_loc(&self.scope), + kw_loc2: 
self.kw_loc(scp), + }; + return (Err(self.error(kind)), self.uneval); + } + + // type -- + if !s.types.is_empty() { + let v_type = Type::of(v); + let matched = + s.types.contains(v_type) || (s.types.contains(Type::Integer) && is_integer(v)); + if !matched { + return (Err(self.error(kind!(Type, v_type, s.types))), self.uneval); + } + } + + // constant -- + if let Some(c) = &s.constant { + if !equals(v, c) { + return (Err(self.error(kind!(Const, want: c))), self.uneval); + } + } + + // enum -- + if let Some(Enum { types, values }) = &s.enum_ { + if !types.contains(Type::of(v)) || !values.iter().any(|e| equals(e, v)) { + return (Err(self.error(kind!(Enum, want: values))), self.uneval); + } + } + + // format -- + if let Some(format) = &s.format { + if let Err(e) = (format.func)(v) { + self.add_error(kind!(Format, Cow::Borrowed(v), format.name, e)); + } + } + + // type specific validations -- + match v { + Value::Object(obj) => self.obj_validate(obj), + Value::Array(arr) => self.arr_validate(arr), + Value::String(str) => self.str_validate(str), + Value::Number(num) => self.num_validate(num), + _ => {} + } + + // $ref -- + if let Some(ref_) = s.ref_ { + let result = self.validate_ref(ref_, "$ref"); + if s.draft_version < 2019 { + return (result, self.uneval); + } + self.errors.extend(result.err()); + } + + if self.errors.is_empty() || !self.bool_result { + if s.draft_version >= 2019 { + self.refs_validate(); + } + self.cond_validate(); + if s.draft_version >= 2019 { + self.uneval_validate(); + } + } + + match self.errors.len() { + 0 => (Ok(()), self.uneval), + 1 => (Err(self.errors.remove(0)), self.uneval), + _ => { + let mut e = self.error(kind!(Group)); + e.causes = self.errors; + (Err(e), self.uneval) + } + } + } } // type specific validations impl<'v> Validator<'v, '_, '_, '_> { - fn obj_validate(&mut self, obj: &'v Map) { - let s = self.schema; - macro_rules! 
add_err { - ($result:expr) => { - if let Err(e) = $result { - self.errors.push(e); - } - }; - } - - // minProperties -- - if let Some(min) = s.min_properties { - if obj.len() < min { - self.add_error(kind!(MinProperties, obj.len(), min)); - } - } - - // maxProperties -- - if let Some(max) = s.max_properties { - if obj.len() > max { - self.add_error(kind!(MaxProperties, obj.len(), max)); - } - } - - // required -- - if !s.required.is_empty() { - if let Some(missing) = self.find_missing(obj, &s.required) { - self.add_error(kind!(Required, want: missing)); - } - } - - if self.bool_result && !self.errors.is_empty() { - return; - } - - // dependencies -- - for (prop, dep) in &s.dependencies { - if obj.contains_key(prop) { - match dep { - Dependency::Props(required) => { - if let Some(missing) = self.find_missing(obj, required) { - self.add_error(ErrorKind::Dependency { prop, missing }); - } - } - Dependency::SchemaRef(sch) => { - add_err!(self.validate_self(*sch)); - } - } - } - } - - let mut additional_props = vec![]; - for (pname, pvalue) in obj { - if self.bool_result && !self.errors.is_empty() { - return; - } - let mut evaluated = false; - - // properties -- - if let Some(sch) = s.properties.get(pname) { - evaluated = true; - add_err!(self.validate_val(*sch, pvalue, prop!(pname))); - } - - // patternProperties -- - for (regex, sch) in &s.pattern_properties { - if regex.is_match(pname) { - evaluated = true; - add_err!(self.validate_val(*sch, pvalue, prop!(pname))); - } - } - - if !evaluated { - // additionalProperties -- - if let Some(additional) = &s.additional_properties { - evaluated = true; - match additional { - Additional::Bool(allowed) => { - if !allowed { - additional_props.push(pname.into()); - } - } - Additional::SchemaRef(sch) => { - add_err!(self.validate_val(*sch, pvalue, prop!(pname))); - } - } - } - } - - if evaluated { - self.uneval.props.remove(pname); - } - } - if !additional_props.is_empty() { - self.add_error(kind!(AdditionalProperties, got: 
additional_props)); - } - - if s.draft_version == 4 { - return; - } - - // propertyNames -- - if let Some(sch) = &s.property_names { - for pname in obj.keys() { - let v = Value::String(pname.to_owned()); - if let Err(mut e) = self.schemas.validate(&v, *sch) { - e.schema_url = &s.loc; - e.kind = ErrorKind::PropertyName { - prop: pname.to_owned(), - }; - self.errors.push(e.clone_static()); - } - } - } - - if s.draft_version == 6 { - return; - } - - // dependentSchemas -- - for (pname, sch) in &s.dependent_schemas { - if obj.contains_key(pname) { - add_err!(self.validate_self(*sch)); - } - } - - // dependentRequired -- - for (prop, required) in &s.dependent_required { - if obj.contains_key(prop) { - if let Some(missing) = self.find_missing(obj, required) { - self.add_error(ErrorKind::DependentRequired { prop, missing }); - } - } + fn obj_validate(&mut self, obj: &'v Map) { + let s = self.schema; + macro_rules! add_err { + ($result:expr) => { + if let Err(e) = $result { + self.errors.push(e); } + }; } - fn arr_validate(&mut self, arr: &'v Vec) { - let s = self.schema; - let len = arr.len(); - macro_rules! 
add_err { - ($result:expr) => { - if let Err(e) = $result { - self.errors.push(e); - } - }; + // minProperties -- + if let Some(min) = s.min_properties { + if obj.len() < min { + self.add_error(kind!(MinProperties, obj.len(), min)); + } + } + + // maxProperties -- + if let Some(max) = s.max_properties { + if obj.len() > max { + self.add_error(kind!(MaxProperties, obj.len(), max)); + } + } + + // required -- + if !s.required.is_empty() { + if let Some(missing) = self.find_missing(obj, &s.required) { + self.add_error(kind!(Required, want: missing)); + } + } + + if self.bool_result && !self.errors.is_empty() { + return; + } + + // dependencies -- + for (prop, dep) in &s.dependencies { + if obj.contains_key(prop) { + match dep { + Dependency::Props(required) => { + if let Some(missing) = self.find_missing(obj, required) { + self.add_error(ErrorKind::Dependency { prop, missing }); + } + } + Dependency::SchemaRef(sch) => { + add_err!(self.validate_self(*sch)); + } } + } + } - // minItems -- - if let Some(min) = s.min_items { - if len < min { - self.add_error(kind!(MinItems, len, min)); - } + let mut additional_props = vec![]; + for (pname, pvalue) in obj { + if self.bool_result && !self.errors.is_empty() { + return; + } + let mut evaluated = false; + + // properties -- + if let Some(sch) = s.properties.get(pname) { + evaluated = true; + add_err!(self.validate_val(*sch, pvalue, prop!(pname))); + } + + // patternProperties -- + for (regex, sch) in &s.pattern_properties { + if regex.is_match(pname) { + evaluated = true; + add_err!(self.validate_val(*sch, pvalue, prop!(pname))); } + } - // maxItems -- - if let Some(max) = s.max_items { - if len > max { - self.add_error(kind!(MaxItems, len, max)); + if !evaluated { + // additionalProperties -- + if let Some(additional) = &s.additional_properties { + evaluated = true; + match additional { + Additional::Bool(allowed) => { + if !allowed { + additional_props.push(pname.into()); + } } + Additional::SchemaRef(sch) => { + 
add_err!(self.validate_val(*sch, pvalue, prop!(pname))); + } + } } + } - // uniqueItems -- - if len > 1 && s.unique_items { - if let Some((i, j)) = duplicates(arr) { - self.add_error(kind!(UniqueItems, got: [i, j])); - } + if evaluated { + self.uneval.props.remove(pname); + } + } + if !additional_props.is_empty() { + self.add_error(kind!(AdditionalProperties, got: additional_props)); + } + + if s.draft_version == 4 { + return; + } + + // propertyNames -- + if let Some(sch) = &s.property_names { + for pname in obj.keys() { + let v = Value::String(pname.to_owned()); + if let Err(mut e) = self.schemas.validate(&v, *sch, self.options) { + e.schema_url = &s.loc; + e.kind = ErrorKind::PropertyName { + prop: pname.to_owned(), + }; + self.errors.push(e.clone_static()); } + } + } - if s.draft_version < 2020 { - let mut evaluated = 0; + if s.draft_version == 6 { + return; + } - // items -- - if let Some(items) = &s.items { - match items { - Items::SchemaRef(sch) => { - for (i, item) in arr.iter().enumerate() { - add_err!(self.validate_val(*sch, item, item!(i))); - } - evaluated = len; - debug_assert!(self.uneval.items.is_empty()); - } - Items::SchemaRefs(list) => { - for (i, (item, sch)) in arr.iter().zip(list).enumerate() { - add_err!(self.validate_val(*sch, item, item!(i))); - } - evaluated = min(list.len(), len); - } - } - } + // dependentSchemas -- + for (pname, sch) in &s.dependent_schemas { + if obj.contains_key(pname) { + add_err!(self.validate_self(*sch)); + } + } - // additionalItems -- - if let Some(additional) = &s.additional_items { - match additional { - Additional::Bool(allowed) => { - if !allowed && evaluated != len { - self.add_error(kind!(AdditionalItems, got: len - evaluated)); - } - } - Additional::SchemaRef(sch) => { - for (i, item) in arr[evaluated..].iter().enumerate() { - add_err!(self.validate_val(*sch, item, item!(i))); - } - } - } - debug_assert!(self.uneval.items.is_empty()); - } - } else { - // prefixItems -- - for (i, (sch, item)) in 
s.prefix_items.iter().zip(arr).enumerate() { - add_err!(self.validate_val(*sch, item, item!(i))); - } - - // items2020 -- - if let Some(sch) = &s.items2020 { - let evaluated = min(s.prefix_items.len(), len); - for (i, item) in arr[evaluated..].iter().enumerate() { - add_err!(self.validate_val(*sch, item, item!(i))); - } - debug_assert!(self.uneval.items.is_empty()); - } + // dependentRequired -- + for (prop, required) in &s.dependent_required { + if obj.contains_key(prop) { + if let Some(missing) = self.find_missing(obj, required) { + self.add_error(ErrorKind::DependentRequired { prop, missing }); } + } + } + } - // contains -- - if let Some(sch) = &s.contains { - let mut matched = vec![]; - let mut errors = vec![]; + fn arr_validate(&mut self, arr: &'v Vec) { + let s = self.schema; + let len = arr.len(); + macro_rules! add_err { + ($result:expr) => { + if let Err(e) = $result { + self.errors.push(e); + } + }; + } + // minItems -- + if let Some(min) = s.min_items { + if len < min { + self.add_error(kind!(MinItems, len, min)); + } + } + + // maxItems -- + if let Some(max) = s.max_items { + if len > max { + self.add_error(kind!(MaxItems, len, max)); + } + } + + // uniqueItems -- + if len > 1 && s.unique_items { + if let Some((i, j)) = duplicates(arr) { + self.add_error(kind!(UniqueItems, got: [i, j])); + } + } + + if s.draft_version < 2020 { + let mut evaluated = 0; + + // items -- + if let Some(items) = &s.items { + match items { + Items::SchemaRef(sch) => { for (i, item) in arr.iter().enumerate() { - if let Err(e) = self.validate_val(*sch, item, item!(i)) { - errors.push(e); - } else { - matched.push(i); - if s.draft_version >= 2020 { - self.uneval.items.remove(&i); - } - } + add_err!(self.validate_val(*sch, item, item!(i))); } - - // minContains -- - if let Some(min) = s.min_contains { - if matched.len() < min { - let mut e = self.error(kind!(MinContains, matched.clone(), min)); - e.causes = errors; - self.errors.push(e); - } - } else if matched.is_empty() { - let 
mut e = self.error(kind!(Contains)); - e.causes = errors; - self.errors.push(e); - } - - // maxContains -- - if let Some(max) = s.max_contains { - if matched.len() > max { - self.add_error(kind!(MaxContains, matched, max)); - } + evaluated = len; + // debug_assert!(self.uneval.items.is_empty()); + } + Items::SchemaRefs(list) => { + for (i, (item, sch)) in arr.iter().zip(list).enumerate() { + add_err!(self.validate_val(*sch, item, item!(i))); } + evaluated = min(list.len(), len); + } } + } + + // additionalItems -- + if let Some(additional) = &s.additional_items { + match additional { + Additional::Bool(allowed) => { + if !allowed && evaluated != len { + self.add_error(kind!(AdditionalItems, got: len - evaluated)); + } + } + Additional::SchemaRef(sch) => { + for (i, item) in arr[evaluated..].iter().enumerate() { + add_err!(self.validate_val(*sch, item, item!(i))); + } + } + } + // debug_assert!(self.uneval.items.is_empty()); + } + } else { + // prefixItems -- + for (i, (sch, item)) in s.prefix_items.iter().zip(arr).enumerate() { + add_err!(self.validate_val(*sch, item, item!(i))); + } + + // items2020 -- + if let Some(sch) = &s.items2020 { + let evaluated = min(s.prefix_items.len(), len); + for (i, item) in arr[evaluated..].iter().enumerate() { + add_err!(self.validate_val(*sch, item, item!(i))); + } + // debug_assert!(self.uneval.items.is_empty()); + } } - fn str_validate(&mut self, str: &'v String) { - let s = self.schema; - let mut len = None; + // contains -- + if let Some(sch) = &s.contains { + let mut matched = vec![]; + let mut errors = vec![]; - // minLength -- - if let Some(min) = s.min_length { - let len = len.get_or_insert_with(|| str.chars().count()); - if *len < min { - self.add_error(kind!(MinLength, *len, min)); - } + for (i, item) in arr.iter().enumerate() { + if let Err(e) = self.validate_val(*sch, item, item!(i)) { + errors.push(e); + } else { + matched.push(i); + if s.draft_version >= 2020 { + self.uneval.items.remove(&i); + } } + } - // maxLength 
-- - if let Some(max) = s.max_length { - let len = len.get_or_insert_with(|| str.chars().count()); - if *len > max { - self.add_error(kind!(MaxLength, *len, max)); - } + // minContains -- + if let Some(min) = s.min_contains { + if matched.len() < min { + let mut e = self.error(kind!(MinContains, matched.clone(), min)); + e.causes = errors; + self.errors.push(e); } + } else if matched.is_empty() { + let mut e = self.error(kind!(Contains)); + e.causes = errors; + self.errors.push(e); + } - // pattern -- - if let Some(regex) = &s.pattern { - if !regex.is_match(str) { - self.add_error(kind!(Pattern, str.into(), regex.as_str())); - } + // maxContains -- + if let Some(max) = s.max_contains { + if matched.len() > max { + self.add_error(kind!(MaxContains, matched, max)); } + } + } + } - if s.draft_version == 6 { - return; - } + fn str_validate(&mut self, str: &'v String) { + let s = self.schema; + let mut len = None; - // contentEncoding -- - let mut decoded = Some(Cow::from(str.as_bytes())); - if let Some(decoder) = &s.content_encoding { - match (decoder.func)(str) { - Ok(bytes) => decoded = Some(Cow::from(bytes)), - Err(err) => { - decoded = None; - self.add_error(ErrorKind::ContentEncoding { - want: decoder.name, - err, - }) - } - } - } - - // contentMediaType -- - let mut deserialized = None; - if let (Some(mt), Some(decoded)) = (&s.content_media_type, decoded) { - match (mt.func)(decoded.as_ref(), s.content_schema.is_some()) { - Ok(des) => deserialized = des, - Err(e) => { - self.add_error(kind!(ContentMediaType, decoded.into(), mt.name, e)); - } - } - } - - // contentSchema -- - if let (Some(sch), Some(v)) = (s.content_schema, deserialized) { - if let Err(mut e) = self.schemas.validate(&v, sch) { - e.schema_url = &s.loc; - e.kind = kind!(ContentSchema); - self.errors.push(e.clone_static()); - } - } + // minLength -- + if let Some(min) = s.min_length { + let len = len.get_or_insert_with(|| str.chars().count()); + if *len < min { + self.add_error(kind!(MinLength, *len, 
min)); + } } - fn num_validate(&mut self, num: &'v Number) { - let s = self.schema; - - // minimum -- - if let Some(min) = &s.minimum { - if let (Some(minf), Some(numf)) = (min.as_f64(), num.as_f64()) { - if numf < minf { - self.add_error(kind!(Minimum, Cow::Borrowed(num), min)); - } - } - } - - // maximum -- - if let Some(max) = &s.maximum { - if let (Some(maxf), Some(numf)) = (max.as_f64(), num.as_f64()) { - if numf > maxf { - self.add_error(kind!(Maximum, Cow::Borrowed(num), max)); - } - } - } - - // exclusiveMinimum -- - if let Some(ex_min) = &s.exclusive_minimum { - if let (Some(ex_minf), Some(numf)) = (ex_min.as_f64(), num.as_f64()) { - if numf <= ex_minf { - self.add_error(kind!(ExclusiveMinimum, Cow::Borrowed(num), ex_min)); - } - } - } - - // exclusiveMaximum -- - if let Some(ex_max) = &s.exclusive_maximum { - if let (Some(ex_maxf), Some(numf)) = (ex_max.as_f64(), num.as_f64()) { - if numf >= ex_maxf { - self.add_error(kind!(ExclusiveMaximum, Cow::Borrowed(num), ex_max)); - } - } - } - - // multipleOf -- - if let Some(mul) = &s.multiple_of { - if let (Some(mulf), Some(numf)) = (mul.as_f64(), num.as_f64()) { - if (numf / mulf).fract() != 0.0 { - self.add_error(kind!(MultipleOf, Cow::Borrowed(num), mul)); - } - } - } + // maxLength -- + if let Some(max) = s.max_length { + let len = len.get_or_insert_with(|| str.chars().count()); + if *len > max { + self.add_error(kind!(MaxLength, *len, max)); + } } + + // pattern -- + if let Some(regex) = &s.pattern { + if !regex.is_match(str) { + self.add_error(kind!(Pattern, str.into(), regex.as_str())); + } + } + + if s.draft_version == 6 { + return; + } + + // contentEncoding -- + let mut decoded = Some(Cow::from(str.as_bytes())); + if let Some(decoder) = &s.content_encoding { + match (decoder.func)(str) { + Ok(bytes) => decoded = Some(Cow::from(bytes)), + Err(err) => { + decoded = None; + self.add_error(ErrorKind::ContentEncoding { + want: decoder.name, + err, + }) + } + } + } + + // contentMediaType -- + let mut 
deserialized = None; + if let (Some(mt), Some(decoded)) = (&s.content_media_type, decoded) { + match (mt.func)(decoded.as_ref(), s.content_schema.is_some()) { + Ok(des) => deserialized = des, + Err(e) => { + self.add_error(kind!(ContentMediaType, decoded.into(), mt.name, e)); + } + } + } + + // contentSchema -- + if let (Some(sch), Some(v)) = (s.content_schema, deserialized) { + if let Err(mut e) = self.schemas.validate(&v, sch, self.options) { + e.schema_url = &s.loc; + e.kind = kind!(ContentSchema); + self.errors.push(e.clone_static()); + } + } + } + + fn num_validate(&mut self, num: &'v Number) { + let s = self.schema; + + // minimum -- + if let Some(min) = &s.minimum { + if let (Some(minf), Some(numf)) = (min.as_f64(), num.as_f64()) { + if numf < minf { + self.add_error(kind!(Minimum, Cow::Borrowed(num), min)); + } + } + } + + // maximum -- + if let Some(max) = &s.maximum { + if let (Some(maxf), Some(numf)) = (max.as_f64(), num.as_f64()) { + if numf > maxf { + self.add_error(kind!(Maximum, Cow::Borrowed(num), max)); + } + } + } + + // exclusiveMinimum -- + if let Some(ex_min) = &s.exclusive_minimum { + if let (Some(ex_minf), Some(numf)) = (ex_min.as_f64(), num.as_f64()) { + if numf <= ex_minf { + self.add_error(kind!(ExclusiveMinimum, Cow::Borrowed(num), ex_min)); + } + } + } + + // exclusiveMaximum -- + if let Some(ex_max) = &s.exclusive_maximum { + if let (Some(ex_maxf), Some(numf)) = (ex_max.as_f64(), num.as_f64()) { + if numf >= ex_maxf { + self.add_error(kind!(ExclusiveMaximum, Cow::Borrowed(num), ex_max)); + } + } + } + + // multipleOf -- + if let Some(mul) = &s.multiple_of { + if let (Some(mulf), Some(numf)) = (mul.as_f64(), num.as_f64()) { + if (numf / mulf).fract() != 0.0 { + self.add_error(kind!(MultipleOf, Cow::Borrowed(num), mul)); + } + } + } + } } // references validation impl<'v, 's> Validator<'v, 's, '_, '_> { - fn refs_validate(&mut self) { - let s = self.schema; - macro_rules! 
add_err { - ($result:expr) => { - if let Err(e) = $result { - self.errors.push(e); - } - }; - } - - // $recursiveRef -- - if let Some(mut sch) = s.recursive_ref { - if self.schemas.get(sch).recursive_anchor { - sch = self.resolve_recursive_anchor(sch); - } - add_err!(self.validate_ref(sch, "$recursiveRef")); - } - - // $dynamicRef -- - if let Some(dref) = &s.dynamic_ref { - let mut sch = dref.sch; // initial target - if let Some(anchor) = &dref.anchor { - // $dynamicRef includes anchor - if self.schemas.get(sch).dynamic_anchor == dref.anchor { - // initial target has matching $dynamicAnchor - sch = self.resolve_dynamic_anchor(anchor, sch); - } - } - add_err!(self.validate_ref(sch, "$dynamicRef")); + fn refs_validate(&mut self) { + let s = self.schema; + macro_rules! add_err { + ($result:expr) => { + if let Err(e) = $result { + self.errors.push(e); } + }; } - fn validate_ref( - &mut self, - sch: SchemaIndex, - kw: &'static str, - ) -> Result<(), ValidationError<'s, 'v>> { - if let Err(err) = self._validate_self(sch, kw.into(), false) { - let url = &self.schemas.get(sch).loc; - let mut ref_err = self.error(ErrorKind::Reference { kw, url }); - if let ErrorKind::Group = err.kind { - ref_err.causes = err.causes; - } else { - ref_err.causes.push(err); - } - return Err(ref_err); - } - Ok(()) + // $recursiveRef -- + if let Some(mut sch) = s.recursive_ref { + if self.schemas.get(sch).recursive_anchor { + sch = self.resolve_recursive_anchor(sch); + } + add_err!(self.validate_ref(sch, "$recursiveRef")); } - fn resolve_recursive_anchor(&self, fallback: SchemaIndex) -> SchemaIndex { - let mut sch = fallback; - let mut scope = &self.scope; - loop { - let scope_sch = self.schemas.get(scope.sch); - let base_sch = self.schemas.get(scope_sch.resource); - if base_sch.recursive_anchor { - sch = scope.sch - } - if let Some(parent) = scope.parent { - scope = parent; - } else { - return sch; - } + // $dynamicRef -- + if let Some(dref) = &s.dynamic_ref { + let mut sch = dref.sch; // 
initial target + if let Some(anchor) = &dref.anchor { + // $dynamicRef includes anchor + if self.schemas.get(sch).dynamic_anchor == dref.anchor { + // initial target has matching $dynamicAnchor + sch = self.resolve_dynamic_anchor(anchor, sch); } + } + add_err!(self.validate_ref(sch, "$dynamicRef")); } + } - fn resolve_dynamic_anchor(&self, name: &String, fallback: SchemaIndex) -> SchemaIndex { - let mut sch = fallback; - let mut scope = &self.scope; - loop { - let scope_sch = self.schemas.get(scope.sch); - let base_sch = self.schemas.get(scope_sch.resource); - debug_assert_eq!(base_sch.idx, base_sch.resource); - if let Some(dsch) = base_sch.dynamic_anchors.get(name) { - sch = *dsch - } - if let Some(parent) = scope.parent { - scope = parent; - } else { - return sch; - } - } + fn validate_ref( + &mut self, + sch: SchemaIndex, + kw: &'static str, + ) -> Result<(), ValidationError<'s, 'v>> { + if let Err(err) = self._validate_self(sch, kw.into(), false) { + let url = &self.schemas.get(sch).loc; + let mut ref_err = self.error(ErrorKind::Reference { kw, url }); + if let ErrorKind::Group = err.kind { + ref_err.causes = err.causes; + } else { + ref_err.causes.push(err); + } + return Err(ref_err); } + Ok(()) + } + + fn resolve_recursive_anchor(&self, fallback: SchemaIndex) -> SchemaIndex { + let mut sch = fallback; + let mut scope = &self.scope; + loop { + let scope_sch = self.schemas.get(scope.sch); + let base_sch = self.schemas.get(scope_sch.resource); + if base_sch.recursive_anchor { + sch = scope.sch + } + if let Some(parent) = scope.parent { + scope = parent; + } else { + return sch; + } + } + } + + fn resolve_dynamic_anchor(&self, name: &String, fallback: SchemaIndex) -> SchemaIndex { + let mut sch = fallback; + let mut scope = &self.scope; + loop { + let scope_sch = self.schemas.get(scope.sch); + let base_sch = self.schemas.get(scope_sch.resource); + debug_assert_eq!(base_sch.idx, base_sch.resource); + if let Some(dsch) = base_sch.dynamic_anchors.get(name) { + sch = 
*dsch + } + if let Some(parent) = scope.parent { + scope = parent; + } else { + return sch; + } + } + } } // conditional validation impl Validator<'_, '_, '_, '_> { - fn cond_validate(&mut self) { - let s = self.schema; - macro_rules! add_err { - ($result:expr) => { - if let Err(e) = $result { - self.errors.push(e); - } - }; - } - - // not -- - if let Some(not) = s.not { - if self._validate_self(not, None, true).is_ok() { - self.add_error(kind!(Not)); - } - } - - // allOf -- - if !s.all_of.is_empty() { - let mut errors = vec![]; - for sch in &s.all_of { - if let Err(e) = self.validate_self(*sch) { - errors.push(e); - if self.bool_result { - break; - } - } - } - if !errors.is_empty() { - self.add_errors(errors, kind!(AllOf)); - } - } - - // anyOf -- - if !s.any_of.is_empty() { - let mut matched = false; - let mut errors = vec![]; - for sch in &s.any_of { - match self.validate_self(*sch) { - Ok(_) => { - matched = true; - // for uneval, all schemas must be checked - if self.uneval.is_empty() { - break; - } - } - Err(e) => errors.push(e), - } - } - if !matched { - self.add_errors(errors, kind!(AnyOf)); - } - } - - // oneOf -- - if !s.one_of.is_empty() { - let mut matched = None; - let mut errors = vec![]; - for (i, sch) in s.one_of.iter().enumerate() { - if let Err(e) = self._validate_self(*sch, None, matched.is_some()) { - if matched.is_none() { - errors.push(e); - } - } else { - match matched { - None => _ = matched.replace(i), - Some(prev) => { - self.add_error(ErrorKind::OneOf(Some((prev, i)))); - break; - } - } - } - } - if matched.is_none() { - self.add_errors(errors, ErrorKind::OneOf(None)); - } - } - - // if, then, else -- - if let Some(if_) = s.if_ { - if self._validate_self(if_, None, true).is_ok() { - if let Some(then) = s.then { - add_err!(self.validate_self(then)); - } - } else if let Some(else_) = s.else_ { - add_err!(self.validate_self(else_)); - } + fn cond_validate(&mut self) { + let s = self.schema; + macro_rules! 
add_err { + ($result:expr) => { + if let Err(e) = $result { + self.errors.push(e); } + }; } + + // not -- + if let Some(not) = s.not { + if self._validate_self(not, None, true).is_ok() { + self.add_error(kind!(Not)); + } + } + + // allOf -- + if !s.all_of.is_empty() { + let mut errors = vec![]; + for sch in &s.all_of { + if let Err(e) = self.validate_self(*sch) { + errors.push(e); + if self.bool_result { + break; + } + } + } + if !errors.is_empty() { + self.add_errors(errors, kind!(AllOf)); + } + } + + // anyOf -- + if !s.any_of.is_empty() { + let mut matched = false; + let mut errors = vec![]; + for sch in &s.any_of { + match self.validate_self(*sch) { + Ok(_) => { + matched = true; + // for uneval, all schemas must be checked + if self.uneval.is_empty() { + break; + } + } + Err(e) => errors.push(e), + } + } + if !matched { + self.add_errors(errors, kind!(AnyOf)); + } + } + + // oneOf -- + if !s.one_of.is_empty() { + let mut matched = None; + let mut errors = vec![]; + for (i, sch) in s.one_of.iter().enumerate() { + if let Err(e) = self._validate_self(*sch, None, matched.is_some()) { + if matched.is_none() { + errors.push(e); + } + } else { + match matched { + None => _ = matched.replace(i), + Some(prev) => { + self.add_error(ErrorKind::OneOf(Some((prev, i)))); + break; + } + } + } + } + if matched.is_none() { + self.add_errors(errors, ErrorKind::OneOf(None)); + } + } + + // if, then, else -- + if let Some(if_) = s.if_ { + if self._validate_self(if_, None, true).is_ok() { + if let Some(then) = s.then { + add_err!(self.validate_self(then)); + } + } else if let Some(else_) = s.else_ { + add_err!(self.validate_self(else_)); + } + } + } } // uneval validation impl Validator<'_, '_, '_, '_> { - fn uneval_validate(&mut self) { - let s = self.schema; - let v = self.v; - macro_rules! 
add_err { - ($result:expr) => { - if let Err(e) = $result { - self.errors.push(e); - } - }; - } - - // unevaluatedProperties -- - if let (Some(sch), Value::Object(obj)) = (s.unevaluated_properties, v) { - let uneval = std::mem::take(&mut self.uneval); - for pname in &uneval.props { - if let Some(pvalue) = obj.get(*pname) { - add_err!(self.validate_val(sch, pvalue, prop!(pname))); - } - } - self.uneval.props.clear(); - } - - // unevaluatedItems -- - if let (Some(sch), Value::Array(arr)) = (s.unevaluated_items, v) { - let uneval = std::mem::take(&mut self.uneval); - for i in &uneval.items { - if let Some(pvalue) = arr.get(*i) { - add_err!(self.validate_val(sch, pvalue, item!(*i))); - } - } - self.uneval.items.clear(); + fn uneval_validate(&mut self) { + let s = self.schema; + let v = self.v; + macro_rules! add_err { + ($result:expr) => { + if let Err(e) = $result { + self.errors.push(e); } + }; } + + let be_strict = self.options.map_or(false, |o| o.be_strict); + + // unevaluatedProperties -- + if let Value::Object(obj) = v { + if let Some(sch_idx) = s.unevaluated_properties { + let sch = self.schemas.get(sch_idx); + if sch.boolean == Some(false) { + // This is `unevaluatedProperties: false`, treat as additional properties + if !self.uneval.props.is_empty() { + let props: Vec> = + self.uneval.props.iter().map(|p| Cow::from((*p).as_str())).collect(); + self.add_error(ErrorKind::AdditionalProperties { got: props }); + } + self.uneval.props.clear(); + } else { + // It's a schema, validate against it + let uneval = std::mem::take(&mut self.uneval); + for pname in &uneval.props { + if let Some(pvalue) = obj.get(*pname) { + add_err!(self.validate_val(sch_idx, pvalue, prop!(pname))); + } + } + self.uneval.props.clear(); + } + } else if be_strict && !self.bool_result { + // 2. 
Runtime strictness check + if !self.uneval.props.is_empty() { + let props: Vec> = self.uneval.props.iter().map(|p| Cow::from((*p).as_str())).collect(); + self.add_error(ErrorKind::AdditionalProperties { got: props }); + } + self.uneval.props.clear(); + } + } + + // unevaluatedItems -- + if let (Some(sch), Value::Array(arr)) = (s.unevaluated_items, v) { + let uneval = std::mem::take(&mut self.uneval); + for i in &uneval.items { + if let Some(pvalue) = arr.get(*i) { + add_err!(self.validate_val(sch, pvalue, item!(*i))); + } + } + self.uneval.items.clear(); + } + } } // validation helpers impl<'v, 's> Validator<'v, 's, '_, '_> { - fn validate_val( - &mut self, - sch: SchemaIndex, - v: &'v Value, - token: InstanceToken<'v>, - ) -> Result<(), ValidationError<'s, 'v>> { - if self.vloc.len() == self.scope.vid { - self.vloc.push(token); - } else { - self.vloc[self.scope.vid] = token; - } - let scope = self.scope.child(sch, None, self.scope.vid + 1); - let schema = &self.schemas.get(sch); - Validator { - v, - vloc: self.vloc, - schema, - schemas: self.schemas, - scope, - uneval: Uneval::from(v, schema, false), - errors: vec![], - bool_result: self.bool_result, - } - .validate() - .0 + fn validate_val( + &mut self, + sch: SchemaIndex, + v: &'v Value, + token: InstanceToken<'v>, + ) -> Result<(), ValidationError<'s, 'v>> { + if self.vloc.len() == self.scope.vid { + self.vloc.push(token); + } else { + self.vloc[self.scope.vid] = token; } + let scope = self.scope.child(sch, None, self.scope.vid + 1); + let schema = &self.schemas.get(sch); + let be_strict = self.options.map_or(false, |o| o.be_strict); + let (result, _reply) = Validator { + v, + vloc: self.vloc, + schema, + schemas: self.schemas, + scope, + options: self.options, + uneval: Uneval::from(v, schema, be_strict || !self.uneval.is_empty()), + errors: vec![], + bool_result: self.bool_result, + } + .validate(); + // self.uneval.merge(&reply, None); // DO NOT MERGE, see https://github.com/santhosh-tekuri/boon/issues/33 + 
result + } - fn _validate_self( - &mut self, - sch: SchemaIndex, - ref_kw: Option<&'static str>, - bool_result: bool, - ) -> Result<(), ValidationError<'s, 'v>> { - let scope = self.scope.child(sch, ref_kw, self.scope.vid); - let schema = &self.schemas.get(sch); - let (result, reply) = Validator { - v: self.v, - vloc: self.vloc, - schema, - schemas: self.schemas, - scope, - uneval: Uneval::from(self.v, schema, !self.uneval.is_empty()), - errors: vec![], - bool_result: self.bool_result || bool_result, - } - .validate(); - self.uneval.merge(&reply); - result + fn _validate_self( + &mut self, + sch: SchemaIndex, + ref_kw: Option<&'static str>, + bool_result: bool, + ) -> Result<(), ValidationError<'s, 'v>> { + let scope = self.scope.child(sch, ref_kw, self.scope.vid); + let schema = &self.schemas.get(sch); + let be_strict = self.options.map_or(false, |o| o.be_strict); + let (result, reply) = Validator { + v: self.v, + vloc: self.vloc, + schema, + schemas: self.schemas, + scope, + options: self.options, + uneval: self.uneval.clone(), + errors: vec![], + bool_result: self.bool_result || bool_result, } + .validate(); + self.uneval.merge(&reply, ref_kw); + result + } - #[inline(always)] - fn validate_self(&mut self, sch: SchemaIndex) -> Result<(), ValidationError<'s, 'v>> { - self._validate_self(sch, None, false) - } + #[inline(always)] + fn validate_self(&mut self, sch: SchemaIndex) -> Result<(), ValidationError<'s, 'v>> { + self._validate_self(sch, None, false) + } } // error helpers impl<'v, 's> Validator<'v, 's, '_, '_> { - #[inline(always)] - fn error(&self, kind: ErrorKind<'s, 'v>) -> ValidationError<'s, 'v> { - if self.bool_result { - return ValidationError { - schema_url: &self.schema.loc, - instance_location: InstanceLocation::new(), - kind: ErrorKind::Group, - causes: vec![], - }; - } - ValidationError { - schema_url: &self.schema.loc, - instance_location: self.instance_location(), - kind, - causes: vec![], - } + #[inline(always)] + fn error(&self, kind: 
ErrorKind<'s, 'v>) -> ValidationError<'s, 'v> { + if self.bool_result { + return ValidationError { + schema_url: &self.schema.loc, + instance_location: InstanceLocation::new(), + kind: ErrorKind::Group, + causes: vec![], + }; } + ValidationError { + schema_url: &self.schema.loc, + instance_location: self.instance_location(), + kind, + causes: vec![], + } + } - #[inline(always)] - fn add_error(&mut self, kind: ErrorKind<'s, 'v>) { - self.errors.push(self.error(kind)); - } + #[inline(always)] + fn add_error(&mut self, kind: ErrorKind<'s, 'v>) { + self.errors.push(self.error(kind)); + } - #[inline(always)] - fn add_errors(&mut self, errors: Vec>, kind: ErrorKind<'s, 'v>) { - if errors.len() == 1 { - self.errors.extend(errors); - } else { - let mut err = self.error(kind); - err.causes = errors; - self.errors.push(err); - } + #[inline(always)] + fn add_errors(&mut self, errors: Vec>, kind: ErrorKind<'s, 'v>) { + if errors.len() == 1 { + self.errors.extend(errors); + } else { + let mut err = self.error(kind); + err.causes = errors; + self.errors.push(err); } + } - fn kw_loc(&self, mut scope: &Scope) -> String { - let mut loc = String::new(); - while let Some(parent) = scope.parent { - if let Some(kw) = scope.ref_kw { - loc.insert_str(0, kw); - loc.insert(0, '/'); - } else { - let cur = &self.schemas.get(scope.sch).loc; - let parent = &self.schemas.get(parent.sch).loc; - loc.insert_str(0, &cur[parent.len()..]); - } - scope = parent; - } - loc + fn kw_loc(&self, mut scope: &Scope) -> String { + let mut loc = String::new(); + while let Some(parent) = scope.parent { + if let Some(kw) = scope.ref_kw { + loc.insert_str(0, kw); + loc.insert(0, '/'); + } else { + let cur = &self.schemas.get(scope.sch).loc; + let parent = &self.schemas.get(parent.sch).loc; + loc.insert_str(0, &cur[parent.len()..]); + } + scope = parent; } + loc + } - fn find_missing( - &self, - obj: &'v Map, - required: &'s [String], - ) -> Option> { - let mut missing = required - .iter() - .filter(|p| 
!obj.contains_key(p.as_str())) - .map(|p| p.as_str()); - if self.bool_result { - missing.next().map(|_| Vec::new()) - } else { - let missing = missing.collect::>(); - if missing.is_empty() { - None - } else { - Some(missing) - } - } + fn find_missing( + &self, + obj: &'v Map, + required: &'s [String], + ) -> Option> { + let mut missing = required + .iter() + .filter(|p| !obj.contains_key(p.as_str())) + .map(|p| p.as_str()); + if self.bool_result { + missing.next().map(|_| Vec::new()) + } else { + let missing = missing.collect::>(); + if missing.is_empty() { + None + } else { + Some(missing) + } } + } - fn instance_location(&self) -> InstanceLocation<'v> { - let len = self.scope.vid; - let mut tokens = Vec::with_capacity(len); - for tok in &self.vloc[..len] { - tokens.push(tok.clone()); - } - InstanceLocation { tokens } + fn instance_location(&self) -> InstanceLocation<'v> { + let len = self.scope.vid; + let mut tokens = Vec::with_capacity(len); + for tok in &self.vloc[..len] { + tokens.push(tok.clone()); } + InstanceLocation { tokens } + } } // Uneval -- -#[derive(Default)] +#[derive(Default, Clone)] struct Uneval<'v> { - props: HashSet<&'v String>, - items: HashSet, + props: HashSet<&'v String>, + items: HashSet, } impl<'v> Uneval<'v> { - fn is_empty(&self) -> bool { - self.props.is_empty() && self.items.is_empty() - } + fn is_empty(&self) -> bool { + self.props.is_empty() && self.items.is_empty() + } - fn from(v: &'v Value, sch: &Schema, caller_needs: bool) -> Self { - let mut uneval = Self::default(); - match v { - Value::Object(obj) => { - if !sch.all_props_evaluated - && (caller_needs || sch.unevaluated_properties.is_some()) - { - uneval.props = obj.keys().collect(); - } - } - Value::Array(arr) => { - if !sch.all_items_evaluated - && (caller_needs || sch.unevaluated_items.is_some()) - && sch.num_items_evaluated < arr.len() - { - uneval.items = (sch.num_items_evaluated..arr.len()).collect(); - } - } - _ => (), + fn from(v: &'v Value, sch: &Schema, caller_needs: 
bool) -> Self { + let mut uneval = Self::default(); + match v { + Value::Object(obj) => { + if caller_needs || sch.unevaluated_properties.is_some() || !sch.all_props_evaluated { + uneval.props = obj.keys().collect(); } - uneval + } + Value::Array(arr) => { + if !sch.all_items_evaluated + && (caller_needs || sch.unevaluated_items.is_some()) + && sch.num_items_evaluated < arr.len() + { + uneval.items = (sch.num_items_evaluated..arr.len()).collect(); + } + } + _ => (), } + uneval + } - fn merge(&mut self, other: &Uneval) { - self.props.retain(|p| other.props.contains(p)); - self.items.retain(|i| other.items.contains(i)); - } + fn merge(&mut self, other: &Uneval<'v>, _ref_kw: Option<&'static str>) { + self.props.retain(|p| other.props.contains(p)); + self.items.retain(|i| other.items.contains(i)); + } } // Scope --- #[derive(Debug)] struct Scope<'a> { - sch: SchemaIndex, - // if None, compute from self.sch and self.parent.sh - // not None only when there is jump i.e $ref, $XXXRef - ref_kw: Option<&'static str>, - /// unique id of value being validated - // if two scope validate same value, they will have same vid - vid: usize, - parent: Option<&'a Scope<'a>>, + sch: SchemaIndex, + // if None, compute from self.sch and self.parent.sh + // not None only when there is jump i.e $ref, $XXXRef + ref_kw: Option<&'static str>, + /// unique id of value being validated + // if two scope validate same value, they will have same vid + vid: usize, + parent: Option<&'a Scope<'a>>, } impl Scope<'_> { - fn child<'x>( - &'x self, - sch: SchemaIndex, - ref_kw: Option<&'static str>, - vid: usize, - ) -> Scope<'x> { - Scope { - sch, - ref_kw, - vid, - parent: Some(self), - } + fn child<'x>( + &'x self, + sch: SchemaIndex, + ref_kw: Option<&'static str>, + vid: usize, + ) -> Scope<'x> { + Scope { + sch, + ref_kw, + vid, + parent: Some(self), } + } - fn check_cycle(&self) -> Option<&Scope<'_>> { - let mut scope = self.parent; - while let Some(scp) = scope { - if scp.vid != self.vid { - 
break; - } - if scp.sch == self.sch { - return Some(scp); - } - scope = scp.parent; - } - None + fn check_cycle(&self) -> Option<&Scope<'_>> { + let mut scope = self.parent; + while let Some(scp) = scope { + if scp.vid != self.vid { + break; + } + if scp.sch == self.sch { + return Some(scp); + } + scope = scp.parent; } + None + } } /// Token in InstanceLocation json-pointer. #[derive(Debug, Clone)] pub enum InstanceToken<'v> { - /// Token for property. - Prop(Cow<'v, str>), - /// Token for array item. - Item(usize), + /// Token for property. + Prop(Cow<'v, str>), + /// Token for array item. + Item(usize), } impl From for InstanceToken<'_> { - fn from(prop: String) -> Self { - InstanceToken::Prop(prop.into()) - } + fn from(prop: String) -> Self { + InstanceToken::Prop(prop.into()) + } } impl<'v> From<&'v str> for InstanceToken<'v> { - fn from(prop: &'v str) -> Self { - InstanceToken::Prop(prop.into()) - } + fn from(prop: &'v str) -> Self { + InstanceToken::Prop(prop.into()) + } } impl From for InstanceToken<'_> { - fn from(index: usize) -> Self { - InstanceToken::Item(index) - } + fn from(index: usize) -> Self { + InstanceToken::Item(index) + } } /// The location of the JSON value within the instance being validated #[derive(Debug, Default)] pub struct InstanceLocation<'v> { - pub tokens: Vec>, + pub tokens: Vec>, } impl InstanceLocation<'_> { - fn new() -> Self { - Self::default() - } + fn new() -> Self { + Self::default() + } - fn clone_static(self) -> InstanceLocation<'static> { - let mut tokens = Vec::with_capacity(self.tokens.len()); - for tok in self.tokens { - let tok = match tok { - InstanceToken::Prop(p) => InstanceToken::Prop(p.into_owned().into()), - InstanceToken::Item(i) => InstanceToken::Item(i), - }; - tokens.push(tok); - } - InstanceLocation { tokens } + fn clone_static(self) -> InstanceLocation<'static> { + let mut tokens = Vec::with_capacity(self.tokens.len()); + for tok in self.tokens { + let tok = match tok { + InstanceToken::Prop(p) => 
InstanceToken::Prop(p.into_owned().into()), + InstanceToken::Item(i) => InstanceToken::Item(i), + }; + tokens.push(tok); } + InstanceLocation { tokens } + } } impl Display for InstanceLocation<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for tok in &self.tokens { - f.write_char('/')?; - match tok { - InstanceToken::Prop(s) => f.write_str(&escape(s))?, - InstanceToken::Item(i) => write!(f, "{i}")?, - } - } - Ok(()) + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for tok in &self.tokens { + f.write_char('/')?; + match tok { + InstanceToken::Prop(s) => f.write_str(&escape(s))?, + InstanceToken::Item(i) => write!(f, "{i}")?, + } } + Ok(()) + } } impl<'s> ValidationError<'s, '_> { - pub(crate) fn clone_static(self) -> ValidationError<'s, 'static> { - let mut causes = Vec::with_capacity(self.causes.len()); - for cause in self.causes { - causes.push(cause.clone_static()); - } - ValidationError { - instance_location: self.instance_location.clone_static(), - kind: self.kind.clone_static(), - causes, - ..self - } + pub(crate) fn clone_static(self) -> ValidationError<'s, 'static> { + let mut causes = Vec::with_capacity(self.causes.len()); + for cause in self.causes { + causes.push(cause.clone_static()); } + ValidationError { + instance_location: self.instance_location.clone_static(), + kind: self.kind.clone_static(), + causes, + ..self + } + } } impl<'s> ErrorKind<'s, '_> { - fn clone_static(self) -> ErrorKind<'s, 'static> { - use ErrorKind::*; - match self { - AdditionalProperties { got } => AdditionalProperties { - got: got.into_iter().map(|e| e.into_owned().into()).collect(), - }, - Format { got, want, err } => Format { - got: Cow::Owned(got.into_owned()), - want, - err, - }, - Pattern { got, want } => Pattern { - got: got.into_owned().into(), - want, - }, - Minimum { got, want } => Minimum { - got: Cow::Owned(got.into_owned()), - want, - }, - Maximum { got, want } => Maximum { - got: Cow::Owned(got.into_owned()), 
- want, - }, - ExclusiveMinimum { got, want } => ExclusiveMinimum { - got: Cow::Owned(got.into_owned()), - want, - }, - ExclusiveMaximum { got, want } => ExclusiveMaximum { - got: Cow::Owned(got.into_owned()), - want, - }, - MultipleOf { got, want } => MultipleOf { - got: Cow::Owned(got.into_owned()), - want, - }, - // #[cfg(not(debug_assertions))] - // _ => unsafe { std::mem::transmute(self) }, - Group => Group, - Schema { url } => Schema { url }, - ContentSchema => ContentSchema, - PropertyName { prop } => PropertyName { prop }, - Reference { kw, url } => Reference { kw, url }, - RefCycle { - url, - kw_loc1, - kw_loc2, - } => RefCycle { - url, - kw_loc1, - kw_loc2, - }, - FalseSchema => FalseSchema, - Type { got, want } => Type { got, want }, - Enum { want } => Enum { want }, - Const { want } => Const { want }, - MinProperties { got, want } => MinProperties { got, want }, - MaxProperties { got, want } => MaxProperties { got, want }, - Required { want } => Required { want }, - Dependency { prop, missing } => Dependency { prop, missing }, - DependentRequired { prop, missing } => DependentRequired { prop, missing }, - MinItems { got, want } => MinItems { got, want }, - MaxItems { got, want } => MaxItems { got, want }, - Contains => Contains, - MinContains { got, want } => MinContains { got, want }, - MaxContains { got, want } => MaxContains { got, want }, - UniqueItems { got } => UniqueItems { got }, - AdditionalItems { got } => AdditionalItems { got }, - MinLength { got, want } => MinLength { got, want }, - MaxLength { got, want } => MaxLength { got, want }, - ContentEncoding { want, err } => ContentEncoding { want, err }, - ContentMediaType { got, want, err } => ContentMediaType { got, want, err }, - Not => Not, - AllOf => AllOf, - AnyOf => AnyOf, - OneOf(opt) => OneOf(opt), - } + fn clone_static(self) -> ErrorKind<'s, 'static> { + use ErrorKind::*; + match self { + AdditionalProperties { got } => AdditionalProperties { + got: got.into_iter().map(|e| 
e.into_owned().into()).collect(), + }, + Format { got, want, err } => Format { + got: Cow::Owned(got.into_owned()), + want, + err, + }, + Pattern { got, want } => Pattern { + got: got.into_owned().into(), + want, + }, + Minimum { got, want } => Minimum { + got: Cow::Owned(got.into_owned()), + want, + }, + Maximum { got, want } => Maximum { + got: Cow::Owned(got.into_owned()), + want, + }, + ExclusiveMinimum { got, want } => ExclusiveMinimum { + got: Cow::Owned(got.into_owned()), + want, + }, + ExclusiveMaximum { got, want } => ExclusiveMaximum { + got: Cow::Owned(got.into_owned()), + want, + }, + MultipleOf { got, want } => MultipleOf { + got: Cow::Owned(got.into_owned()), + want, + }, + // #[cfg(not(debug_assertions))] + // _ => unsafe { std::mem::transmute(self) }, + Group => Group, + Schema { url } => Schema { url }, + ContentSchema => ContentSchema, + PropertyName { prop } => PropertyName { prop }, + Reference { kw, url } => Reference { kw, url }, + RefCycle { + url, + kw_loc1, + kw_loc2, + } => RefCycle { + url, + kw_loc1, + kw_loc2, + }, + FalseSchema => FalseSchema, + Type { got, want } => Type { got, want }, + Enum { want } => Enum { want }, + Const { want } => Const { want }, + MinProperties { got, want } => MinProperties { got, want }, + MaxProperties { got, want } => MaxProperties { got, want }, + Required { want } => Required { want }, + Dependency { prop, missing } => Dependency { prop, missing }, + DependentRequired { prop, missing } => DependentRequired { prop, missing }, + MinItems { got, want } => MinItems { got, want }, + MaxItems { got, want } => MaxItems { got, want }, + Contains => Contains, + MinContains { got, want } => MinContains { got, want }, + MaxContains { got, want } => MaxContains { got, want }, + UniqueItems { got } => UniqueItems { got }, + AdditionalItems { got } => AdditionalItems { got }, + MinLength { got, want } => MinLength { got, want }, + MaxLength { got, want } => MaxLength { got, want }, + ContentEncoding { want, err } => 
ContentEncoding { want, err }, + ContentMediaType { got, want, err } => ContentMediaType { got, want, err }, + Not => Not, + AllOf => AllOf, + AnyOf => AnyOf, + OneOf(opt) => OneOf(opt), } + } } diff --git a/validator/tests/compiler.rs b/validator/tests/compiler.rs index d657a93..35de0ac 100644 --- a/validator/tests/compiler.rs +++ b/validator/tests/compiler.rs @@ -5,83 +5,83 @@ use serde_json::json; #[test] fn test_metaschema_resource() -> Result<(), Box> { - let main_schema = json!({ - "$schema": "http://tmp.com/meta.json", - "type": "number" - }); - let meta_schema = json!({ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$vocabulary": { - "https://json-schema.org/draft/2020-12/vocab/applicator": true, - "https://json-schema.org/draft/2020-12/vocab/core": true - }, - "allOf": [ - { "$ref": "https://json-schema.org/draft/2020-12/meta/applicator" }, - { "$ref": "https://json-schema.org/draft/2020-12/meta/core" } - ] - }); + let main_schema = json!({ + "$schema": "http://tmp.com/meta.json", + "type": "number" + }); + let meta_schema = json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$vocabulary": { + "https://json-schema.org/draft/2020-12/vocab/applicator": true, + "https://json-schema.org/draft/2020-12/vocab/core": true + }, + "allOf": [ + { "$ref": "https://json-schema.org/draft/2020-12/meta/applicator" }, + { "$ref": "https://json-schema.org/draft/2020-12/meta/core" } + ] + }); - let mut schemas = Schemas::new(); - let mut compiler = Compiler::new(); - compiler.add_resource("schema.json", main_schema)?; - compiler.add_resource("http://tmp.com/meta.json", meta_schema)?; - compiler.compile("schema.json", &mut schemas)?; + let mut schemas = Schemas::new(); + let mut compiler = Compiler::new(); + compiler.add_resource("schema.json", main_schema)?; + compiler.add_resource("http://tmp.com/meta.json", meta_schema)?; + compiler.compile("schema.json", &mut schemas)?; - Ok(()) + Ok(()) } #[test] fn test_compile_anchor() -> Result<(), 
Box> { - let schema = json!({ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$defs": { - "x": { - "$anchor": "a1", - "type": "number" - } - } - }); + let schema = json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": { + "x": { + "$anchor": "a1", + "type": "number" + } + } + }); - let mut schemas = Schemas::new(); - let mut compiler = Compiler::new(); - compiler.add_resource("schema.json", schema)?; - let sch_index1 = compiler.compile("schema.json#a1", &mut schemas)?; - let sch_index2 = compiler.compile("schema.json#/$defs/x", &mut schemas)?; - assert_eq!(sch_index1, sch_index2); + let mut schemas = Schemas::new(); + let mut compiler = Compiler::new(); + compiler.add_resource("schema.json", schema)?; + let sch_index1 = compiler.compile("schema.json#a1", &mut schemas)?; + let sch_index2 = compiler.compile("schema.json#/$defs/x", &mut schemas)?; + assert_eq!(sch_index1, sch_index2); - Ok(()) + Ok(()) } #[test] fn test_compile_nonstd() -> Result<(), Box> { - let schema = json!({ - "components": { - "schemas": { - "foo" : { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$defs": { - "x": { - "$anchor": "a", - "type": "number" - }, - "y": { - "$id": "http://temp.com/y", - "type": "string" - } - }, - "oneOf": [ - { "$ref": "#a" }, - { "$ref": "http://temp.com/y" } - ] - } + let schema = json!({ + "components": { + "schemas": { + "foo" : { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": { + "x": { + "$anchor": "a", + "type": "number" + }, + "y": { + "$id": "http://temp.com/y", + "type": "string" } + }, + "oneOf": [ + { "$ref": "#a" }, + { "$ref": "http://temp.com/y" } + ] } - }); + } + } + }); - let mut schemas = Schemas::new(); - let mut compiler = Compiler::new(); - compiler.add_resource("schema.json", schema)?; - compiler.compile("schema.json#/components/schemas/foo", &mut schemas)?; + let mut schemas = Schemas::new(); + let mut compiler = Compiler::new(); + 
compiler.add_resource("schema.json", schema)?; + compiler.compile("schema.json#/components/schemas/foo", &mut schemas)?; - Ok(()) + Ok(()) } diff --git a/validator/tests/debug.rs b/validator/tests/debug.rs index 32b9914..c9cd8f7 100644 --- a/validator/tests/debug.rs +++ b/validator/tests/debug.rs @@ -5,37 +5,37 @@ use serde_json::{Map, Value}; #[test] fn test_debug() -> Result<(), Box> { - let test: Value = serde_json::from_reader(File::open("tests/debug.json")?)?; - let mut schemas = Schemas::new(); - let mut compiler = Compiler::new(); - compiler.enable_format_assertions(); - compiler.enable_content_assertions(); - let remotes = Remotes(test["remotes"].as_object().unwrap().clone()); - compiler.use_loader(Box::new(remotes)); - let url = "http://debug.com/schema.json"; - compiler.add_resource(url, test["schema"].clone())?; - let sch = compiler.compile(url, &mut schemas)?; - let result = schemas.validate(&test["data"], sch); - if let Err(e) = &result { - for line in format!("{e}").lines() { - println!(" {line}"); - } - for line in format!("{e:#}").lines() { - println!(" {line}"); - } - println!("{:#}", e.detailed_output()); + let test: Value = serde_json::from_reader(File::open("tests/debug.json")?)?; + let mut schemas = Schemas::new(); + let mut compiler = Compiler::new(); + compiler.enable_format_assertions(); + compiler.enable_content_assertions(); + let remotes = Remotes(test["remotes"].as_object().unwrap().clone()); + compiler.use_loader(Box::new(remotes)); + let url = "http://debug.com/schema.json"; + compiler.add_resource(url, test["schema"].clone())?; + let sch = compiler.compile(url, &mut schemas)?; + let result = schemas.validate(&test["data"], sch); + if let Err(e) = &result { + for line in format!("{e}").lines() { + println!(" {line}"); } - assert_eq!(result.is_ok(), test["valid"].as_bool().unwrap()); - Ok(()) + for line in format!("{e:#}").lines() { + println!(" {line}"); + } + println!("{:#}", e.detailed_output()); + } + assert_eq!(result.is_ok(), 
test["valid"].as_bool().unwrap()); + Ok(()) } struct Remotes(Map); impl UrlLoader for Remotes { - fn load(&self, url: &str) -> Result> { - if let Some(v) = self.0.get(url) { - return Ok(v.clone()); - } - Err("remote not found")? + fn load(&self, url: &str) -> Result> { + if let Some(v) = self.0.get(url) { + return Ok(v.clone()); } + Err("remote not found")? + } } diff --git a/validator/tests/examples.rs b/validator/tests/examples.rs index 5c9eab8..6e4a943 100644 --- a/validator/tests/examples.rs +++ b/validator/tests/examples.rs @@ -7,16 +7,16 @@ use url::Url; #[test] fn example_from_files() -> Result<(), Box> { - let schema_file = "tests/examples/schema.json"; - let instance: Value = serde_json::from_reader(File::open("tests/examples/instance.json")?)?; + let schema_file = "tests/examples/schema.json"; + let instance: Value = serde_json::from_reader(File::open("tests/examples/instance.json")?)?; - let mut schemas = Schemas::new(); - let mut compiler = Compiler::new(); - let sch_index = compiler.compile(schema_file, &mut schemas)?; - let result = schemas.validate(&instance, sch_index); - assert!(result.is_ok()); + let mut schemas = Schemas::new(); + let mut compiler = Compiler::new(); + let sch_index = compiler.compile(schema_file, &mut schemas)?; + let result = schemas.validate(&instance, sch_index); + assert!(result.is_ok()); - Ok(()) + Ok(()) } /** @@ -31,200 +31,200 @@ to local file. 
*/ #[test] fn example_from_strings() -> Result<(), Box> { - let cat_schema: Value = json!({ - "type": "object", - "properties": { - "speak": { "const": "meow" } - }, - "required": ["speak"] - }); - let pet_schema: Value = json!({ - "oneOf": [ - { "$ref": "dog.json" }, - { "$ref": "cat.json" } - ] - }); - let instance: Value = json!({"speak": "bow"}); + let cat_schema: Value = json!({ + "type": "object", + "properties": { + "speak": { "const": "meow" } + }, + "required": ["speak"] + }); + let pet_schema: Value = json!({ + "oneOf": [ + { "$ref": "dog.json" }, + { "$ref": "cat.json" } + ] + }); + let instance: Value = json!({"speak": "bow"}); - let mut schemas = Schemas::new(); - let mut compiler = Compiler::new(); - compiler.add_resource("tests/examples/pet.json", pet_schema)?; - compiler.add_resource("tests/examples/cat.json", cat_schema)?; - let sch_index = compiler.compile("tests/examples/pet.json", &mut schemas)?; - let result = schemas.validate(&instance, sch_index); - assert!(result.is_ok()); + let mut schemas = Schemas::new(); + let mut compiler = Compiler::new(); + compiler.add_resource("tests/examples/pet.json", pet_schema)?; + compiler.add_resource("tests/examples/cat.json", cat_schema)?; + let sch_index = compiler.compile("tests/examples/pet.json", &mut schemas)?; + let result = schemas.validate(&instance, sch_index); + assert!(result.is_ok()); - Ok(()) + Ok(()) } #[test] #[ignore] fn example_from_https() -> Result<(), Box> { - let schema_url = "https://json-schema.org/learn/examples/geographical-location.schema.json"; - let instance: Value = json!({"latitude": 48.858093, "longitude": 2.294694}); + let schema_url = "https://json-schema.org/learn/examples/geographical-location.schema.json"; + let instance: Value = json!({"latitude": 48.858093, "longitude": 2.294694}); - struct HttpUrlLoader; - impl UrlLoader for HttpUrlLoader { - fn load(&self, url: &str) -> Result> { - let reader = ureq::get(url).call()?.into_reader(); - 
Ok(serde_json::from_reader(reader)?) - } + struct HttpUrlLoader; + impl UrlLoader for HttpUrlLoader { + fn load(&self, url: &str) -> Result> { + let reader = ureq::get(url).call()?.into_reader(); + Ok(serde_json::from_reader(reader)?) } + } - let mut schemas = Schemas::new(); - let mut compiler = Compiler::new(); - let mut loader = SchemeUrlLoader::new(); - loader.register("file", Box::new(FileLoader)); - loader.register("http", Box::new(HttpUrlLoader)); - loader.register("https", Box::new(HttpUrlLoader)); - compiler.use_loader(Box::new(loader)); - let sch_index = compiler.compile(schema_url, &mut schemas)?; - let result = schemas.validate(&instance, sch_index); - assert!(result.is_ok()); + let mut schemas = Schemas::new(); + let mut compiler = Compiler::new(); + let mut loader = SchemeUrlLoader::new(); + loader.register("file", Box::new(FileLoader)); + loader.register("http", Box::new(HttpUrlLoader)); + loader.register("https", Box::new(HttpUrlLoader)); + compiler.use_loader(Box::new(loader)); + let sch_index = compiler.compile(schema_url, &mut schemas)?; + let result = schemas.validate(&instance, sch_index); + assert!(result.is_ok()); - Ok(()) + Ok(()) } #[test] fn example_from_yaml_files() -> Result<(), Box> { - let schema_file = "tests/examples/schema.yml"; - let instance: Value = serde_yaml::from_reader(File::open("tests/examples/instance.yml")?)?; + let schema_file = "tests/examples/schema.yml"; + let instance: Value = serde_yaml::from_reader(File::open("tests/examples/instance.yml")?)?; - struct FileUrlLoader; - impl UrlLoader for FileUrlLoader { - fn load(&self, url: &str) -> Result> { - let url = Url::parse(url)?; - let path = url.to_file_path().map_err(|_| "invalid file path")?; - let file = File::open(&path)?; - if path - .extension() - .filter(|&ext| ext == "yaml" || ext == "yml") - .is_some() - { - Ok(serde_yaml::from_reader(file)?) - } else { - Ok(serde_json::from_reader(file)?) 
- } - } + struct FileUrlLoader; + impl UrlLoader for FileUrlLoader { + fn load(&self, url: &str) -> Result> { + let url = Url::parse(url)?; + let path = url.to_file_path().map_err(|_| "invalid file path")?; + let file = File::open(&path)?; + if path + .extension() + .filter(|&ext| ext == "yaml" || ext == "yml") + .is_some() + { + Ok(serde_yaml::from_reader(file)?) + } else { + Ok(serde_json::from_reader(file)?) + } } + } - let mut schemas = Schemas::new(); - let mut compiler = Compiler::new(); - let mut loader = SchemeUrlLoader::new(); - loader.register("file", Box::new(FileUrlLoader)); - compiler.use_loader(Box::new(loader)); - let sch_index = compiler.compile(schema_file, &mut schemas)?; - let result = schemas.validate(&instance, sch_index); - assert!(result.is_ok()); + let mut schemas = Schemas::new(); + let mut compiler = Compiler::new(); + let mut loader = SchemeUrlLoader::new(); + loader.register("file", Box::new(FileUrlLoader)); + compiler.use_loader(Box::new(loader)); + let sch_index = compiler.compile(schema_file, &mut schemas)?; + let result = schemas.validate(&instance, sch_index); + assert!(result.is_ok()); - Ok(()) + Ok(()) } #[test] fn example_custom_format() -> Result<(), Box> { - let schema_url = "http://tmp/schema.json"; - let schema: Value = json!({"type": "string", "format": "palindrome"}); - let instance: Value = json!("step on no pets"); + let schema_url = "http://tmp/schema.json"; + let schema: Value = json!({"type": "string", "format": "palindrome"}); + let instance: Value = json!("step on no pets"); - fn is_palindrome(v: &Value) -> Result<(), Box> { - let Value::String(s) = v else { - return Ok(()); // applicable only on strings - }; - let mut chars = s.chars(); - while let (Some(c1), Some(c2)) = (chars.next(), chars.next_back()) { - if c1 != c2 { - Err("char mismatch")?; - } - } - Ok(()) + fn is_palindrome(v: &Value) -> Result<(), Box> { + let Value::String(s) = v else { + return Ok(()); // applicable only on strings + }; + let mut chars = 
s.chars(); + while let (Some(c1), Some(c2)) = (chars.next(), chars.next_back()) { + if c1 != c2 { + Err("char mismatch")?; + } } - - let mut schemas = Schemas::new(); - let mut compiler = Compiler::new(); - compiler.enable_format_assertions(); // in draft2020-12 format assertions are not enabled by default - compiler.register_format(Format { - name: "palindrome", - func: is_palindrome, - }); - compiler.add_resource(schema_url, schema)?; - let sch_index = compiler.compile(schema_url, &mut schemas)?; - let result = schemas.validate(&instance, sch_index); - assert!(result.is_ok()); - Ok(()) + } + + let mut schemas = Schemas::new(); + let mut compiler = Compiler::new(); + compiler.enable_format_assertions(); // in draft2020-12 format assertions are not enabled by default + compiler.register_format(Format { + name: "palindrome", + func: is_palindrome, + }); + compiler.add_resource(schema_url, schema)?; + let sch_index = compiler.compile(schema_url, &mut schemas)?; + let result = schemas.validate(&instance, sch_index); + assert!(result.is_ok()); + + Ok(()) } #[test] fn example_custom_content_encoding() -> Result<(), Box<dyn Error>> { - let schema_url = "http://tmp/schema.json"; - let schema: Value = json!({"type": "string", "contentEncoding": "hex"}); - let instance: Value = json!("aBcdxyz"); + let schema_url = "http://tmp/schema.json"; + let schema: Value = json!({"type": "string", "contentEncoding": "hex"}); + let instance: Value = json!("aBcdxyz"); - fn decode(b: u8) -> Result<u8, Box<dyn Error>> { - match b { - b'0'..=b'9' => Ok(b - b'0'), - b'a'..=b'f' => Ok(b - b'a' + 10), - b'A'..=b'F' => Ok(b - b'A' + 10), - _ => Err("decode_hex: non-hex char")?, - } + fn decode(b: u8) -> Result<u8, Box<dyn Error>> { + match b { + b'0'..=b'9' => Ok(b - b'0'), + b'a'..=b'f' => Ok(b - b'a' + 10), + b'A'..=b'F' => Ok(b - b'A' + 10), + _ => Err("decode_hex: non-hex char")?, } - fn decode_hex(s: &str) -> Result<Vec<u8>, Box<dyn Error>> { - if s.len() % 2 != 0 { - Err("decode_hex: odd length")?; - } - let mut bytes = s.bytes(); - let mut out = 
Vec::with_capacity(s.len() / 2); - for _ in 0..out.len() { - if let (Some(b1), Some(b2)) = (bytes.next(), bytes.next()) { - out.push(decode(b1)? << 4 | decode(b2)?); - } else { - Err("decode_hex: non-ascii char")?; - } - } - Ok(out) + } + fn decode_hex(s: &str) -> Result<Vec<u8>, Box<dyn Error>> { + if s.len() % 2 != 0 { + Err("decode_hex: odd length")?; } + let mut bytes = s.bytes(); + let mut out = Vec::with_capacity(s.len() / 2); + for _ in 0..s.len() / 2 { + if let (Some(b1), Some(b2)) = (bytes.next(), bytes.next()) { + out.push(decode(b1)? << 4 | decode(b2)?); + } else { + Err("decode_hex: non-ascii char")?; + } + } + Ok(out) + } - let mut schemas = Schemas::new(); - let mut compiler = Compiler::new(); - compiler.enable_content_assertions(); // content assertions are not enabled by default - compiler.register_content_encoding(Decoder { - name: "hex", - func: decode_hex, - }); - compiler.add_resource(schema_url, schema)?; - let sch_index = compiler.compile(schema_url, &mut schemas)?; - let result = schemas.validate(&instance, sch_index); - assert!(result.is_err()); + let mut schemas = Schemas::new(); + let mut compiler = Compiler::new(); + compiler.enable_content_assertions(); // content assertions are not enabled by default + compiler.register_content_encoding(Decoder { + name: "hex", + func: decode_hex, + }); + compiler.add_resource(schema_url, schema)?; + let sch_index = compiler.compile(schema_url, &mut schemas)?; + let result = schemas.validate(&instance, sch_index); + assert!(result.is_err()); - Ok(()) + Ok(()) } #[test] fn example_custom_content_media_type() -> Result<(), Box<dyn Error>> { - let schema_url = "http://tmp/schema.json"; - let schema: Value = json!({"type": "string", "contentMediaType": "application/yaml"}); - let instance: Value = json!("name:foobar"); + let schema_url = "http://tmp/schema.json"; + let schema: Value = json!({"type": "string", "contentMediaType": "application/yaml"}); + let instance: Value = json!("name:foobar"); - fn check_yaml(bytes: &[u8], deserialize: 
bool) -> Result<Option<Value>, Box<dyn Error>> { - if deserialize { - return Ok(Some(serde_yaml::from_slice(bytes)?)); - } - serde_yaml::from_slice::<serde::de::IgnoredAny>(bytes)?; - Ok(None) + fn check_yaml(bytes: &[u8], deserialize: bool) -> Result<Option<Value>, Box<dyn Error>> { + if deserialize { + return Ok(Some(serde_yaml::from_slice(bytes)?)); } + serde_yaml::from_slice::<serde::de::IgnoredAny>(bytes)?; + Ok(None) + } - let mut schemas = Schemas::new(); - let mut compiler = Compiler::new(); - compiler.enable_content_assertions(); // content assertions are not enabled by default - compiler.register_content_media_type(MediaType { - name: "application/yaml", - json_compatible: true, - func: check_yaml, - }); - compiler.add_resource(schema_url, schema)?; - let sch_index = compiler.compile(schema_url, &mut schemas)?; - let result = schemas.validate(&instance, sch_index); - assert!(result.is_ok()); + let mut schemas = Schemas::new(); + let mut compiler = Compiler::new(); + compiler.enable_content_assertions(); // content assertions are not enabled by default + compiler.register_content_media_type(MediaType { + name: "application/yaml", + json_compatible: true, + func: check_yaml, + }); + compiler.add_resource(schema_url, schema)?; + let sch_index = compiler.compile(schema_url, &mut schemas)?; + let result = schemas.validate(&instance, sch_index); + assert!(result.is_ok()); - Ok(()) + Ok(()) } diff --git a/validator/tests/filepaths.rs b/validator/tests/filepaths.rs index 2e9a375..43dcb3e 100644 --- a/validator/tests/filepaths.rs +++ b/validator/tests/filepaths.rs @@ -3,42 +3,42 @@ use std::fs; use boon::{CompileError, Compiler, Schemas}; fn test(path: &str) -> Result<(), CompileError> { - let mut schemas = Schemas::new(); - let mut compiler = Compiler::new(); - compiler.compile(path, &mut schemas)?; - Ok(()) + let mut schemas = Schemas::new(); + let mut compiler = Compiler::new(); + compiler.compile(path, &mut schemas)?; + Ok(()) } #[test] fn test_absolute() -> Result<(), CompileError> { - let path = fs::canonicalize("tests/examples/schema.json").unwrap(); 
- test(path.to_string_lossy().as_ref()) + let path = fs::canonicalize("tests/examples/schema.json").unwrap(); + test(path.to_string_lossy().as_ref()) } #[test] fn test_relative_slash() -> Result<(), CompileError> { - test("tests/examples/schema.json") + test("tests/examples/schema.json") } #[test] #[cfg(windows)] fn test_relative_backslash() -> Result<(), CompileError> { - test("tests\\examples\\schema.json") + test("tests\\examples\\schema.json") } #[test] fn test_absolutei_space() -> Result<(), CompileError> { - let path = fs::canonicalize("tests/examples/sample schema.json").unwrap(); - test(path.to_string_lossy().as_ref()) + let path = fs::canonicalize("tests/examples/sample schema.json").unwrap(); + test(path.to_string_lossy().as_ref()) } #[test] fn test_relative_slash_space() -> Result<(), CompileError> { - test("tests/examples/sample schema.json") + test("tests/examples/sample schema.json") } #[test] #[cfg(windows)] fn test_relative_backslash_space() -> Result<(), CompileError> { - test("tests\\examples\\sample schema.json") + test("tests\\examples\\sample schema.json") } diff --git a/validator/tests/invalid-schemas.rs b/validator/tests/invalid-schemas.rs index 590d065..869b132 100644 --- a/validator/tests/invalid-schemas.rs +++ b/validator/tests/invalid-schemas.rs @@ -6,62 +6,62 @@ use serde_json::Value; #[derive(Debug, Deserialize)] struct Test { - description: String, - remotes: Option<HashMap<String, Value>>, - schema: Value, - errors: Option<Vec<String>>, + description: String, + remotes: Option<HashMap<String, Value>>, + schema: Value, + errors: Option<Vec<String>>, } #[test] fn test_invalid_schemas() -> Result<(), Box<dyn Error>> { - let file = File::open("tests/invalid-schemas.json")?; - let tests: Vec<Test> = serde_json::from_reader(file)?; - for test in tests { - println!("{}", test.description); - match compile(&test) { - Ok(_) => { - if test.errors.is_some() { - Err("want compilation to fail")? 
- } - } - Err(e) => { - println!(" {e}"); - let error = format!("{e:?}"); - let Some(errors) = &test.errors else { - Err("want compilation to succeed")? - }; - for want in errors { - if !error.contains(want) { - println!(" got {error}"); - println!(" want {want}"); - panic!("error mismatch"); - } - } - } + let file = File::open("tests/invalid-schemas.json")?; + let tests: Vec<Test> = serde_json::from_reader(file)?; + for test in tests { + println!("{}", test.description); + match compile(&test) { + Ok(_) => { + if test.errors.is_some() { + Err("want compilation to fail")? } + } + Err(e) => { + println!(" {e}"); + let error = format!("{e:?}"); + let Some(errors) = &test.errors else { + Err("want compilation to succeed")? + }; + for want in errors { + if !error.contains(want) { + println!(" got {error}"); + println!(" want {want}"); + panic!("error mismatch"); + } + } + } } - Ok(()) + } + Ok(()) } fn compile(test: &Test) -> Result<(), CompileError> { - let mut schemas = Schemas::new(); - let mut compiler = Compiler::new(); - let url = "http://fake.com/schema.json"; - if let Some(remotes) = &test.remotes { - compiler.use_loader(Box::new(Remotes(remotes.clone()))); - } - compiler.add_resource(url, test.schema.clone())?; - compiler.compile(url, &mut schemas)?; - Ok(()) + let mut schemas = Schemas::new(); + let mut compiler = Compiler::new(); + let url = "http://fake.com/schema.json"; + if let Some(remotes) = &test.remotes { + compiler.use_loader(Box::new(Remotes(remotes.clone()))); + } + compiler.add_resource(url, test.schema.clone())?; + compiler.compile(url, &mut schemas)?; + Ok(()) } struct Remotes(HashMap<String, Value>); impl UrlLoader for Remotes { - fn load(&self, url: &str) -> Result<Value, Box<dyn Error>> { - if let Some(v) = self.0.get(url) { - return Ok(v.clone()); - } - Err("remote not found")? + fn load(&self, url: &str) -> Result<Value, Box<dyn Error>> { + if let Some(v) = self.0.get(url) { + return Ok(v.clone()); } + Err("remote not found")? 
+ } } diff --git a/validator/tests/output.rs b/validator/tests/output.rs index 98ffd22..6158bfc 100644 --- a/validator/tests/output.rs +++ b/validator/tests/output.rs @@ -6,117 +6,117 @@ use serde_json::Value; #[test] fn test_suites() -> Result<(), Box> { - if let Ok(suite) = env::var("TEST_SUITE") { - test_suite(&suite)?; - } else { - test_suite("tests/JSON-Schema-Test-Suite")?; - test_suite("tests/Extra-Suite")?; - } - Ok(()) + if let Ok(suite) = env::var("TEST_SUITE") { + test_suite(&suite)?; + } else { + test_suite("tests/JSON-Schema-Test-Suite")?; + test_suite("tests/Extra-Suite")?; + } + Ok(()) } fn test_suite(suite: &str) -> Result<(), Box> { - test_folder(suite, "draft2019-09", Draft::V2019_09)?; - test_folder(suite, "draft2020-12", Draft::V2020_12)?; - Ok(()) + test_folder(suite, "draft2019-09", Draft::V2019_09)?; + test_folder(suite, "draft2020-12", Draft::V2020_12)?; + Ok(()) } fn test_folder(suite: &str, folder: &str, draft: Draft) -> Result<(), Box> { - let output_schema_url = format!( - "https://json-schema.org/draft/{}/output/schema", - folder.strip_prefix("draft").unwrap() - ); - let prefix = Path::new(suite).join("output-tests"); - let folder = prefix.join(folder); - let content = folder.join("content"); - if !content.is_dir() { - return Ok(()); - } - let output_schema: Value = - serde_json::from_reader(File::open(folder.join("output-schema.json"))?)?; - for entry in content.read_dir()? 
{ - let entry = entry?; - if !entry.file_type()?.is_file() { - continue; - }; - let entry_path = entry.path(); - println!("{}", entry_path.strip_prefix(&prefix)?.to_str().unwrap()); - let groups: Vec = serde_json::from_reader(File::open(entry_path)?)?; - for group in groups { - println!(" {}", group.description); - let mut schemas = Schemas::new(); - let mut compiler = Compiler::new(); - compiler.set_default_draft(draft); - let schema_url = "http://output-tests/schema"; - compiler.add_resource(schema_url, group.schema)?; - let sch = compiler.compile(schema_url, &mut schemas)?; - for test in group.tests { - println!(" {}", test.description); - match schemas.validate(&test.data, sch) { - Ok(_) => println!(" validation success"), - Err(e) => { - if let Some(sch) = test.output.basic { - let mut schemas = Schemas::new(); - let mut compiler = Compiler::new(); - compiler.set_default_draft(draft); - compiler.add_resource(&output_schema_url, output_schema.clone())?; - let schema_url = "http://output-tests/schema"; - compiler.add_resource(schema_url, sch)?; - let sch = compiler.compile(schema_url, &mut schemas)?; - let basic: Value = serde_json::from_str(&e.basic_output().to_string())?; - let result = schemas.validate(&basic, sch); - if let Err(e) = result { - println!("{basic:#}\n"); - for line in format!("{e}").lines() { - println!(" {line}"); - } - panic!("basic output did not match"); - } - } - if let Some(sch) = test.output.detailed { - let mut schemas = Schemas::new(); - let mut compiler = Compiler::new(); - compiler.set_default_draft(draft); - compiler.add_resource(&output_schema_url, output_schema.clone())?; - let schema_url = "http://output-tests/schema"; - compiler.add_resource(schema_url, sch)?; - let sch = compiler.compile(schema_url, &mut schemas)?; - let detailed: Value = - serde_json::from_str(&e.detailed_output().to_string())?; - let result = schemas.validate(&detailed, sch); - if let Err(e) = result { - println!("{detailed:#}\n"); - for line in 
format!("{e}").lines() { - println!(" {line}"); - } - panic!("detailed output did not match"); - } - } - } + let output_schema_url = format!( + "https://json-schema.org/draft/{}/output/schema", + folder.strip_prefix("draft").unwrap() + ); + let prefix = Path::new(suite).join("output-tests"); + let folder = prefix.join(folder); + let content = folder.join("content"); + if !content.is_dir() { + return Ok(()); + } + let output_schema: Value = + serde_json::from_reader(File::open(folder.join("output-schema.json"))?)?; + for entry in content.read_dir()? { + let entry = entry?; + if !entry.file_type()?.is_file() { + continue; + }; + let entry_path = entry.path(); + println!("{}", entry_path.strip_prefix(&prefix)?.to_str().unwrap()); + let groups: Vec = serde_json::from_reader(File::open(entry_path)?)?; + for group in groups { + println!(" {}", group.description); + let mut schemas = Schemas::new(); + let mut compiler = Compiler::new(); + compiler.set_default_draft(draft); + let schema_url = "http://output-tests/schema"; + compiler.add_resource(schema_url, group.schema)?; + let sch = compiler.compile(schema_url, &mut schemas)?; + for test in group.tests { + println!(" {}", test.description); + match schemas.validate(&test.data, sch) { + Ok(_) => println!(" validation success"), + Err(e) => { + if let Some(sch) = test.output.basic { + let mut schemas = Schemas::new(); + let mut compiler = Compiler::new(); + compiler.set_default_draft(draft); + compiler.add_resource(&output_schema_url, output_schema.clone())?; + let schema_url = "http://output-tests/schema"; + compiler.add_resource(schema_url, sch)?; + let sch = compiler.compile(schema_url, &mut schemas)?; + let basic: Value = serde_json::from_str(&e.basic_output().to_string())?; + let result = schemas.validate(&basic, sch); + if let Err(e) = result { + println!("{basic:#}\n"); + for line in format!("{e}").lines() { + println!(" {line}"); } + panic!("basic output did not match"); + } } + if let Some(sch) = 
test.output.detailed { + let mut schemas = Schemas::new(); + let mut compiler = Compiler::new(); + compiler.set_default_draft(draft); + compiler.add_resource(&output_schema_url, output_schema.clone())?; + let schema_url = "http://output-tests/schema"; + compiler.add_resource(schema_url, sch)?; + let sch = compiler.compile(schema_url, &mut schemas)?; + let detailed: Value = + serde_json::from_str(&e.detailed_output().to_string())?; + let result = schemas.validate(&detailed, sch); + if let Err(e) = result { + println!("{detailed:#}\n"); + for line in format!("{e}").lines() { + println!(" {line}"); + } + panic!("detailed output did not match"); + } + } + } } + } } + } - Ok(()) + Ok(()) } #[derive(Debug, Serialize, Deserialize)] struct Group { - description: String, - schema: Value, - tests: Vec, + description: String, + schema: Value, + tests: Vec, } #[derive(Debug, Serialize, Deserialize)] struct Test { - description: String, - data: Value, - output: Output, + description: String, + data: Value, + output: Output, } #[derive(Debug, Serialize, Deserialize)] struct Output { - basic: Option, - detailed: Option, + basic: Option, + detailed: Option, } diff --git a/validator/tests/suite.rs b/validator/tests/suite.rs index e2536d2..3e80fcf 100644 --- a/validator/tests/suite.rs +++ b/validator/tests/suite.rs @@ -5,116 +5,116 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; static SKIP: [&str; 2] = [ - "zeroTerminatedFloats.json", // only draft4: this behavior is changed in later drafts - "float-overflow.json", + "zeroTerminatedFloats.json", // only draft4: this behavior is changed in later drafts + "float-overflow.json", ]; #[derive(Debug, Serialize, Deserialize)] struct Group { - description: String, - schema: Value, - tests: Vec, + description: String, + schema: Value, + tests: Vec, } #[derive(Debug, Serialize, Deserialize)] struct Test { - description: String, - data: Value, - valid: bool, + description: String, + data: Value, + valid: bool, } #[test] fn 
test_suites() -> Result<(), Box> { - if let Ok(suite) = env::var("TEST_SUITE") { - test_suite(&suite)?; - } else { - test_suite("tests/JSON-Schema-Test-Suite")?; - test_suite("tests/Extra-Test-Suite")?; - } - Ok(()) + if let Ok(suite) = env::var("TEST_SUITE") { + test_suite(&suite)?; + } else { + test_suite("tests/JSON-Schema-Test-Suite")?; + test_suite("tests/Extra-Test-Suite")?; + } + Ok(()) } fn test_suite(suite: &str) -> Result<(), Box> { - if !Path::new(suite).exists() { - Err(format!("test suite {suite} does not exist"))?; - } - test_dir(suite, "draft4", Draft::V4)?; - test_dir(suite, "draft6", Draft::V6)?; - test_dir(suite, "draft7", Draft::V7)?; - test_dir(suite, "draft2019-09", Draft::V2019_09)?; - test_dir(suite, "draft2020-12", Draft::V2020_12)?; - Ok(()) + if !Path::new(suite).exists() { + Err(format!("test suite {suite} does not exist"))?; + } + test_dir(suite, "draft4", Draft::V4)?; + test_dir(suite, "draft6", Draft::V6)?; + test_dir(suite, "draft7", Draft::V7)?; + test_dir(suite, "draft2019-09", Draft::V2019_09)?; + test_dir(suite, "draft2020-12", Draft::V2020_12)?; + Ok(()) } fn test_dir(suite: &str, path: &str, draft: Draft) -> Result<(), Box> { - let prefix = Path::new(suite).join("tests"); - let dir = prefix.join(path); - if !dir.is_dir() { - return Ok(()); + let prefix = Path::new(suite).join("tests"); + let dir = prefix.join(path); + if !dir.is_dir() { + return Ok(()); + } + for entry in dir.read_dir()? { + let entry = entry?; + let file_type = entry.file_type()?; + let tmp_entry_path = entry.path(); + let entry_path = tmp_entry_path.strip_prefix(&prefix)?.to_str().unwrap(); + if file_type.is_file() { + if !SKIP.iter().any(|n| OsStr::new(n) == entry.file_name()) { + test_file(suite, entry_path, draft)?; + } + } else if file_type.is_dir() { + test_dir(suite, entry_path, draft)?; } - for entry in dir.read_dir()? 
{ - let entry = entry?; - let file_type = entry.file_type()?; - let tmp_entry_path = entry.path(); - let entry_path = tmp_entry_path.strip_prefix(&prefix)?.to_str().unwrap(); - if file_type.is_file() { - if !SKIP.iter().any(|n| OsStr::new(n) == entry.file_name()) { - test_file(suite, entry_path, draft)?; - } - } else if file_type.is_dir() { - test_dir(suite, entry_path, draft)?; - } - } - Ok(()) + } + Ok(()) } fn test_file(suite: &str, path: &str, draft: Draft) -> Result<(), Box> { - println!("FILE: {path}"); - let path = Path::new(suite).join("tests").join(path); - let optional = path.components().any(|comp| comp.as_os_str() == "optional"); - let file = File::open(path)?; + println!("FILE: {path}"); + let path = Path::new(suite).join("tests").join(path); + let optional = path.components().any(|comp| comp.as_os_str() == "optional"); + let file = File::open(path)?; - let url = "http://testsuite.com/schema.json"; - let groups: Vec = serde_json::from_reader(file)?; - for group in groups { - println!("{}", group.description); - let mut schemas = Schemas::default(); - let mut compiler = Compiler::default(); - compiler.set_default_draft(draft); - if optional { - compiler.enable_format_assertions(); - compiler.enable_content_assertions(); - } - compiler.use_loader(Box::new(RemotesLoader(suite.to_owned()))); - compiler.add_resource(url, group.schema)?; - let sch_index = compiler.compile(url, &mut schemas)?; - for test in group.tests { - println!(" {}", test.description); - let result = schemas.validate(&test.data, sch_index); - if let Err(e) = &result { - for line in format!("{e}").lines() { - println!(" {line}"); - } - for line in format!("{e:#}").lines() { - println!(" {line}"); - } - } - assert_eq!(result.is_ok(), test.valid); - } + let url = "http://testsuite.com/schema.json"; + let groups: Vec = serde_json::from_reader(file)?; + for group in groups { + println!("{}", group.description); + let mut schemas = Schemas::default(); + let mut compiler = Compiler::default(); 
+ compiler.set_default_draft(draft); + if optional { + compiler.enable_format_assertions(); + compiler.enable_content_assertions(); } - Ok(()) + compiler.use_loader(Box::new(RemotesLoader(suite.to_owned()))); + compiler.add_resource(url, group.schema)?; + let sch_index = compiler.compile(url, &mut schemas)?; + for test in group.tests { + println!(" {}", test.description); + let result = schemas.validate(&test.data, sch_index); + if let Err(e) = &result { + for line in format!("{e}").lines() { + println!(" {line}"); + } + for line in format!("{e:#}").lines() { + println!(" {line}"); + } + } + assert_eq!(result.is_ok(), test.valid); + } + } + Ok(()) } struct RemotesLoader(String); impl UrlLoader for RemotesLoader { - fn load(&self, url: &str) -> Result> { - // remotes folder -- - if let Some(path) = url.strip_prefix("http://localhost:1234/") { - let path = Path::new(&self.0).join("remotes").join(path); - let file = File::open(path)?; - let json: Value = serde_json::from_reader(file)?; - return Ok(json); - } - Err("no internet")? + fn load(&self, url: &str) -> Result> { + // remotes folder -- + if let Some(path) = url.strip_prefix("http://localhost:1234/") { + let path = Path::new(&self.0).join("remotes").join(path); + let file = File::open(path)?; + let json: Value = serde_json::from_reader(file)?; + return Ok(json); } + Err("no internet")? + } }