diff --git a/GEMINI.md b/GEMINI.md index 0cbe324..d2fdef2 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -9,7 +9,7 @@ It is designed to serve as the validation engine for the "Punc" architecture, wh 1. **Draft 2020-12 Compliance**: Attempt to adhere to the official JSON Schema Draft 2020-12 specification. 2. **Ultra-Fast Validation**: Compile schemas into an optimized in-memory representation for near-instant validation during high-throughput workloads. 3. **Connection-Bound Caching**: Leverage the PostgreSQL session lifecycle to maintain a per-connection schema cache, eliminating the need for repetitive parsing. -4. **Structural Inheritance**: Support object-oriented schema design via Implicit Keyword Shadowing and virtual `.family` schemas. +4. **Structural Inheritance**: Support object-oriented schema design via Implicit Keyword Shadowing and virtual `$family` references. 5. **Punc Integration**: validation is aware of the "Punc" context (request/response) and can validate `cue` objects efficiently. ## 🔌 API Reference @@ -27,7 +27,7 @@ Loads and compiles the entire schema registry into the session's memory, atomica * **Behavior**: * Parses all inputs into an internal schema graph. * Resolves all internal references (`$ref`). - * Generates virtual `.family` schemas for type hierarchies. + * Generates virtual union schemas for type hierarchies referenced via `$family`. * Compiles schemas into validators. * **Returns**: `{"response": "success"}` or an error object. @@ -78,10 +78,10 @@ Standard JSON Schema composition (`allOf`) is additive (Intersection), meaning c * **Composition (`allOf`)**: When using `allOf`, standard intersection rules apply. No shadowing occurs; all constraints from all branches must pass. This is used for mixins or interfaces. -### 2. Virtual Family Schemas (`.family`) +### 2. 
Virtual Family References (`$family`) To support polymorphic fields (e.g., a field that accepts any "User" type), JSPG generates virtual schemas representing type hierarchies. -* **Mechanism**: When caching types, if a type defines a `hierarchy` (e.g., `["entity", "organization", "person"]`), JSPG generates a schema like `organization.family` which is a `oneOf` containing refs to all valid descendants. +* **Mechanism**: When caching types, if a type defines a `hierarchy` (e.g., `["entity", "organization", "person"]`), JSPG generates a virtual `oneOf` family containing refs to all valid descendants. A family can only be referenced through the `$family` keyword, e.g. `{"$family": "organization"}`. Because `$family` is a macro-pointer that swaps in the virtual union, it **must** be the only keyword in its schema object; you cannot define other properties alongside it. ### 3. Strict by Default & Extensibility JSPG enforces a "Secure by Default" philosophy. All schemas are treated as if `unevaluatedProperties: false` (and `unevaluatedItems: false`) is set, unless explicitly overridden. 
diff --git a/src/context.rs b/src/context.rs
new file mode 100644
index 0000000..1dabdbb
--- /dev/null
+++ b/src/context.rs
@@ -0,0 +1,141 @@
+use crate::error::ValidationError;
+use crate::instance::ValidationInstance;
+use crate::result::ValidationResult;
+use crate::schema::Schema;
+use crate::validator::Validator;
+use std::collections::HashSet;
+
+/// State carried while validating one instance value against one schema node.
+pub struct ValidationContext<'a, I: ValidationInstance<'a>> {
+  /// Validator used to resolve references and look up families.
+  pub validator: &'a Validator,
+  /// Root schema; a `$ref` of `#` is validated against it.
+  pub root: &'a Schema,
+  /// Schema node currently being applied.
+  pub schema: &'a Schema,
+  /// Instance value under validation.
+  pub instance: I,
+  /// Path of `instance`, copied into every reported error.
+  pub path: String,
+  /// Nesting depth; `derive` increments it.
+  pub depth: usize,
+  /// Stack of base URIs; the last entry is the current base.
+  pub scope: Vec<String>,
+  /// Property names that the `properties` check skips.
+  pub overrides: HashSet<String>,
+  /// Effective extensibility: `schema.extensible` when set, otherwise the inherited value.
+  pub extensible: bool,
+  /// Reporter flag supplied by the caller; this type only stores and forwards it.
+  pub reporter: bool,
+}
+
+impl<'a, I: ValidationInstance<'a>> ValidationContext<'a, I> {
+  /// Creates a top-level context with an empty path and depth 0.
+  ///
+  /// `extensible` is only a fallback: `schema.extensible`, when set, wins.
+  pub fn new(
+    validator: &'a Validator,
+    root: &'a Schema,
+    schema: &'a Schema,
+    instance: I,
+    scope: Vec<String>,
+    overrides: HashSet<String>,
+    extensible: bool,
+    reporter: bool,
+  ) -> Self {
+    Self {
+      validator,
+      root,
+      schema,
+      instance,
+      path: String::new(),
+      depth: 0,
+      scope,
+      overrides,
+      extensible: schema.extensible.unwrap_or(extensible),
+      reporter,
+    }
+  }
+
+  /// Creates a child context one level deeper that keeps this context's validator and root.
+  ///
+  /// `extensible` is only a fallback: `schema.extensible`, when set, wins.
+  pub fn derive(
+    &self,
+    schema: &'a Schema,
+    instance: I,
+    path: &str,
+    scope: Vec<String>,
+    overrides: HashSet<String>,
+    extensible: bool,
+    reporter: bool,
+  ) -> Self {
+    Self {
+      validator: self.validator,
+      root: self.root,
+      schema,
+      instance,
+      path: path.to_string(),
+      depth: self.depth + 1,
+      scope,
+      overrides,
+      extensible: schema.extensible.unwrap_or(extensible),
+      reporter,
+    }
+  }
+
+  /// Applies `schema` to the same instance, path and scope, with empty overrides.
+  pub fn derive_for_schema(&self, schema: &'a Schema, reporter: bool) -> Self {
+    self.derive(
+      schema,
+      self.instance,
+      &self.path,
+      self.scope.clone(),
+      HashSet::new(),
+      self.extensible,
+      reporter,
+    )
+  }
+
+  /// Validates the instance, first entering the schema's `$id` scope when it declares one.
+  ///
+  /// The `$id` is joined onto the current base URI when that base parses as a URL and the join
+  /// succeeds; otherwise the `$id` is pushed onto the scope verbatim.
+  ///
+  /// # Errors
+  ///
+  /// Propagates any `ValidationError` returned by `validate_scoped`.
+  pub fn validate(&self) -> Result<ValidationResult, ValidationError> {
+    // No `$id`: the scope is unchanged, so nothing needs to be cloned.
+    let id = match &self.schema.obj.id {
+      Some(id) => id,
+      None => return self.validate_scoped(),
+    };
+
+    // Resolve the `$id` against the current base when that base is a parseable URL.
+    let new_base = self
+      .scope
+      .last()
+      .filter(|base| !base.is_empty())
+      .and_then(|base| url::Url::parse(base).ok())
+      .and_then(|base_url| base_url.join(id).ok())
+      .map_or_else(|| id.clone(), |joined| joined.to_string());
+
+    let mut effective_scope = self.scope.clone();
+    effective_scope.push(new_base);
+
+    let shadow = ValidationContext {
+      validator: self.validator,
+      root: self.root,
+      schema: self.schema,
+      instance: self.instance,
+      path: self.path.clone(),
+      depth: self.depth,
+      scope: effective_scope,
+      overrides: self.overrides.clone(),
+      extensible: self.extensible,
+      reporter: self.reporter,
+    };
+    shadow.validate_scoped()
+  }
+}
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..9e756d9
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,10 @@
+/// A single validation failure.
+#[derive(Debug, Clone, serde::Serialize)]
+pub struct ValidationError {
+  /// Machine-readable error code, e.g. `"INVALID_TYPE"`.
+  pub code: String,
+  /// Human-readable description of the failure.
+  pub message: String,
+  /// Path of the instance value the error refers to.
+  pub path: String,
+}
diff --git a/src/instance.rs b/src/instance.rs
new file mode 100644
index 0000000..341acd5
--- /dev/null
+++ b/src/instance.rs
@@ -0,0 +1,110 @@
+use serde_json::Value;
+use std::collections::HashSet;
+use std::ptr::NonNull;
+
+/// Cheap, copyable handle to the JSON value being validated.
+pub trait ValidationInstance<'a>: Copy + Clone {
+  /// Returns the underlying JSON value.
+  fn as_value(&self) -> &'a Value;
+  /// Handle to member `key`; `None` if absent or if the value is not an object.
+  fn child_at_key(&self, key: &str) -> Option<Self>;
+  /// Handle to element `idx`; `None` if out of range or if the value is not an array.
+  fn child_at_index(&self, idx: usize) -> Option<Self>;
+  /// Hook for dropping object members whose key is not in `_keys`; the default does nothing.
+  fn prune_object(&self, _keys: &HashSet<String>) {}
+  /// Hook for dropping array elements whose index is not in `_indices`; the default does nothing.
+  fn prune_array(&self, _indices: &HashSet<usize>) {}
+}
+
+/// Instance handle over a shared reference; it keeps the no-op pruning defaults.
+#[derive(Clone, Copy)]
+pub struct ReadOnlyInstance<'a>(pub &'a Value);
+
+impl<'a> ValidationInstance<'a> for ReadOnlyInstance<'a> {
+  fn as_value(&self) -> &'a Value {
+    self.0
+  }
+
+  fn child_at_key(&self, key: &str) -> Option<Self> {
+    self.0.get(key).map(ReadOnlyInstance)
+  }
+
+  fn child_at_index(&self, idx: usize) -> Option<Self> {
+    self.0.get(idx).map(ReadOnlyInstance)
+  }
+}
+
+/// Instance handle over a raw pointer to a mutable value, so that pruning can edit it in place.
+///
+/// The handle does not track the borrow it was created from; the caller must keep the pointed-to
+/// value alive and otherwise untouched for as long as any copy of the handle is in use.
+#[derive(Clone, Copy)]
+pub struct MutableInstance {
+  ptr: NonNull<Value>,
+}
+
+impl MutableInstance {
+  /// Wraps `val`; the lifetime of the borrow is erased.
+  pub fn new(val: &mut Value) -> Self {
+    Self {
+      ptr: NonNull::from(val),
+    }
+  }
+}
+
+// NOTE(review): each method below re-creates a reference from the raw pointer, and `as_value`
+// returns `&'a Value` for any `'a`. Nothing in this type stops a caller from pruning while
+// such a reference is still live, which would be undefined behaviour — confirm that call
+// sites never do.
+impl<'a> ValidationInstance<'a> for MutableInstance {
+  /// Returns the pointed-to value as a shared reference.
+  fn as_value(&self) -> &'a Value {
+    // SAFETY: `ptr` was built from a `&mut Value` in `MutableInstance::new`; this relies on the
+    // caller keeping that value alive and not using conflicting references to it meanwhile.
+    unsafe { self.ptr.as_ref() }
+  }
+
+  /// Handle to member `key`; `None` if absent or if the value is not an object.
+  fn child_at_key(&self, key: &str) -> Option<Self> {
+    // `get_mut` alone yields `None` for non-objects and missing keys, so no pre-check is needed.
+    // SAFETY: same contract as `as_value`.
+    let child = unsafe { (&mut *self.ptr.as_ptr()).get_mut(key) };
+    child.map(MutableInstance::new)
+  }
+
+  /// Handle to element `idx`; `None` if out of range or if the value is not an array.
+  fn child_at_index(&self, idx: usize) -> Option<Self> {
+    // `get_mut` alone yields `None` for non-arrays and out-of-range indices.
+    // SAFETY: same contract as `as_value`.
+    let child = unsafe { (&mut *self.ptr.as_ptr()).get_mut(idx) };
+    child.map(MutableInstance::new)
+  }
+
+  /// Keeps only the members whose key is in `keys`; does nothing for non-objects.
+  fn prune_object(&self, keys: &HashSet<String>) {
+    // SAFETY: same contract as `as_value`; the `&mut` does not outlive this call.
+    unsafe {
+      let val_mut = &mut *self.ptr.as_ptr();
+      if let Some(obj) = val_mut.as_object_mut() {
+        obj.retain(|k, _| keys.contains(k));
+      }
+    }
+  }
+
+  /// Keeps only the elements whose original index is in `indices`; does nothing for non-arrays.
+  fn prune_array(&self, indices: &HashSet<usize>) {
+    // SAFETY: same contract as `as_value`; the `&mut` does not outlive this call.
+    unsafe {
+      let val_mut = &mut *self.ptr.as_ptr();
+      if let Some(arr) = val_mut.as_array_mut() {
+        // `retain` visits elements in order, so `i` is each element's original index.
+        let mut i = 0;
+        arr.retain(|_| {
+          let keep = indices.contains(&i);
+          i += 1;
+          keep
+        });
+      }
+    }
+  }
+}
diff --git a/src/lib.rs b/src/lib.rs index ce163eb..b353b2d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,8 +11,13 @@ mod schema; pub mod util; mod validator; -use crate::schema::Schema; -use serde_json::{Value, json}; +pub mod context; +pub mod error; +pub mod instance; +pub mod result; +pub(crate) mod rules; + +use serde_json::json; use std::sync::{Arc, RwLock}; lazy_static::lazy_static! { @@ -26,79 +31,12 @@ lazy_static::lazy_static! { #[pg_extern(strict)] pub fn cache_json_schemas(enums: JsonB, types: JsonB, puncs: JsonB) -> JsonB { - // 1. 
Build a new Registry LOCALLY (on stack) - let mut registry = registry::Registry::new(); - - // Generate Family Schemas from Types - { - let mut family_map: std::collections::HashMap> = - std::collections::HashMap::new(); - if let Value::Array(arr) = &types.0 { - for item in arr { - if let Some(name) = item.get("name").and_then(|v| v.as_str()) { - if let Some(hierarchy) = item.get("hierarchy").and_then(|v| v.as_array()) { - for ancestor in hierarchy { - if let Some(anc_str) = ancestor.as_str() { - family_map - .entry(anc_str.to_string()) - .or_default() - .insert(name.to_string()); - } - } - } - } - } - } - - for (family_name, members) in family_map { - let id = format!("{}.family", family_name); - - // Object Union (for polymorphic object validation) - // This allows the schema to match ANY of the types in the family hierarchy - let object_refs: Vec = members.iter().map(|s| json!({ "$ref": s })).collect(); - - let schema_json = json!({ - "$id": id, - "oneOf": object_refs - }); - - if let Ok(schema) = serde_json::from_value::(schema_json) { - registry.add(schema); - } - } - - // Helper to parse and cache a list of items - let mut cache_items = |items: JsonB| { - if let Value::Array(arr) = items.0 { - for item in arr { - // For now, we assume the item structure matches what the generator expects - // or what `json_schemas.sql` sends. - // The `Schema` struct in `schema.rs` is designed to deserialize standard JSON Schema. - // However, the input here is an array of objects that *contain* a `schemas` array. - // We need to extract those inner schemas. 
- - if let Some(schemas_val) = item.get("schemas") { - if let Value::Array(schemas) = schemas_val { - for schema_val in schemas { - // Deserialize into our robust Schema struct to ensure validity/parsing - if let Ok(schema) = serde_json::from_value::(schema_val.clone()) { - // Registry handles compilation - registry.add(schema); - } - } - } - } - } - } - }; - - cache_items(enums); - cache_items(types); - cache_items(puncs); // public/private distinction logic to come later - } - - // 2. Wrap in Validator and Arc - let new_validator = validator::Validator::new(registry); + // 1 & 2. Build Registry, Families, and Wrap in Validator all in one shot + let new_validator = crate::validator::Validator::from_punc_definition( + Some(&enums.0), + Some(&types.0), + Some(&puncs.0), + ); let new_arc = Arc::new(new_validator); // 3. ATOMIC SWAP diff --git a/src/result.rs b/src/result.rs new file mode 100644 index 0000000..7e79ac9 --- /dev/null +++ b/src/result.rs @@ -0,0 +1,27 @@ +use crate::error::ValidationError; +use std::collections::HashSet; + +#[derive(Debug, Default, Clone, serde::Serialize)] +pub struct ValidationResult { + pub errors: Vec, + #[serde(skip)] + pub evaluated_keys: HashSet, + #[serde(skip)] + pub evaluated_indices: HashSet, +} + +impl ValidationResult { + pub fn new() -> Self { + Self::default() + } + + pub fn merge(&mut self, other: ValidationResult) { + self.errors.extend(other.errors); + self.evaluated_keys.extend(other.evaluated_keys); + self.evaluated_indices.extend(other.evaluated_indices); + } + + pub fn is_valid(&self) -> bool { + self.errors.is_empty() + } +} diff --git a/src/rules.rs b/src/rules.rs new file mode 100644 index 0000000..a48142d --- /dev/null +++ b/src/rules.rs @@ -0,0 +1,1008 @@ +use regex::Regex; +use serde_json::Value; +use std::collections::HashSet; + +use crate::context::ValidationContext; +use crate::error::ValidationError; +use crate::instance::ValidationInstance; +use crate::result::ValidationResult; +use 
crate::validator::{ResolvedRef, Validator}; + +impl<'a, I: ValidationInstance<'a>> ValidationContext<'a, I> { + pub(crate) fn validate_scoped(&self) -> Result { + let mut result = ValidationResult::new(); + + self.validate_depth()?; + if self.validate_always_fail(&mut result) { + return Ok(result); + } + if self.validate_family(&mut result) { + return Ok(result); + } + + if let Some(ref_res) = self.validate_refs()? { + result.merge(ref_res); + } + + self.validate_core(&mut result); + self.validate_numeric(&mut result); + self.validate_string(&mut result); + self.validate_format(&mut result); + self.validate_object(&mut result)?; + self.validate_array(&mut result)?; + self.validate_combinators(&mut result)?; + self.validate_conditionals(&mut result)?; + self.validate_extensible(&mut result); + self.validate_strictness(&mut result); + + Ok(result) + } + + fn validate_extensible(&self, result: &mut ValidationResult) { + if self.extensible { + let current = self.instance.as_value(); + if let Some(obj) = current.as_object() { + result.evaluated_keys.extend(obj.keys().cloned()); + } else if let Some(arr) = current.as_array() { + result.evaluated_indices.extend(0..arr.len()); + } + } + } + + fn validate_depth(&self) -> Result<(), ValidationError> { + if self.depth > 100 { + Err(ValidationError { + code: "RECURSION_LIMIT_EXCEEDED".to_string(), + message: "Recursion limit exceeded".to_string(), + path: self.path.to_string(), + }) + } else { + Ok(()) + } + } + + fn validate_always_fail(&self, result: &mut ValidationResult) -> bool { + if self.schema.always_fail { + result.errors.push(ValidationError { + code: "FALSE_SCHEMA".to_string(), + message: "Schema is false".to_string(), + path: self.path.to_string(), + }); + true + } else { + false + } + } + + fn validate_family(&self, result: &mut ValidationResult) -> bool { + if self.schema.family.is_some() { + let conflicts = self.schema.type_.is_some() + || self.schema.properties.is_some() + || self.schema.required.is_some() + || 
self.schema.additional_properties.is_some() + || self.schema.items.is_some() + || self.schema.ref_string.is_some() + || self.schema.one_of.is_some() + || self.schema.any_of.is_some() + || self.schema.all_of.is_some() + || self.schema.enum_.is_some() + || self.schema.const_.is_some(); + + if conflicts { + result.errors.push(ValidationError { + code: "INVALID_SCHEMA".to_string(), + message: "$family must be used exclusively without other constraints".to_string(), + path: self.path.to_string(), + }); + return true; + } + } + false + } + + pub(crate) fn validate_refs(&self) -> Result, ValidationError> { + let mut res = ValidationResult::new(); + let mut handled = false; + + let effective_scope = &self.scope; + let current_base_resolved = effective_scope.last().map(|s| s.as_str()).unwrap_or(""); + + // $ref + if let Some(ref ref_string) = self.schema.ref_string { + handled = true; + if ref_string == "#" { + let mut new_overrides = self.overrides.clone(); + if let Some(props) = &self.schema.properties { + new_overrides.extend(props.keys().cloned()); + } + + let derived = self.derive( + self.root, + self.instance, + &self.path, + effective_scope.clone(), + new_overrides, + self.extensible, + self.reporter, + ); + res.merge(derived.validate()?); + } else { + if let Some((resolved, matched_key)) = + self + .validator + .resolve_ref(self.root, ref_string, current_base_resolved) + { + let (target_root, target_schema) = match &resolved { + ResolvedRef::Local(s) => (self.root, *s), + ResolvedRef::Global(root, s) => (*root, *s), + }; + + let resource_base = if let Some((base, _)) = matched_key.split_once('#') { + base + } else { + &matched_key + }; + + let scope_to_pass = if target_schema.obj.id.is_none() { + if !resource_base.is_empty() && resource_base != current_base_resolved { + let mut new_scope = effective_scope.clone(); + new_scope.push(resource_base.to_string()); + new_scope + } else { + effective_scope.clone() + } + } else { + effective_scope.clone() + }; + + let mut 
new_overrides = self.overrides.clone(); + if let Some(props) = &self.schema.properties { + new_overrides.extend(props.keys().cloned()); + } + + let target_ctx = ValidationContext::new( + self.validator, + target_root, + target_schema, + self.instance, + scope_to_pass, + new_overrides, + false, + self.reporter, + ); + let mut manual_ctx = target_ctx; + manual_ctx.path = self.path.clone(); + manual_ctx.depth = self.depth + 1; + + let target_res = manual_ctx.validate()?; + res.merge(target_res); + handled = true; + } else { + res.errors.push(ValidationError { + code: "REF_RESOLUTION_FAILED".to_string(), + message: format!("Could not resolve reference '{}'", ref_string), + path: self.path.to_string(), + }); + } + } + } + + // $dynamicRef + if let Some(ref d_ref) = self.schema.obj.dynamic_ref { + handled = true; + let anchor = if let Some(idx) = d_ref.rfind('#') { + &d_ref[idx + 1..] + } else { + d_ref.as_str() + }; + + let mut resolved_target: Option<(ResolvedRef, String)> = None; + let local_resolution = self + .validator + .resolve_ref(self.root, d_ref, current_base_resolved); + + let is_bookended = if let Some((ResolvedRef::Local(s), _)) = &local_resolution { + s.obj.dynamic_anchor.as_deref() == Some(anchor) + } else { + false + }; + + if is_bookended { + for base in effective_scope.iter() { + let resource_base = if let Some((r, _)) = base.split_once('#') { + r + } else { + base + }; + let key = format!("{}#{}", resource_base, anchor); + + if let Some(indexrs) = &self.root.obj.compiled_registry { + if let Some(s) = indexrs.schemas.get(&key) { + if s.obj.dynamic_anchor.as_deref() == Some(anchor) { + resolved_target = Some((ResolvedRef::Local(s.as_ref()), key.clone())); + break; + } + } + } + if resolved_target.is_none() { + if let Some(registry_arc) = &self.root.obj.compiled_registry { + if let Some(compiled) = registry_arc.schemas.get(resource_base) { + if let Some(indexrs) = &compiled.obj.compiled_registry { + if let Some(s) = indexrs.schemas.get(&key) { + if 
s.obj.dynamic_anchor.as_deref() == Some(anchor) { + resolved_target = Some(( + ResolvedRef::Global(compiled.as_ref(), s.as_ref()), + key.clone(), + )); + break; + } + } + } + } + } else { + if let Some(compiled) = self.validator.registry.schemas.get(resource_base) { + if let Some(indexrs) = &compiled.obj.compiled_registry { + if let Some(s) = indexrs.schemas.get(&key) { + if s.obj.dynamic_anchor.as_deref() == Some(anchor) { + resolved_target = Some(( + ResolvedRef::Global(compiled.as_ref(), s.as_ref()), + key.clone(), + )); + break; + } + } + } + } + } + } + if resolved_target.is_some() { + break; + } + } + } + + if resolved_target.is_none() { + resolved_target = local_resolution; + } + + if let Some((resolved, matched_key)) = resolved_target { + let (target_root, target_schema) = match &resolved { + ResolvedRef::Local(s) => (self.root, *s), + ResolvedRef::Global(root, s) => (*root, *s), + }; + + let resource_base = if let Some((base, _)) = matched_key.split_once('#') { + base + } else { + &matched_key + }; + + let scope_to_pass = if let Some(ref tid) = target_schema.obj.id { + let mut new_scope = effective_scope.clone(); + new_scope.push(tid.clone()); + new_scope + } else { + if !resource_base.is_empty() && resource_base != current_base_resolved { + let mut new_scope = effective_scope.clone(); + new_scope.push(resource_base.to_string()); + new_scope + } else { + effective_scope.clone() + } + }; + + let mut new_overrides = self.overrides.clone(); + if let Some(props) = &self.schema.properties { + new_overrides.extend(props.keys().cloned()); + } + + let target_ctx = ValidationContext::new( + self.validator, + target_root, + target_schema, + self.instance, + scope_to_pass, + new_overrides, + false, + self.reporter, + ); + let mut manual_ctx = target_ctx; + manual_ctx.path = self.path.clone(); + manual_ctx.depth = self.depth + 1; + + res.merge(manual_ctx.validate()?); + } else { + res.errors.push(ValidationError { + code: "REF_RESOLUTION_FAILED".to_string(), + 
message: format!("Could not resolve dynamic reference '{}'", d_ref), + path: self.path.to_string(), + }); + } + } + + // Family Support Map + if let Some(ref family) = self.schema.obj.family { + if let Some(family_schema) = self.validator.families.get(family) { + let derived = self.derive_for_schema(family_schema.as_ref(), true); + res.merge(derived.validate()?); + handled = true; + } else { + res.errors.push(ValidationError { + code: "FAMILY_NOT_FOUND".to_string(), + message: format!("Family '{}' not found in families map", family), + path: self.path.to_string(), + }); + handled = true; + } + } + + if handled { Ok(Some(res)) } else { Ok(None) } + } + + pub(crate) fn validate_core(&self, result: &mut ValidationResult) { + let current = self.instance.as_value(); + if let Some(ref type_) = self.schema.type_ { + match type_ { + crate::schema::SchemaTypeOrArray::Single(t) => { + if !Validator::check_type(t, current) { + result.errors.push(ValidationError { + code: "INVALID_TYPE".to_string(), + message: format!("Expected type '{}'", t), + path: self.path.to_string(), + }); + } + } + crate::schema::SchemaTypeOrArray::Multiple(types) => { + let mut valid = false; + for t in types { + if Validator::check_type(t, current) { + valid = true; + break; + } + } + if !valid { + result.errors.push(ValidationError { + code: "INVALID_TYPE".to_string(), + message: format!("Expected one of types {:?}", types), + path: self.path.to_string(), + }); + } + } + } + } + + if let Some(ref const_val) = self.schema.const_ { + if !crate::util::equals(current, const_val) { + result.errors.push(ValidationError { + code: "CONST_VIOLATED".to_string(), + message: "Value does not match const".to_string(), + path: self.path.to_string(), + }); + } else { + if let Some(obj) = current.as_object() { + result.evaluated_keys.extend(obj.keys().cloned()); + } else if let Some(arr) = current.as_array() { + result.evaluated_indices.extend(0..arr.len()); + } + } + } + + if let Some(ref enum_vals) = 
self.schema.enum_ { + let mut found = false; + for val in enum_vals { + if crate::util::equals(current, val) { + found = true; + break; + } + } + if !found { + result.errors.push(ValidationError { + code: "ENUM_MISMATCH".to_string(), + message: "Value is not in enum".to_string(), + path: self.path.to_string(), + }); + } else { + if let Some(obj) = current.as_object() { + result.evaluated_keys.extend(obj.keys().cloned()); + } else if let Some(arr) = current.as_array() { + result.evaluated_indices.extend(0..arr.len()); + } + } + } + } + + pub(crate) fn validate_numeric(&self, result: &mut ValidationResult) { + let current = self.instance.as_value(); + if let Some(num) = current.as_f64() { + if let Some(min) = self.schema.minimum { + if num < min { + result.errors.push(ValidationError { + code: "MINIMUM_VIOLATED".to_string(), + message: format!("Value {} < min {}", num, min), + path: self.path.to_string(), + }); + } + } + if let Some(max) = self.schema.maximum { + if num > max { + result.errors.push(ValidationError { + code: "MAXIMUM_VIOLATED".to_string(), + message: format!("Value {} > max {}", num, max), + path: self.path.to_string(), + }); + } + } + if let Some(ex_min) = self.schema.exclusive_minimum { + if num <= ex_min { + result.errors.push(ValidationError { + code: "EXCLUSIVE_MINIMUM_VIOLATED".to_string(), + message: format!("Value {} <= ex_min {}", num, ex_min), + path: self.path.to_string(), + }); + } + } + if let Some(ex_max) = self.schema.exclusive_maximum { + if num >= ex_max { + result.errors.push(ValidationError { + code: "EXCLUSIVE_MAXIMUM_VIOLATED".to_string(), + message: format!("Value {} >= ex_max {}", num, ex_max), + path: self.path.to_string(), + }); + } + } + if let Some(multiple_of) = self.schema.multiple_of { + let val = num / multiple_of; + if (val - val.round()).abs() > f64::EPSILON { + result.errors.push(ValidationError { + code: "MULTIPLE_OF_VIOLATED".to_string(), + message: format!("Value {} not multiple of {}", num, multiple_of), + path: 
self.path.to_string(), + }); + } + } + } + } + + pub(crate) fn validate_string(&self, result: &mut ValidationResult) { + let current = self.instance.as_value(); + if let Some(s) = current.as_str() { + if let Some(min) = self.schema.min_length { + if (s.chars().count() as f64) < min { + result.errors.push(ValidationError { + code: "MIN_LENGTH_VIOLATED".to_string(), + message: format!("Length < min {}", min), + path: self.path.to_string(), + }); + } + } + if let Some(max) = self.schema.max_length { + if (s.chars().count() as f64) > max { + result.errors.push(ValidationError { + code: "MAX_LENGTH_VIOLATED".to_string(), + message: format!("Length > max {}", max), + path: self.path.to_string(), + }); + } + } + if let Some(ref compiled_re) = self.schema.compiled_pattern { + if !compiled_re.0.is_match(s) { + result.errors.push(ValidationError { + code: "PATTERN_VIOLATED".to_string(), + message: format!("Pattern mismatch {:?}", self.schema.pattern), + path: self.path.to_string(), + }); + } + } else if let Some(ref pattern) = self.schema.pattern { + if let Ok(re) = Regex::new(pattern) { + if !re.is_match(s) { + result.errors.push(ValidationError { + code: "PATTERN_VIOLATED".to_string(), + message: format!("Pattern mismatch {}", pattern), + path: self.path.to_string(), + }); + } + } + } + } + } + + pub(crate) fn validate_format(&self, result: &mut ValidationResult) { + let current = self.instance.as_value(); + if let Some(ref compiled_fmt) = self.schema.compiled_format { + match compiled_fmt { + crate::compiler::CompiledFormat::Func(f) => { + let should = if let Some(s) = current.as_str() { + !s.is_empty() + } else { + true + }; + if should { + if let Err(e) = f(current) { + result.errors.push(ValidationError { + code: "FORMAT_MISMATCH".to_string(), + message: format!("Format error: {}", e), + path: self.path.to_string(), + }); + } + } + } + crate::compiler::CompiledFormat::Regex(re) => { + if let Some(s) = current.as_str() { + if !re.is_match(s) { + 
result.errors.push(ValidationError { + code: "FORMAT_MISMATCH".to_string(), + message: "Format regex mismatch".to_string(), + path: self.path.to_string(), + }); + } + } + } + } + } + } + + pub(crate) fn validate_object( + &self, + result: &mut ValidationResult, + ) -> Result<(), ValidationError> { + let current = self.instance.as_value(); + if let Some(obj) = current.as_object() { + if let Some(min) = self.schema.min_properties { + if (obj.len() as f64) < min { + result.errors.push(ValidationError { + code: "MIN_PROPERTIES".to_string(), + message: "Too few properties".to_string(), + path: self.path.to_string(), + }); + } + } + if let Some(max) = self.schema.max_properties { + if (obj.len() as f64) > max { + result.errors.push(ValidationError { + code: "MAX_PROPERTIES".to_string(), + message: "Too many properties".to_string(), + path: self.path.to_string(), + }); + } + } + if let Some(ref req) = self.schema.required { + for field in req { + if !obj.contains_key(field) { + result.errors.push(ValidationError { + code: "REQUIRED_FIELD_MISSING".to_string(), + message: format!("Missing {}", field), + path: format!("{}/{}", self.path, field), + }); + } + } + } + if let Some(ref dep_req) = self.schema.dependent_required { + for (key, required_keys) in dep_req { + if obj.contains_key(key) { + for req_key in required_keys { + if !obj.contains_key(req_key) { + result.errors.push(ValidationError { + code: "DEPENDENT_REQUIRED".to_string(), + message: format!("Missing dependent {}", req_key), + path: self.path.to_string(), + }); + } + } + } + } + } + if let Some(ref dep_sch) = self.schema.dependent_schemas { + for (key, sub_schema) in dep_sch { + if obj.contains_key(key) { + let derived = self.derive( + sub_schema, + self.instance, + &self.path, + self.scope.clone(), + HashSet::new(), + self.extensible, + false, + ); + result.merge(derived.validate()?); + } + } + } + + if let Some(props) = &self.schema.properties { + for (key, sub_schema) in props { + if 
self.overrides.contains(key) { + continue; + } + + if let Some(child_instance) = self.instance.child_at_key(key) { + let new_path = format!("{}/{}", self.path, key); + let is_ref = sub_schema.ref_string.is_some() || sub_schema.obj.dynamic_ref.is_some(); + let next_extensible = if is_ref { false } else { self.extensible }; + + let derived = self.derive( + sub_schema, + child_instance, + &new_path, + self.scope.clone(), + HashSet::new(), + next_extensible, + false, + ); + let item_res = derived.validate()?; + result.merge(item_res); + result.evaluated_keys.insert(key.clone()); + } + } + } + + if let Some(ref compiled_pp) = self.schema.compiled_pattern_properties { + for (compiled_re, sub_schema) in compiled_pp { + for (key, _) in obj { + if compiled_re.0.is_match(key) { + if let Some(child_instance) = self.instance.child_at_key(key) { + let new_path = format!("{}/{}", self.path, key); + let is_ref = + sub_schema.ref_string.is_some() || sub_schema.obj.dynamic_ref.is_some(); + let next_extensible = if is_ref { false } else { self.extensible }; + + let derived = self.derive( + sub_schema, + child_instance, + &new_path, + self.scope.clone(), + HashSet::new(), + next_extensible, + false, + ); + let item_res = derived.validate()?; + result.merge(item_res); + result.evaluated_keys.insert(key.clone()); + } + } + } + } + } + + if let Some(ref additional_schema) = self.schema.additional_properties { + for (key, _) in obj { + let mut locally_matched = false; + if let Some(props) = &self.schema.properties { + if props.contains_key(key) { + locally_matched = true; + } + } + if !locally_matched { + if let Some(ref compiled_pp) = self.schema.compiled_pattern_properties { + for (compiled_re, _) in compiled_pp { + if compiled_re.0.is_match(key) { + locally_matched = true; + break; + } + } + } + } + + if !locally_matched { + if let Some(child_instance) = self.instance.child_at_key(key) { + let new_path = format!("{}/{}", self.path, key); + let is_ref = 
additional_schema.ref_string.is_some() + || additional_schema.obj.dynamic_ref.is_some(); + let next_extensible = if is_ref { false } else { self.extensible }; + + let derived = self.derive( + additional_schema, + child_instance, + &new_path, + self.scope.clone(), + HashSet::new(), + next_extensible, + false, + ); + let item_res = derived.validate()?; + result.merge(item_res); + result.evaluated_keys.insert(key.clone()); + } + } + } + } + + if let Some(ref property_names) = self.schema.property_names { + for key in obj.keys() { + let _new_path = format!("{}/propertyNames/{}", self.path, key); + let val_str = Value::String(key.clone()); + + let ctx = ValidationContext::new( + self.validator, + self.root, + property_names, + crate::validator::ReadOnlyInstance(&val_str), + self.scope.clone(), + HashSet::new(), + self.extensible, + self.reporter, + ); + + result.merge(ctx.validate()?); + } + } + } + + if !self.extensible { + self.instance.prune_object(&result.evaluated_keys); + } + + Ok(()) + } + + pub(crate) fn validate_array( + &self, + result: &mut ValidationResult, + ) -> Result<(), ValidationError> { + let current = self.instance.as_value(); + if let Some(arr) = current.as_array() { + if let Some(min) = self.schema.min_items { + if (arr.len() as f64) < min { + result.errors.push(ValidationError { + code: "MIN_ITEMS".to_string(), + message: "Too few items".to_string(), + path: self.path.to_string(), + }); + } + } + if let Some(max) = self.schema.max_items { + if (arr.len() as f64) > max { + result.errors.push(ValidationError { + code: "MAX_ITEMS".to_string(), + message: "Too many items".to_string(), + path: self.path.to_string(), + }); + } + } + + if self.schema.unique_items.unwrap_or(false) { + let mut seen: Vec<&Value> = Vec::new(); + for item in arr { + if seen.contains(&item) { + result.errors.push(ValidationError { + code: "UNIQUE_ITEMS_VIOLATED".to_string(), + message: "Array has duplicate items".to_string(), + path: self.path.to_string(), + }); + break; + } + 
seen.push(item); + } + } + + if let Some(ref contains_schema) = self.schema.contains { + let mut _match_count = 0; + for i in 0..arr.len() { + if let Some(child_instance) = self.instance.child_at_index(i) { + let derived = self.derive( + contains_schema, + child_instance, + &self.path, + self.scope.clone(), + HashSet::new(), + self.extensible, + false, + ); + + let check = derived.validate()?; + if check.is_valid() { + _match_count += 1; + result.evaluated_indices.insert(i); + } + } + } + + let min = self.schema.min_contains.unwrap_or(1.0) as usize; + if _match_count < min { + result.errors.push(ValidationError { + code: "CONTAINS_VIOLATED".to_string(), + message: format!("Contains matches {} < min {}", _match_count, min), + path: self.path.to_string(), + }); + } + if let Some(max) = self.schema.max_contains { + if _match_count > max as usize { + result.errors.push(ValidationError { + code: "CONTAINS_VIOLATED".to_string(), + message: format!("Contains matches {} > max {}", _match_count, max), + path: self.path.to_string(), + }); + } + } + } + + let len = arr.len(); + let mut validation_index = 0; + + if let Some(ref prefix) = self.schema.prefix_items { + for (i, sub_schema) in prefix.iter().enumerate() { + if i < len { + let path = format!("{}/{}", self.path, i); + if let Some(child_instance) = self.instance.child_at_index(i) { + let derived = self.derive( + sub_schema, + child_instance, + &path, + self.scope.clone(), + HashSet::new(), + self.extensible, + false, + ); + let item_res = derived.validate()?; + result.merge(item_res); + result.evaluated_indices.insert(i); + validation_index += 1; + } + } + } + } + + if let Some(ref items_schema) = self.schema.items { + for i in validation_index..len { + let path = format!("{}/{}", self.path, i); + if let Some(child_instance) = self.instance.child_at_index(i) { + let derived = self.derive( + items_schema, + child_instance, + &path, + self.scope.clone(), + HashSet::new(), + self.extensible, + false, + ); + let item_res = 
derived.validate()?; + result.merge(item_res); + result.evaluated_indices.insert(i); + } + } + } + } + + if !self.extensible { + self.instance.prune_array(&result.evaluated_indices); + } + + Ok(()) + } + + pub(crate) fn validate_combinators( + &self, + result: &mut ValidationResult, + ) -> Result<(), ValidationError> { + if let Some(ref all_of) = self.schema.all_of { + for sub in all_of { + let derived = self.derive_for_schema(sub, true); + let res = derived.validate()?; + result.merge(res); + } + } + + if let Some(ref any_of) = self.schema.any_of { + let mut valid = false; + + for sub in any_of { + let derived = self.derive_for_schema(sub, true); + let sub_res = derived.validate()?; + if sub_res.is_valid() { + valid = true; + result.merge(sub_res); + } + } + + if !valid { + result.errors.push(ValidationError { + code: "ANY_OF_VIOLATED".to_string(), + message: "Matches none of anyOf schemas".to_string(), + path: self.path.to_string(), + }); + } + } + + if let Some(ref one_of) = self.schema.one_of { + let mut valid_count = 0; + let mut valid_res = ValidationResult::new(); + + for sub in one_of { + let derived = self.derive_for_schema(sub, true); + let sub_res = derived.validate()?; + if sub_res.is_valid() { + valid_count += 1; + valid_res = sub_res; + } + } + + if valid_count == 1 { + result.merge(valid_res); + } else if valid_count == 0 { + result.errors.push(ValidationError { + code: "ONE_OF_VIOLATED".to_string(), + message: "Matches none of oneOf schemas".to_string(), + path: self.path.to_string(), + }); + } else { + result.errors.push(ValidationError { + code: "ONE_OF_VIOLATED".to_string(), + message: format!("Matches {} of oneOf schemas (expected 1)", valid_count), + path: self.path.to_string(), + }); + } + } + + if let Some(ref not_schema) = self.schema.not { + let derived = self.derive_for_schema(not_schema, true); + let sub_res = derived.validate()?; + if sub_res.is_valid() { + result.errors.push(ValidationError { + code: "NOT_VIOLATED".to_string(), + 
message: "Matched 'not' schema".to_string(), + path: self.path.to_string(), + }); + } + } + + Ok(()) + } + + pub(crate) fn validate_conditionals( + &self, + result: &mut ValidationResult, + ) -> Result<(), ValidationError> { + if let Some(ref if_schema) = self.schema.if_ { + let derived_if = self.derive_for_schema(if_schema, true); + let if_res = derived_if.validate()?; + + result.evaluated_keys.extend(if_res.evaluated_keys.clone()); + result + .evaluated_indices + .extend(if_res.evaluated_indices.clone()); + + if if_res.is_valid() { + if let Some(ref then_schema) = self.schema.then_ { + let derived_then = self.derive_for_schema(then_schema, true); + result.merge(derived_then.validate()?); + } + } else { + if let Some(ref else_schema) = self.schema.else_ { + let derived_else = self.derive_for_schema(else_schema, true); + result.merge(derived_else.validate()?); + } + } + } + + Ok(()) + } + + pub(crate) fn validate_strictness(&self, result: &mut ValidationResult) { + if self.extensible || self.reporter { + return; + } + + if let Some(obj) = self.instance.as_value().as_object() { + for key in obj.keys() { + if !result.evaluated_keys.contains(key) && !self.overrides.contains(key) { + result.errors.push(ValidationError { + code: "STRICT_PROPERTY_VIOLATION".to_string(), + message: format!("Unexpected property '{}'", key), + path: format!("{}/{}", self.path, key), + }); + } + } + } + + if let Some(arr) = self.instance.as_value().as_array() { + for i in 0..arr.len() { + if !result.evaluated_indices.contains(&i) { + result.errors.push(ValidationError { + code: "STRICT_ITEM_VIOLATION".to_string(), + message: format!("Unexpected item at index {}", i), + path: format!("{}/{}", self.path, i), + }); + } + } + } + } +} diff --git a/src/schema.rs b/src/schema.rs index 710b0ce..3277828 100644 --- a/src/schema.rs +++ b/src/schema.rs @@ -35,6 +35,9 @@ pub struct SchemaObject { pub pattern_properties: Option>>, #[serde(rename = "additionalProperties")] pub additional_properties: 
Option>, + #[serde(rename = "$family")] + pub family: Option, + pub required: Option>, // dependencies can be schema dependencies or property dependencies diff --git a/src/util.rs b/src/util.rs index 2e50080..f427576 100644 --- a/src/util.rs +++ b/src/util.rs @@ -50,74 +50,12 @@ pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> { let group = &suite[index]; let mut failures = Vec::::new(); - // Create Local Registry for this test group - let mut registry = crate::registry::Registry::new(); - - // Helper to register items with 'schemas' - let register_schemas = |registry: &mut crate::registry::Registry, items_val: Option<&Value>| { - if let Some(val) = items_val { - if let Value::Array(arr) = val { - for item in arr { - if let Some(schemas_val) = item.get("schemas") { - if let Value::Array(schemas) = schemas_val { - for schema_val in schemas { - if let Ok(schema) = - serde_json::from_value::(schema_val.clone()) - { - registry.add(schema); - } - } - } - } - } - } - } - }; - - // 1. Register Family Schemas if 'types' is present - if let Some(types_val) = &group.types { - if let Value::Array(arr) = types_val { - let mut family_map: std::collections::HashMap> = - std::collections::HashMap::new(); - - for item in arr { - if let Some(name) = item.get("name").and_then(|v| v.as_str()) { - if let Some(hierarchy) = item.get("hierarchy").and_then(|v| v.as_array()) { - for ancestor in hierarchy { - if let Some(anc_str) = ancestor.as_str() { - family_map - .entry(anc_str.to_string()) - .or_default() - .insert(name.to_string()); - } - } - } - } - } - - for (family_name, members) in family_map { - let id = format!("{}.family", family_name); - let object_refs: Vec = members - .iter() - .map(|s| serde_json::json!({ "$ref": s })) - .collect(); - - let schema_json = serde_json::json!({ - "$id": id, - "oneOf": object_refs - }); - - if let Ok(schema) = serde_json::from_value::(schema_json) { - registry.add(schema); - } - } - } - } - - // 2. 
Register items directly - register_schemas(&mut registry, group.enums.as_ref()); - register_schemas(&mut registry, group.types.as_ref()); - register_schemas(&mut registry, group.puncs.as_ref()); + // Create Validator Instance and parse enums, types, and puncs automatically + let mut validator = Validator::from_punc_definition( + group.enums.as_ref(), + group.types.as_ref(), + group.puncs.as_ref(), + ); // 3. Register root 'schemas' if present (generic test support) // Some tests use a raw 'schema' or 'schemas' field at the group level @@ -126,12 +64,12 @@ pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> { Ok(mut schema) => { let id_clone = schema.obj.id.clone(); if id_clone.is_some() { - registry.add(schema); + validator.registry.add(schema); } else { // Fallback ID if none provided in schema let id = format!("test:{}:{}", path, index); schema.obj.id = Some(id); - registry.add(schema); + validator.registry.add(schema); } } Err(e) => { @@ -143,9 +81,6 @@ pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> { } } - // Create Validator Instance (Takes ownership of registry) - let validator = Validator::new(registry); - // 4. 
Run Tests for (_test_index, test) in group.tests.iter().enumerate() { let mut schema_id = test.schema_id.clone(); @@ -251,79 +186,13 @@ pub fn run_test_file(path: &str) -> Result<(), String> { let mut failures = Vec::::new(); for (group_index, group) in suite.into_iter().enumerate() { - // Create Isolated Registry for this test group - let mut registry = crate::registry::Registry::new(); + // Create Validator Instance and parse enums, types, and puncs automatically + let mut validator = Validator::from_punc_definition( + group.enums.as_ref(), + group.types.as_ref(), + group.puncs.as_ref(), + ); - // Helper to register items with 'schemas' - let register_schemas = |registry: &mut crate::registry::Registry, items_val: Option| { - if let Some(val) = items_val { - if let Value::Array(arr) = val { - for item in arr { - if let Some(schemas_val) = item.get("schemas") { - if let Value::Array(schemas) = schemas_val { - for schema_val in schemas { - if let Ok(schema) = - serde_json::from_value::(schema_val.clone()) - { - registry.add(schema); - } - } - } - } - } - } - } - }; - - // 1. Register Family Schemas if 'types' is present - if let Some(types_val) = &group.types { - if let Value::Array(arr) = types_val { - let mut family_map: std::collections::HashMap> = - std::collections::HashMap::new(); - - for item in arr { - if let Some(name) = item.get("name").and_then(|v| v.as_str()) { - // Default hierarchy contains self if not specified? - // Usually hierarchy is explicit in these tests. 
- if let Some(hierarchy) = item.get("hierarchy").and_then(|v| v.as_array()) { - for ancestor in hierarchy { - if let Some(anc_str) = ancestor.as_str() { - family_map - .entry(anc_str.to_string()) - .or_default() - .insert(name.to_string()); - } - } - } - } - } - - for (family_name, members) in family_map { - let id = format!("{}.family", family_name); - let object_refs: Vec = members - .into_iter() - .map(|s| serde_json::json!({ "$ref": s })) - .collect(); - - let schema_json = serde_json::json!({ - "$id": id, - "oneOf": object_refs - }); - - if let Ok(schema) = serde_json::from_value::(schema_json) { - registry.add(schema); - } - } - } - } - - // Register 'types', 'enums', and 'puncs' if present (JSPG style) - register_schemas(&mut registry, group.types); - register_schemas(&mut registry, group.enums); - register_schemas(&mut registry, group.puncs); - - // Register main 'schema' if present (Standard style) - // Ensure ID is a valid URI to avoid Url::parse errors in Compiler let unique_id = format!("test:{}:{}", path, group_index); // Register main 'schema' if present (Standard style) @@ -336,12 +205,9 @@ pub fn run_test_file(path: &str) -> Result<(), String> { if schema.obj.id.is_none() { schema.obj.id = Some(unique_id.clone()); } - registry.add(schema); + validator.registry.add(schema); } - // Create Instance (Takes Ownership) - let validator = Validator::new(registry); - for test in group.tests { // Use explicit schema_id from test, or default to unique_id let schema_id = test.schema_id.as_deref().unwrap_or(&unique_id).to_string(); diff --git a/src/validator.rs b/src/validator.rs index e398f42..c93fd76 100644 --- a/src/validator.rs +++ b/src/validator.rs @@ -1,1282 +1,86 @@ +pub use crate::context::ValidationContext; +pub use crate::error::ValidationError; +pub use crate::instance::{MutableInstance, ReadOnlyInstance}; +pub use crate::result::ValidationResult; + use crate::registry::Registry; use crate::schema::Schema; -use percent_encoding; - -use regex::Regex; 
use serde_json::Value; use std::collections::HashSet; - -#[derive(Debug, Clone, serde::Serialize)] -pub struct ValidationError { - pub code: String, - pub message: String, - pub path: String, -} +use std::sync::Arc; pub enum ResolvedRef<'a> { Local(&'a Schema), Global(&'a Schema, &'a Schema), } -#[derive(Debug, Default, Clone, serde::Serialize)] -pub struct ValidationResult { - pub errors: Vec, - #[serde(skip)] - pub evaluated_keys: HashSet, - #[serde(skip)] - pub evaluated_indices: HashSet, -} - -impl ValidationResult { - pub fn new() -> Self { - Self::default() - } - - pub fn merge(&mut self, other: ValidationResult) { - self.errors.extend(other.errors); - self.evaluated_keys.extend(other.evaluated_keys); - self.evaluated_indices.extend(other.evaluated_indices); - } - - pub fn is_valid(&self) -> bool { - self.errors.is_empty() - } -} - -use std::ptr::NonNull; - -pub trait ValidationInstance<'a>: Copy + Clone { - fn as_value(&self) -> &'a Value; - fn child_at_key(&self, key: &str) -> Option; - fn child_at_index(&self, idx: usize) -> Option; - fn prune_object(&self, _keys: &HashSet) {} - fn prune_array(&self, _indices: &HashSet) {} -} - -#[derive(Clone, Copy)] -pub struct ReadOnlyInstance<'a>(pub &'a Value); - -impl<'a> ValidationInstance<'a> for ReadOnlyInstance<'a> { - fn as_value(&self) -> &'a Value { - self.0 - } - - fn child_at_key(&self, key: &str) -> Option { - self.0.get(key).map(ReadOnlyInstance) - } - - fn child_at_index(&self, idx: usize) -> Option { - self.0.get(idx).map(ReadOnlyInstance) - } -} - -#[derive(Clone, Copy)] -pub struct MutableInstance { - ptr: NonNull, -} - -impl MutableInstance { - pub fn new(val: &mut Value) -> Self { - Self { - ptr: NonNull::from(val), - } - } -} - -impl<'a> ValidationInstance<'a> for MutableInstance { - fn as_value(&self) -> &'a Value { - unsafe { self.ptr.as_ref() } - } - - fn child_at_key(&self, key: &str) -> Option { - unsafe { - if let Some(obj) = self.ptr.as_ref().as_object() { - // We use as_ref() to check 
existence (safe read). - if obj.contains_key(key) { - // Now we need mutable pointer to child. - // Since we have *mut parent, and we know key exists... - // casting *mut Value -> &mut Value -> get_mut -> *mut Value - // This is safe because we are single threaded and not holding other refs across this call. - let parent_mut = &mut *self.ptr.as_ptr(); - if let Some(child_val) = parent_mut.get_mut(key) { - return Some(MutableInstance::new(child_val)); - } - } - } - None - } - } - - fn child_at_index(&self, idx: usize) -> Option { - unsafe { - if let Some(arr) = self.ptr.as_ref().as_array() { - if idx < arr.len() { - let parent_mut = &mut *self.ptr.as_ptr(); - if let Some(child_val) = parent_mut.get_mut(idx) { - return Some(MutableInstance::new(child_val)); - } - } - } - None - } - } - - fn prune_object(&self, keys: &HashSet) { - unsafe { - // We must re-acquire mutable reference from pointer - let val_mut = &mut *self.ptr.as_ptr(); - if let Some(obj) = val_mut.as_object_mut() { - obj.retain(|k, _| keys.contains(k)); - } - } - } - - fn prune_array(&self, indices: &HashSet) { - unsafe { - let val_mut = &mut *self.ptr.as_ptr(); - if let Some(arr) = val_mut.as_array_mut() { - let mut i = 0; - arr.retain(|_| { - let keep = indices.contains(&i); - i += 1; - keep - }); - } - } - } -} - -pub struct ValidationContext<'a, I: ValidationInstance<'a>> { - // 1. Global (The Library) - now passed as reference - pub registry: &'a Registry, - pub root: &'a Schema, - - // 2. The Instruction (The Rule) - pub schema: &'a Schema, - - // 3. The Data (The Instance) - pub instance: I, - - // 4. State - pub path: String, - pub depth: usize, - pub scope: Vec, // OWNED to avoid lifetime hell - - // 5. 
Config - pub overrides: HashSet, // Keywords explicitly defined by callers that I should skip (Inherited Mask) - pub extensible: bool, - pub reporter: bool, // If true, we only report evaluated keys, don't enforce strictness -} - -impl<'a, I: ValidationInstance<'a>> ValidationContext<'a, I> { - pub fn new( - registry: &'a Registry, - root: &'a Schema, - schema: &'a Schema, - instance: I, - scope: Vec, - overrides: HashSet, - extensible: bool, - reporter: bool, - ) -> Self { - let effective_extensible = schema.extensible.unwrap_or(extensible); - Self { - registry, - root, - schema, - instance, - path: String::new(), - depth: 0, - scope, - overrides, - extensible: effective_extensible, - reporter, - } - } - - pub fn derive( - &self, - schema: &'a Schema, - instance: I, // We take I directly (it is Copy) - path: &str, - scope: Vec, - overrides: HashSet, - extensible: bool, - reporter: bool, - ) -> Self { - let effective_extensible = schema.extensible.unwrap_or(extensible); - - Self { - registry: self.registry, - root: self.root, - schema, - instance, - path: path.to_string(), - depth: self.depth + 1, - scope, - overrides, - extensible: effective_extensible, - reporter, - } - } - - // Helper to derive for same instance but different schema (e.g. 
allOf) - pub fn derive_for_schema(&self, schema: &'a Schema, reporter: bool) -> Self { - self.derive( - schema, - self.instance, // Copy - &self.path, - self.scope.clone(), - HashSet::new(), // Reset overrides for composition/branching (Strict Intersection) - self.extensible, // Inherited extensibility doesn't change for same-level schema switch - reporter, - ) - } - - // --- Main Validation Entry Point --- - - pub fn validate(&self) -> Result { - // Check if we need to update scope due to ID - let mut effective_scope = self.scope.clone(); - - if let Some(id) = &self.schema.obj.id { - let current_base = self.scope.last().map(|s| s.as_str()).unwrap_or(""); - let mut new_base = id.clone(); - if !current_base.is_empty() { - if let Ok(base_url) = url::Url::parse(current_base) { - if let Ok(joined) = base_url.join(id) { - new_base = joined.to_string(); - } - } - } - - effective_scope.push(new_base); - - // If scope changed, we create a shadow context to use the new scope - let shadow = ValidationContext { - registry: self.registry, - root: self.root, - schema: self.schema, - instance: self.instance, - path: self.path.clone(), - depth: self.depth, - scope: effective_scope, - overrides: self.overrides.clone(), - extensible: self.extensible, - reporter: self.reporter, - }; - return shadow.validate_scoped(); - } - - // If no ID change, proceed - self.validate_scoped() - } - - fn validate_scoped(&self) -> Result { - let mut result = ValidationResult::new(); - - if self.depth > 100 { - return Err(ValidationError { - code: "RECURSION_LIMIT_EXCEEDED".to_string(), - message: "Recursion limit exceeded".to_string(), - path: self.path.to_string(), - }); - } - - if self.schema.always_fail { - result.errors.push(ValidationError { - code: "FALSE_SCHEMA".to_string(), - message: "Schema is false".to_string(), - path: self.path.to_string(), - }); - return Ok(result); - } - - // --- Helpers Groups --- - - if let Some(ref_res) = self.validate_refs()? 
{ - result.merge(ref_res); - } - - // 2. Core - self.validate_core(&mut result); - - // 3. Numeric - self.validate_numeric(&mut result); - - // 4. String - self.validate_string(&mut result); - - // 5. Format - self.validate_format(&mut result); - - // 6. Object - self.validate_object(&mut result)?; - - // 7. Array - self.validate_array(&mut result)?; - - // 8. Combinators - self.validate_combinators(&mut result)?; - - // 9. Conditionals - self.validate_conditionals(&mut result)?; - - // If extensible, mark all as evaluated so strictness checks pass and parents don't complain - if self.extensible { - let current = self.instance.as_value(); - if let Some(obj) = current.as_object() { - result.evaluated_keys.extend(obj.keys().cloned()); - } else if let Some(arr) = current.as_array() { - result.evaluated_indices.extend(0..arr.len()); - } - } - - // --- Strictness Check --- - if !self.reporter { - self.check_strictness(&mut result); - } - - Ok(result) - } - - fn validate_refs(&self) -> Result, ValidationError> { - let mut res = ValidationResult::new(); - let mut handled = false; - - // Scope is already effective due to validate() wrapper! 
- // self.scope is Vec - let effective_scope = &self.scope; - let current_base_resolved = effective_scope.last().map(|s| s.as_str()).unwrap_or(""); - // Removed unused current binding - - // $ref - if let Some(ref ref_string) = self.schema.ref_string { - handled = true; - if ref_string == "#" { - // Self-reference to root - // Calculate new overrides (Masking) - let mut new_overrides = self.overrides.clone(); - if let Some(props) = &self.schema.properties { - new_overrides.extend(props.keys().cloned()); - } - - let derived = self.derive( - self.root, - self.instance, // Copy - &self.path, - effective_scope.clone(), - new_overrides, - self.extensible, - self.reporter, // Inherit so efficient composition (allOf) works, but property refs stay strict - ); - res.merge(derived.validate()?); - } else { - if let Some((resolved, matched_key)) = - Validator::resolve_ref(self.registry, self.root, ref_string, current_base_resolved) - { - let (target_root, target_schema) = match &resolved { - ResolvedRef::Local(s) => (self.root, *s), - ResolvedRef::Global(root, s) => (*root, *s), - }; - - // Scope Injection - let resource_base = if let Some((base, _)) = matched_key.split_once('#') { - base - } else { - &matched_key - }; - - let scope_to_pass = if target_schema.obj.id.is_none() { - if !resource_base.is_empty() && resource_base != current_base_resolved { - let mut new_scope = effective_scope.clone(); - new_scope.push(resource_base.to_string()); - new_scope - } else { - effective_scope.clone() - } - } else { - effective_scope.clone() - }; - - // Calculate new overrides (Masking) - let mut new_overrides = self.overrides.clone(); - if let Some(props) = &self.schema.properties { - new_overrides.extend(props.keys().cloned()); - } - - let target_ctx = ValidationContext::new( - self.registry, - target_root, - target_schema, - self.instance, // Copy - scope_to_pass, - new_overrides, - false, // Reset extensibility for $ref (Default Strict) - self.reporter, // Propagate reporter state - 
); - // Manually set path/depth to continue trace - let mut manual_ctx = target_ctx; - manual_ctx.path = self.path.clone(); - manual_ctx.depth = self.depth + 1; - - let target_res = manual_ctx.validate()?; - - res.merge(target_res); - handled = true; - } else { - res.errors.push(ValidationError { - code: "REF_RESOLUTION_FAILED".to_string(), - message: format!("Could not resolve reference '{}'", ref_string), - path: self.path.to_string(), - }); - } - } - } - - // $dynamicRef - if let Some(ref d_ref) = self.schema.obj.dynamic_ref { - handled = true; - let anchor = if let Some(idx) = d_ref.rfind('#') { - &d_ref[idx + 1..] - } else { - d_ref.as_str() - }; - - let mut resolved_target: Option<(ResolvedRef, String)> = None; - let local_resolution = - Validator::resolve_ref(self.registry, self.root, d_ref, current_base_resolved); - - // Bookending - let is_bookended = if let Some((ResolvedRef::Local(s), _)) = &local_resolution { - s.obj.dynamic_anchor.as_deref() == Some(anchor) - } else { - false - }; - - if is_bookended { - // Dynamic Search - for base in effective_scope.iter() { - let resource_base = if let Some((r, _)) = base.split_once('#') { - r - } else { - base - }; - let key = format!("{}#{}", resource_base, anchor); - - // Local - if let Some(indexrs) = &self.root.obj.compiled_registry { - if let Some(s) = indexrs.schemas.get(&key) { - if s.obj.dynamic_anchor.as_deref() == Some(anchor) { - resolved_target = Some((ResolvedRef::Local(s.as_ref()), key.clone())); - break; - } - } - } - // Global - if resolved_target.is_none() { - if let Some(registry_arc) = &self.root.obj.compiled_registry { - if let Some(compiled) = registry_arc.schemas.get(resource_base) { - if let Some(indexrs) = &compiled.obj.compiled_registry { - if let Some(s) = indexrs.schemas.get(&key) { - if s.obj.dynamic_anchor.as_deref() == Some(anchor) { - resolved_target = Some(( - ResolvedRef::Global(compiled.as_ref(), s.as_ref()), - key.clone(), - )); - break; - } - } - } - } - } else { - // Try global 
registry directly if root doesn't have it (e.g. cross-file) - if let Some(compiled) = self.registry.schemas.get(resource_base) { - if let Some(indexrs) = &compiled.obj.compiled_registry { - if let Some(s) = indexrs.schemas.get(&key) { - if s.obj.dynamic_anchor.as_deref() == Some(anchor) { - resolved_target = Some(( - ResolvedRef::Global(compiled.as_ref(), s.as_ref()), - key.clone(), - )); - break; - } - } - } - } - } - } - if resolved_target.is_some() { - break; - } - } - } - - if resolved_target.is_none() { - resolved_target = local_resolution; - } - - if let Some((resolved, matched_key)) = resolved_target { - let (target_root, target_schema) = match &resolved { - ResolvedRef::Local(s) => (self.root, *s), - ResolvedRef::Global(root, s) => (*root, *s), - }; - - let resource_base = if let Some((base, _)) = matched_key.split_once('#') { - base - } else { - &matched_key - }; - - let scope_to_pass = if let Some(ref tid) = target_schema.obj.id { - let mut new_scope = effective_scope.clone(); - new_scope.push(tid.clone()); - new_scope - } else { - if !resource_base.is_empty() && resource_base != current_base_resolved { - let mut new_scope = effective_scope.clone(); - new_scope.push(resource_base.to_string()); - new_scope - } else { - effective_scope.clone() - } - }; - - // Calculate new overrides (Masking) - let mut new_overrides = self.overrides.clone(); - if let Some(props) = &self.schema.properties { - new_overrides.extend(props.keys().cloned()); - } - - let target_ctx = ValidationContext::new( - self.registry, - target_root, - target_schema, - self.instance, // Copy - scope_to_pass, - new_overrides, - false, - self.reporter, // Propagate reporter - ); - let mut manual_ctx = target_ctx; - manual_ctx.path = self.path.clone(); - manual_ctx.depth = self.depth + 1; - // manual_ctx.reporter = true; - - res.merge(manual_ctx.validate()?); - } else { - res.errors.push(ValidationError { - code: "REF_RESOLUTION_FAILED".to_string(), - message: format!("Could not resolve dynamic 
reference '{}'", d_ref), - path: self.path.to_string(), - }); - } - } - - if handled { Ok(Some(res)) } else { Ok(None) } - } - - fn validate_core(&self, result: &mut ValidationResult) { - let current = self.instance.as_value(); - // Type - if let Some(ref type_) = self.schema.type_ { - match type_ { - crate::schema::SchemaTypeOrArray::Single(t) => { - if !Validator::check_type(t, current) { - result.errors.push(ValidationError { - code: "INVALID_TYPE".to_string(), - message: format!("Expected type '{}'", t), - path: self.path.to_string(), - }); - } - } - crate::schema::SchemaTypeOrArray::Multiple(types) => { - let mut valid = false; - for t in types { - if Validator::check_type(t, current) { - valid = true; - break; - } - } - if !valid { - result.errors.push(ValidationError { - code: "INVALID_TYPE".to_string(), - message: format!("Expected one of types {:?}", types), - path: self.path.to_string(), - }); - } - } - } - } - - // Const - if let Some(ref const_val) = self.schema.const_ { - if !crate::util::equals(current, const_val) { - result.errors.push(ValidationError { - code: "CONST_VIOLATED".to_string(), - message: "Value does not match const".to_string(), - path: self.path.to_string(), - }); - } else { - if let Some(obj) = current.as_object() { - result.evaluated_keys.extend(obj.keys().cloned()); - } else if let Some(arr) = current.as_array() { - result.evaluated_indices.extend(0..arr.len()); - } - } - } - - // Enum - if let Some(ref enum_vals) = self.schema.enum_ { - let mut found = false; - for val in enum_vals { - if crate::util::equals(current, val) { - found = true; - break; - } - } - if !found { - result.errors.push(ValidationError { - code: "ENUM_MISMATCH".to_string(), - message: "Value is not in enum".to_string(), - path: self.path.to_string(), - }); - } else { - if let Some(obj) = current.as_object() { - result.evaluated_keys.extend(obj.keys().cloned()); - } else if let Some(arr) = current.as_array() { - result.evaluated_indices.extend(0..arr.len()); - } 
- } - } - } - - fn validate_numeric(&self, result: &mut ValidationResult) { - let current = self.instance.as_value(); - if let Some(num) = current.as_f64() { - if let Some(min) = self.schema.minimum { - if num < min { - result.errors.push(ValidationError { - code: "MINIMUM_VIOLATED".to_string(), - message: format!("Value {} < min {}", num, min), - path: self.path.to_string(), - }); - } - } - if let Some(max) = self.schema.maximum { - if num > max { - result.errors.push(ValidationError { - code: "MAXIMUM_VIOLATED".to_string(), - message: format!("Value {} > max {}", num, max), - path: self.path.to_string(), - }); - } - } - if let Some(ex_min) = self.schema.exclusive_minimum { - if num <= ex_min { - result.errors.push(ValidationError { - code: "EXCLUSIVE_MINIMUM_VIOLATED".to_string(), - message: format!("Value {} <= ex_min {}", num, ex_min), - path: self.path.to_string(), - }); - } - } - if let Some(ex_max) = self.schema.exclusive_maximum { - if num >= ex_max { - result.errors.push(ValidationError { - code: "EXCLUSIVE_MAXIMUM_VIOLATED".to_string(), - message: format!("Value {} >= ex_max {}", num, ex_max), - path: self.path.to_string(), - }); - } - } - if let Some(multiple_of) = self.schema.multiple_of { - let val = num / multiple_of; - if (val - val.round()).abs() > f64::EPSILON { - result.errors.push(ValidationError { - code: "MULTIPLE_OF_VIOLATED".to_string(), - message: format!("Value {} not multiple of {}", num, multiple_of), - path: self.path.to_string(), - }); - } - } - } - } - - fn validate_string(&self, result: &mut ValidationResult) { - let current = self.instance.as_value(); - if let Some(s) = current.as_str() { - if let Some(min) = self.schema.min_length { - if (s.chars().count() as f64) < min { - result.errors.push(ValidationError { - code: "MIN_LENGTH_VIOLATED".to_string(), - message: format!("Length < min {}", min), - path: self.path.to_string(), - }); - } - } - if let Some(max) = self.schema.max_length { - if (s.chars().count() as f64) > max { - 
result.errors.push(ValidationError { - code: "MAX_LENGTH_VIOLATED".to_string(), - message: format!("Length > max {}", max), - path: self.path.to_string(), - }); - } - } - if let Some(ref compiled_re) = self.schema.compiled_pattern { - if !compiled_re.0.is_match(s) { - result.errors.push(ValidationError { - code: "PATTERN_VIOLATED".to_string(), - message: format!("Pattern mismatch {:?}", self.schema.pattern), - path: self.path.to_string(), - }); - } - } else if let Some(ref pattern) = self.schema.pattern { - if let Ok(re) = Regex::new(pattern) { - if !re.is_match(s) { - result.errors.push(ValidationError { - code: "PATTERN_VIOLATED".to_string(), - message: format!("Pattern mismatch {}", pattern), - path: self.path.to_string(), - }); - } - } - } - } - } - - fn validate_format(&self, result: &mut ValidationResult) { - let current = self.instance.as_value(); - if let Some(ref compiled_fmt) = self.schema.compiled_format { - match compiled_fmt { - crate::compiler::CompiledFormat::Func(f) => { - let should = if let Some(s) = current.as_str() { - !s.is_empty() - } else { - true - }; - if should { - if let Err(e) = f(current) { - result.errors.push(ValidationError { - code: "FORMAT_MISMATCH".to_string(), - message: format!("Format error: {}", e), - path: self.path.to_string(), - }); - } - } - } - crate::compiler::CompiledFormat::Regex(re) => { - if let Some(s) = current.as_str() { - if !re.is_match(s) { - result.errors.push(ValidationError { - code: "FORMAT_MISMATCH".to_string(), - message: "Format regex mismatch".to_string(), - path: self.path.to_string(), - }); - } - } - } - } - } - } - - fn validate_object(&self, result: &mut ValidationResult) -> Result<(), ValidationError> { - let current = self.instance.as_value(); - if let Some(obj) = current.as_object() { - // 1. 
Min Properties - if let Some(min) = self.schema.min_properties { - if (obj.len() as f64) < min { - result.errors.push(ValidationError { - code: "MIN_PROPERTIES".to_string(), - message: "Too few properties".to_string(), - path: self.path.to_string(), - }); - } - } - if let Some(max) = self.schema.max_properties { - if (obj.len() as f64) > max { - result.errors.push(ValidationError { - code: "MAX_PROPERTIES".to_string(), - message: "Too many properties".to_string(), - path: self.path.to_string(), - }); - } - } - // 2. Required - if let Some(ref req) = self.schema.required { - for field in req { - if !obj.contains_key(field) { - result.errors.push(ValidationError { - code: "REQUIRED_FIELD_MISSING".to_string(), - message: format!("Missing {}", field), - path: format!("{}/{}", self.path, field), - }); - } - } - } - // 3. Dependent Required - if let Some(ref dep_req) = self.schema.dependent_required { - for (key, required_keys) in dep_req { - if obj.contains_key(key) { - for req_key in required_keys { - if !obj.contains_key(req_key) { - result.errors.push(ValidationError { - code: "DEPENDENT_REQUIRED".to_string(), - message: format!("Missing dependent {}", req_key), - path: self.path.to_string(), - }); - } - } - } - } - } - // 4. Dependent Schemas - if let Some(ref dep_sch) = self.schema.dependent_schemas { - for (key, sub_schema) in dep_sch { - if obj.contains_key(key) { - // Dependent Schema applies to the CURRENT INSTANCE. - // Reporter = true (merges results). - let derived = self.derive( - sub_schema, - self.instance, // Copy - &self.path, - self.scope.clone(), - HashSet::new(), - self.extensible, - false, - ); - result.merge(derived.validate()?); - } - } - } - - // 5. 
Properties - if let Some(props) = &self.schema.properties { - for (key, sub_schema) in props { - // Implicit Shadowing Check - if self.overrides.contains(key) { - continue; - } - - if let Some(child_instance) = self.instance.child_at_key(key) { - let new_path = format!("{}/{}", self.path, key); - - let is_ref = sub_schema.ref_string.is_some() || sub_schema.obj.dynamic_ref.is_some(); - let next_extensible = if is_ref { false } else { self.extensible }; - - let derived = self.derive( - sub_schema, - child_instance, - &new_path, - self.scope.clone(), - HashSet::new(), - next_extensible, - false, - ); - let item_res = derived.validate()?; - result.merge(item_res); - result.evaluated_keys.insert(key.clone()); - } - } - } - - if let Some(ref compiled_pp) = self.schema.compiled_pattern_properties { - for (compiled_re, sub_schema) in compiled_pp { - for (key, _) in obj { - if compiled_re.0.is_match(key) { - // Note: Pattern properties need to derive child instance dynamically for each matching key - if let Some(child_instance) = self.instance.child_at_key(key) { - let new_path = format!("{}/{}", self.path, key); - let is_ref = - sub_schema.ref_string.is_some() || sub_schema.obj.dynamic_ref.is_some(); - let next_extensible = if is_ref { false } else { self.extensible }; - - let derived = self.derive( - sub_schema, - child_instance, - &new_path, - self.scope.clone(), - HashSet::new(), - next_extensible, - false, - ); - let item_res = derived.validate()?; - result.merge(item_res); - result.evaluated_keys.insert(key.clone()); - } - } - } - } - } - - // 6.5. 
Additional Properties - if let Some(ref additional_schema) = self.schema.additional_properties { - for (key, _) in obj { - let mut locally_matched = false; - if let Some(props) = &self.schema.properties { - if props.contains_key(key) { - locally_matched = true; - } - } - if !locally_matched { - if let Some(ref compiled_pp) = self.schema.compiled_pattern_properties { - for (compiled_re, _) in compiled_pp { - if compiled_re.0.is_match(key) { - locally_matched = true; - break; - } - } - } - } - - if !locally_matched { - if let Some(child_instance) = self.instance.child_at_key(key) { - let new_path = format!("{}/{}", self.path, key); - let is_ref = additional_schema.ref_string.is_some() - || additional_schema.obj.dynamic_ref.is_some(); - let next_extensible = if is_ref { false } else { self.extensible }; - - let derived = self.derive( - additional_schema, - child_instance, - &new_path, - self.scope.clone(), - HashSet::new(), - next_extensible, - false, - ); - let item_res = derived.validate()?; - result.merge(item_res); - // Mark as evaluated so it doesn't trigger strictness failure - result.evaluated_keys.insert(key.clone()); - } - } - } - } - - // 7. Property Names - if let Some(ref property_names) = self.schema.property_names { - for key in obj.keys() { - let _new_path = format!("{}/propertyNames/{}", self.path, key); - let val_str = Value::String(key.clone()); - - let ctx = ValidationContext::new( - self.registry, - self.root, - property_names, - crate::validator::ReadOnlyInstance(&val_str), - self.scope.clone(), - HashSet::new(), - self.extensible, - self.reporter, - ); - - result.merge(ctx.validate()?); - } - } - // 7. Additional Properties (Strictness / Extensibility) - // Done via check_strictness at end OR explicit schema keyword. 
- } - - // --- Pruning (Masking) --- - if !self.extensible { - self.instance.prune_object(&result.evaluated_keys); - } - - Ok(()) - } - - fn validate_array(&self, result: &mut ValidationResult) -> Result<(), ValidationError> { - let current = self.instance.as_value(); - if let Some(arr) = current.as_array() { - // 1. Min/Max Items - if let Some(min) = self.schema.min_items { - if (arr.len() as f64) < min { - result.errors.push(ValidationError { - code: "MIN_ITEMS".to_string(), - message: "Too few items".to_string(), - path: self.path.to_string(), - }); - } - } - if let Some(max) = self.schema.max_items { - if (arr.len() as f64) > max { - result.errors.push(ValidationError { - code: "MAX_ITEMS".to_string(), - message: "Too many items".to_string(), - path: self.path.to_string(), - }); - } - } - - // 2. Unique Items - if self.schema.unique_items.unwrap_or(false) { - let mut seen: Vec<&Value> = Vec::new(); - for item in arr { - if seen.contains(&item) { - result.errors.push(ValidationError { - code: "UNIQUE_ITEMS_VIOLATED".to_string(), - message: "Array has duplicate items".to_string(), - path: self.path.to_string(), - }); - break; - } - seen.push(item); - } - } - - // 3. 
Contains - if let Some(ref contains_schema) = self.schema.contains { - let mut _match_count = 0; - // self.instance.as_value() is &Value - // We iterate indices - for i in 0..arr.len() { - if let Some(child_instance) = self.instance.child_at_index(i) { - let derived = self.derive( - contains_schema, - child_instance, - &self.path, - self.scope.clone(), - HashSet::new(), - self.extensible, - false, - ); - - let check = derived.validate()?; - if check.is_valid() { - _match_count += 1; - result.evaluated_indices.insert(i); - } - } - } - - // Min Contains (Default 1) - let min = self.schema.min_contains.unwrap_or(1.0) as usize; - if _match_count < min { - result.errors.push(ValidationError { - code: "CONTAINS_VIOLATED".to_string(), - message: format!("Contains matches {} < min {}", _match_count, min), - path: self.path.to_string(), - }); - } - // Max Contains - if let Some(max) = self.schema.max_contains { - if _match_count > max as usize { - result.errors.push(ValidationError { - code: "CONTAINS_VIOLATED".to_string(), - message: format!("Contains matches {} > max {}", _match_count, max), - path: self.path.to_string(), - }); - } - } - } - - // 4. 
Items (and PrefixItems) - let len = arr.len(); - let mut validation_index = 0; - - if let Some(ref prefix) = self.schema.prefix_items { - for (i, sub_schema) in prefix.iter().enumerate() { - if i < len { - let path = format!("{}/{}", self.path, i); - if let Some(child_instance) = self.instance.child_at_index(i) { - let derived = self.derive( - sub_schema, - child_instance, - &path, - self.scope.clone(), - HashSet::new(), - self.extensible, - false, - ); - let item_res = derived.validate()?; - result.merge(item_res); - result.evaluated_indices.insert(i); - validation_index += 1; - } - } - } - } - - if let Some(ref items_schema) = self.schema.items { - for i in validation_index..len { - let path = format!("{}/{}", self.path, i); - if let Some(child_instance) = self.instance.child_at_index(i) { - let derived = self.derive( - items_schema, - child_instance, - &path, - self.scope.clone(), - HashSet::new(), - self.extensible, - false, - ); - let item_res = derived.validate()?; - result.merge(item_res); - result.evaluated_indices.insert(i); - } - } - } - } - // --- Pruning (Masking) --- - if !self.extensible { - self.instance.prune_array(&result.evaluated_indices); - } - - Ok(()) - } - - fn validate_combinators(&self, result: &mut ValidationResult) -> Result<(), ValidationError> { - // 1. AllOf - if let Some(ref all_of) = self.schema.all_of { - for sub in all_of { - let derived = self.derive_for_schema(sub, true); // Reporter (Fragment) - let res = derived.validate()?; - result.merge(res); - } - } - - // 2. 
AnyOf - if let Some(ref any_of) = self.schema.any_of { - let mut valid = false; - - for sub in any_of { - let derived = self.derive_for_schema(sub, true); // Reporter to check validity - let sub_res = derived.validate()?; - if sub_res.is_valid() { - valid = true; - result.merge(sub_res); - } - } - - if !valid { - result.errors.push(ValidationError { - code: "ANY_OF_VIOLATED".to_string(), - message: "Matches none of anyOf schemas".to_string(), - path: self.path.to_string(), - }); - } - } - - // 3. OneOf - if let Some(ref one_of) = self.schema.one_of { - let mut valid_count = 0; - let mut valid_res = ValidationResult::new(); - - for sub in one_of { - let derived = self.derive_for_schema(sub, true); - let sub_res = derived.validate()?; - if sub_res.is_valid() { - valid_count += 1; - valid_res = sub_res; - } - } - - if valid_count == 1 { - result.merge(valid_res); - } else if valid_count == 0 { - result.errors.push(ValidationError { - code: "ONE_OF_VIOLATED".to_string(), - message: "Matches none of oneOf schemas".to_string(), - path: self.path.to_string(), - }); - } else { - result.errors.push(ValidationError { - code: "ONE_OF_VIOLATED".to_string(), - message: format!("Matches {} of oneOf schemas (expected 1)", valid_count), - path: self.path.to_string(), - }); - } - } - - // 4. 
Not - if let Some(ref not_schema) = self.schema.not { - let derived = self.derive_for_schema(not_schema, true); - let sub_res = derived.validate()?; - if sub_res.is_valid() { - result.errors.push(ValidationError { - code: "NOT_VIOLATED".to_string(), - message: "Matched 'not' schema".to_string(), - path: self.path.to_string(), - }); - } - } - - Ok(()) - } - - fn validate_conditionals(&self, result: &mut ValidationResult) -> Result<(), ValidationError> { - if let Some(ref if_schema) = self.schema.if_ { - let derived_if = self.derive_for_schema(if_schema, true); - let if_res = derived_if.validate()?; - - // Always merge evaluated keys from IF per JSON Schema spec (it is evaluated regardless of result) - result.evaluated_keys.extend(if_res.evaluated_keys.clone()); - result - .evaluated_indices - .extend(if_res.evaluated_indices.clone()); - - if if_res.is_valid() { - // IF passed -> Check THEN - if let Some(ref then_schema) = self.schema.then_ { - let derived_then = self.derive_for_schema(then_schema, true); - result.merge(derived_then.validate()?); - } - } else { - // IF failed -> Check ELSE - if let Some(ref else_schema) = self.schema.else_ { - let derived_else = self.derive_for_schema(else_schema, true); - result.merge(derived_else.validate()?); - } - } - } - - Ok(()) - } - - fn check_strictness(&self, result: &mut ValidationResult) { - // Only check if strict (extensible = false) - // Also skip if reporter mode (collecting keys for composition/refs) - if self.extensible || self.reporter { - return; - } - - // 1. Unevaluated Properties - if let Some(obj) = self.instance.as_value().as_object() { - for key in obj.keys() { - if !result.evaluated_keys.contains(key) && !self.overrides.contains(key) { - result.errors.push(ValidationError { - code: "STRICT_PROPERTY_VIOLATION".to_string(), - message: format!("Unexpected property '{}'", key), - path: format!("{}/{}", self.path, key), - }); - } - } - } - - // 2. 
Unevaluated Items - if let Some(arr) = self.instance.as_value().as_array() { - for i in 0..arr.len() { - if !result.evaluated_indices.contains(&i) { - result.errors.push(ValidationError { - code: "STRICT_ITEM_VIOLATION".to_string(), - message: format!("Unexpected item at index {}", i), - path: format!("{}/{}", self.path, i), - }); - } - } - } - } -} - pub struct Validator { - registry: Registry, + pub registry: Registry, + pub families: std::collections::HashMap>, } impl Validator { - pub fn new(registry: Registry) -> Self { - Self { registry } + pub fn from_punc_definition( + enums: Option<&Value>, + types: Option<&Value>, + puncs: Option<&Value>, + ) -> Self { + let mut registry = Registry::new(); + let mut families = std::collections::HashMap::new(); + + let mut family_map: std::collections::HashMap> = + std::collections::HashMap::new(); + + if let Some(Value::Array(arr)) = types { + for item in arr { + if let Some(name) = item.get("name").and_then(|v| v.as_str()) { + if let Some(hierarchy) = item.get("hierarchy").and_then(|v| v.as_array()) { + for ancestor in hierarchy { + if let Some(anc_str) = ancestor.as_str() { + family_map + .entry(anc_str.to_string()) + .or_default() + .insert(name.to_string()); + } + } + } + } + } + } + + for (family_name, members) in family_map { + let object_refs: Vec = members + .iter() + .map(|s| serde_json::json!({ "$ref": s })) + .collect(); + let schema_json = serde_json::json!({ + "oneOf": object_refs + }); + if let Ok(schema) = serde_json::from_value::(schema_json) { + let compiled = crate::compiler::Compiler::compile(schema, None); + families.insert(family_name, compiled); + } + } + + let mut cache_items = |items_val: Option<&Value>| { + if let Some(Value::Array(arr)) = items_val { + for item in arr { + if let Some(Value::Array(schemas)) = item.get("schemas") { + for schema_val in schemas { + if let Ok(schema) = serde_json::from_value::(schema_val.clone()) { + registry.add(schema); + } + } + } + } + } + }; + + 
cache_items(enums); + cache_items(types); + cache_items(puncs); + + Self { registry, families } } pub fn get_schema_ids(&self) -> Vec { @@ -1302,34 +106,28 @@ impl Validator { } pub fn resolve_ref<'a>( - registry: &'a Registry, + &'a self, root: &'a Schema, ref_string: &str, scope: &str, ) -> Option<(ResolvedRef<'a>, String)> { - // 0. Fast path for local fragments (e.g., "#/definitions/foo") - // This is necessary when scope is not a valid URL (e.g. "root" in tests) if ref_string.starts_with('#') { if let Some(indexrs) = &root.obj.compiled_registry { - // eprintln!("DEBUG: Resolving local fragment '{}'", ref_string); if let Some(s) = indexrs.schemas.get(ref_string) { return Some((ResolvedRef::Local(s.as_ref()), ref_string.to_string())); } } } - // 1. Try resolving against scope (Absolute or Relative) if let Ok(base) = url::Url::parse(scope) { if let Ok(joined) = base.join(ref_string) { let joined_str = joined.to_string(); - // Local if let Some(indexrs) = &root.obj.compiled_registry { if let Some(s) = indexrs.schemas.get(&joined_str) { return Some((ResolvedRef::Local(s.as_ref()), joined_str)); } } - // Fallback: Try decoding to match index keys that might not be fully encoded if let Ok(decoded) = percent_encoding::percent_decode_str(&joined_str).decode_utf8() { let decoded_str = decoded.to_string(); if decoded_str != joined_str { @@ -1341,25 +139,20 @@ impl Validator { } } - // Global - if let Some(s) = registry.schemas.get(&joined_str) { + if let Some(s) = self.registry.schemas.get(&joined_str) { return Some((ResolvedRef::Global(s.as_ref(), s.as_ref()), joined_str)); } } } else { - // Fallback for non-URI scopes (e.g. 
"root" in tests) - // If scope is just a string key, and ref starts with #, simple concat if ref_string.starts_with('#') { let joined_str = format!("{}{}", scope, ref_string); - // Local if let Some(indexrs) = &root.obj.compiled_registry { if let Some(s) = indexrs.schemas.get(&joined_str) { return Some((ResolvedRef::Local(s.as_ref()), joined_str)); } } - // Fallback: Try decoding to match index keys that might not be fully encoded if let Ok(decoded) = percent_encoding::percent_decode_str(&joined_str).decode_utf8() { let decoded_str = decoded.to_string(); if decoded_str != joined_str { @@ -1371,56 +164,41 @@ impl Validator { } } - // Global - { - if let Some(s) = registry.schemas.get(&joined_str) { - // Clone the Arc so we can return it (extending lifetime beyond lock) - return Some((ResolvedRef::Global(s.as_ref(), s.as_ref()), joined_str)); - } + if let Some(s) = self.registry.schemas.get(&joined_str) { + return Some((ResolvedRef::Global(s.as_ref(), s.as_ref()), joined_str)); } } } - // 2. Try as absolute URI (if ref is absolute) if let Ok(parsed) = url::Url::parse(ref_string) { let absolute = parsed.to_string(); - // Local if let Some(indexrs) = &root.obj.compiled_registry { if let Some(s) = indexrs.schemas.get(&absolute) { return Some((ResolvedRef::Local(s.as_ref()), absolute)); } } - // Global let resource_base = if let Some((base, _)) = absolute.split_once('#') { base } else { &absolute }; - if let Some(compiled) = registry.schemas.get(resource_base) { + if let Some(compiled) = self.registry.schemas.get(resource_base) { if let Some(indexrs) = &compiled.obj.compiled_registry { if let Some(s) = indexrs.schemas.get(&absolute) { - // Both are Arcs in compiled_registry return Some((ResolvedRef::Global(compiled.as_ref(), s.as_ref()), absolute)); } } } } - // 3. Fallback: Try as simple string key (Global Registry) - // This supports legacy/JSPG-style IDs that are not valid URIs (e.g. 
"punc_person") - if let Some(compiled) = registry.schemas.get(ref_string) { - // eprintln!("DEBUG: Resolved Global Ref (fallback): {}", ref_string); + if let Some(compiled) = self.registry.schemas.get(ref_string) { return Some(( ResolvedRef::Global(compiled.as_ref(), compiled.as_ref()), ref_string.to_string(), )); } - // eprintln!( - // "DEBUG: Failed to resolve ref: '{}' scope: '{}'", - // ref_string, scope - // ); None } @@ -1432,13 +210,13 @@ impl Validator { ) -> Result { if let Some(schema) = self.registry.schemas.get(schema_id) { let ctx = ValidationContext::new( - &self.registry, + self, schema, schema, ReadOnlyInstance(instance), vec![], HashSet::new(), - false, // Default strictness (overridden by schema.extensible if present) + false, false, ); ctx.validate() @@ -1458,18 +236,16 @@ impl Validator { ) -> Result { if let Some(schema) = self.registry.schemas.get(schema_id) { let ctx = ValidationContext::new( - &self.registry, + self, schema, schema, MutableInstance::new(instance), vec![], HashSet::new(), - false, // Default strictness + false, false, ); - let res = ctx.validate()?; - Ok(res) } else { Err(ValidationError { diff --git a/tests/fixtures/puncs.json b/tests/fixtures/puncs.json index 3e2e969..df02c8e 100644 --- a/tests/fixtures/puncs.json +++ b/tests/fixtures/puncs.json @@ -1067,7 +1067,7 @@ "schemas": [ { "$id": "polymorphic_org_punc.request", - "$ref": "organization.family" + "$family": "organization" } ] }, @@ -1080,6 +1080,21 @@ "$ref": "organization" } ] + }, + { + "name": "invalid_family_punc", + "public": false, + "schemas": [ + { + "$id": "invalid_family_punc.request", + "$family": "organization", + "properties": { + "extra": { + "type": "string" + } + } + } + ] } ], "tests": [ @@ -1240,6 +1255,23 @@ "path": "/first_name" } ] + }, + { + "description": "invalid schema due to family exclusivity violation", + "schema_id": "invalid_family_punc.request", + "data": { + "id": "org-2", + "type": "organization", + "name": "Strict Corp", + "extra": 
"value" + }, + "valid": false, + "expect_errors": [ + { + "code": "INVALID_SCHEMA", + "path": "" + } + ] } ] },