validator refactor progress

This commit is contained in:
2026-03-03 00:13:37 -05:00
parent e14f53e7d9
commit 3898c43742
81 changed files with 6331 additions and 7934 deletions

View File

@ -0,0 +1,119 @@
use serde_json::Value;
use std::collections::HashSet;
use crate::validator::context::ValidationContext;
use crate::validator::error::ValidationError;
use crate::validator::result::ValidationResult;
impl<'a> ValidationContext<'a> {
pub(crate) fn validate_array(
&self,
result: &mut ValidationResult,
) -> Result<bool, ValidationError> {
let current = self.instance;
if let Some(arr) = current.as_array() {
if let Some(min) = self.schema.min_items {
if (arr.len() as f64) < min {
result.errors.push(ValidationError {
code: "MIN_ITEMS".to_string(),
message: "Too few items".to_string(),
path: self.path.to_string(),
});
}
}
if let Some(max) = self.schema.max_items {
if (arr.len() as f64) > max {
result.errors.push(ValidationError {
code: "MAX_ITEMS".to_string(),
message: "Too many items".to_string(),
path: self.path.to_string(),
});
}
}
if self.schema.unique_items.unwrap_or(false) {
let mut seen: Vec<&Value> = Vec::new();
for item in arr {
if seen.contains(&item) {
result.errors.push(ValidationError {
code: "UNIQUE_ITEMS_VIOLATED".to_string(),
message: "Array has duplicate items".to_string(),
path: self.path.to_string(),
});
break;
}
seen.push(item);
}
}
if let Some(ref contains_schema) = self.schema.contains {
let mut _match_count = 0;
for (i, child_instance) in arr.iter().enumerate() {
let derived = self.derive(
contains_schema,
child_instance,
&self.path,
self.extensible,
false,
);
let check = derived.validate()?;
if check.is_valid() {
_match_count += 1;
result.evaluated_indices.insert(i);
}
}
let min = self.schema.min_contains.unwrap_or(1.0) as usize;
if _match_count < min {
result.errors.push(ValidationError {
code: "CONTAINS_VIOLATED".to_string(),
message: format!("Contains matches {} < min {}", _match_count, min),
path: self.path.to_string(),
});
}
if let Some(max) = self.schema.max_contains {
if _match_count > max as usize {
result.errors.push(ValidationError {
code: "CONTAINS_VIOLATED".to_string(),
message: format!("Contains matches {} > max {}", _match_count, max),
path: self.path.to_string(),
});
}
}
}
let len = arr.len();
let mut validation_index = 0;
if let Some(ref prefix) = self.schema.prefix_items {
for (i, sub_schema) in prefix.iter().enumerate() {
if i < len {
let path = format!("{}/{}", self.path, i);
if let Some(child_instance) = arr.get(i) {
let derived = self.derive(sub_schema, child_instance, &path, self.extensible, false);
let item_res = derived.validate()?;
result.merge(item_res);
result.evaluated_indices.insert(i);
validation_index += 1;
}
}
}
}
if let Some(ref items_schema) = self.schema.items {
for i in validation_index..len {
let path = format!("{}/{}", self.path, i);
if let Some(child_instance) = arr.get(i) {
let derived = self.derive(items_schema, child_instance, &path, self.extensible, false);
let item_res = derived.validate()?;
result.merge(item_res);
result.evaluated_indices.insert(i);
}
}
}
}
Ok(true)
}
}

View File

@ -0,0 +1,83 @@
use crate::validator::context::ValidationContext;
use crate::validator::error::ValidationError;
use crate::validator::result::ValidationResult;
impl<'a> ValidationContext<'a> {
    /// Applies the `all_of`, `any_of`, `one_of` and `not` keywords of the current schema.
    ///
    /// * `all_of`: every branch result is merged into `result`.
    /// * `any_of`: every valid branch result is merged; `ANY_OF_VIOLATED` is reported
    ///   when no branch is valid.
    /// * `one_of`: the single valid branch result is merged; `ONE_OF_VIOLATED` is
    ///   reported when zero or more than one branch is valid.
    /// * `not`: `NOT_VIOLATED` is reported when the sub-schema is valid.
    ///
    /// # Errors
    ///
    /// Propagates any `Err` produced while validating a derived context.
    /// Otherwise the return value is always `Ok(true)`.
    pub(crate) fn validate_combinators(
        &self,
        result: &mut ValidationResult,
    ) -> Result<bool, ValidationError> {
        if let Some(branches) = &self.schema.all_of {
            for branch in branches {
                let outcome = self.derive_for_schema(branch, true).validate()?;
                result.merge(outcome);
            }
        }

        if let Some(branches) = &self.schema.any_of {
            let mut matched_any = false;
            // Every branch is evaluated (no early exit) so that each valid branch
            // contributes its result.
            for branch in branches {
                let outcome = self.derive_for_schema(branch, true).validate()?;
                if outcome.is_valid() {
                    matched_any = true;
                    result.merge(outcome);
                }
            }
            if !matched_any {
                result.errors.push(ValidationError {
                    code: "ANY_OF_VIOLATED".to_string(),
                    message: "Matches none of anyOf schemas".to_string(),
                    path: self.path.to_string(),
                });
            }
        }

        if let Some(branches) = &self.schema.one_of {
            let mut matches = 0;
            // Holds the most recent valid branch result; only merged when it is
            // the sole match.
            let mut winner = ValidationResult::new();
            for branch in branches {
                let outcome = self.derive_for_schema(branch, true).validate()?;
                if outcome.is_valid() {
                    matches += 1;
                    winner = outcome;
                }
            }
            match matches {
                1 => {
                    result.merge(winner);
                }
                0 => {
                    result.errors.push(ValidationError {
                        code: "ONE_OF_VIOLATED".to_string(),
                        message: "Matches none of oneOf schemas".to_string(),
                        path: self.path.to_string(),
                    });
                }
                _ => {
                    result.errors.push(ValidationError {
                        code: "ONE_OF_VIOLATED".to_string(),
                        message: format!("Matches {} of oneOf schemas (expected 1)", matches),
                        path: self.path.to_string(),
                    });
                }
            }
        }

        if let Some(negated) = &self.schema.not {
            let outcome = self.derive_for_schema(negated, true).validate()?;
            if outcome.is_valid() {
                result.errors.push(ValidationError {
                    code: "NOT_VIOLATED".to_string(),
                    message: "Matched 'not' schema".to_string(),
                    path: self.path.to_string(),
                });
            }
        }

        Ok(true)
    }
}

View File

@ -0,0 +1,69 @@
use crate::validator::context::ValidationContext;
use crate::validator::error::ValidationError;
use crate::validator::result::ValidationResult;
impl<'a> ValidationContext<'a> {
    /// Applies the `if_` / `then_` / `else_` keywords of the current schema.
    ///
    /// The `if_` sub-schema is validated first. Its evaluated keys and indices are
    /// added to `result` whether or not it is valid; its errors are not. When it is
    /// valid the `then_` sub-schema (if any) is validated and merged, otherwise the
    /// `else_` sub-schema (if any) is.
    ///
    /// # Errors
    ///
    /// Propagates any `Err` produced while validating a derived context.
    /// Otherwise the return value is always `Ok(true)`.
    pub(crate) fn validate_conditionals(
        &self,
        result: &mut ValidationResult,
    ) -> Result<bool, ValidationError> {
        if let Some(ref if_schema) = self.schema.if_ {
            let derived_if = self.derive_for_schema(if_schema, true);
            let if_res = derived_if.validate()?;

            // Capture validity first so the evaluated sets can be moved out of
            // `if_res` instead of being cloned.
            let condition_holds = if_res.is_valid();
            result.evaluated_keys.extend(if_res.evaluated_keys);
            result.evaluated_indices.extend(if_res.evaluated_indices);

            if condition_holds {
                if let Some(ref then_schema) = self.schema.then_ {
                    let derived_then = self.derive_for_schema(then_schema, true);
                    result.merge(derived_then.validate()?);
                }
            } else if let Some(ref else_schema) = self.schema.else_ {
                let derived_else = self.derive_for_schema(else_schema, true);
                result.merge(derived_else.validate()?);
            }
        }
        Ok(true)
    }

    /// Reports every object property and array item that `result` has not recorded
    /// as evaluated.
    ///
    /// Nothing is checked when the context is `extensible` or a `reporter`.
    /// Otherwise each object key missing from `result.evaluated_keys` yields a
    /// `STRICT_PROPERTY_VIOLATION` error and each array index missing from
    /// `result.evaluated_indices` yields a `STRICT_ITEM_VIOLATION` error, both with
    /// the offending key or index appended to the path.
    ///
    /// Always returns `Ok(true)`.
    pub(crate) fn validate_strictness(
        &self,
        result: &mut ValidationResult,
    ) -> Result<bool, ValidationError> {
        if self.extensible || self.reporter {
            return Ok(true);
        }

        if let Some(obj) = self.instance.as_object() {
            for key in obj.keys() {
                if !result.evaluated_keys.contains(key) {
                    result.errors.push(ValidationError {
                        code: "STRICT_PROPERTY_VIOLATION".to_string(),
                        message: format!("Unexpected property '{}'", key),
                        path: format!("{}/{}", self.path, key),
                    });
                }
            }
        }

        if let Some(arr) = self.instance.as_array() {
            for i in 0..arr.len() {
                if !result.evaluated_indices.contains(&i) {
                    result.errors.push(ValidationError {
                        code: "STRICT_ITEM_VIOLATION".to_string(),
                        message: format!("Unexpected item at index {}", i),
                        path: format!("{}/{}", self.path, i),
                    });
                }
            }
        }

        Ok(true)
    }
}

View File

@ -0,0 +1,84 @@
use crate::validator::Validator;
use crate::validator::context::ValidationContext;
use crate::validator::error::ValidationError;
use crate::validator::result::ValidationResult;
impl<'a> ValidationContext<'a> {
    /// Applies the `type_`, `const_` and `enum_` keywords of the current schema.
    ///
    /// * `type_`: reports `INVALID_TYPE` when the instance matches none of the
    ///   declared types according to `Validator::check_type`.
    /// * `const_`: reports `CONST_VIOLATED` when the instance differs from the constant.
    /// * `enum_`: reports `ENUM_MISMATCH` when the instance equals none of the values.
    ///
    /// When a `const_` or `enum_` check succeeds, every key of an object instance
    /// (or every index of an array instance) is recorded as evaluated in `result`.
    ///
    /// Always returns `Ok(true)`.
    pub(crate) fn validate_core(
        &self,
        result: &mut ValidationResult,
    ) -> Result<bool, ValidationError> {
        let current = self.instance;

        // Records the whole instance as evaluated after a successful const/enum match.
        let mark_fully_evaluated = |target: &mut ValidationResult| {
            if let Some(obj) = current.as_object() {
                target.evaluated_keys.extend(obj.keys().cloned());
            } else if let Some(arr) = current.as_array() {
                target.evaluated_indices.extend(0..arr.len());
            }
        };

        if let Some(declared) = &self.schema.type_ {
            match declared {
                crate::database::schema::SchemaTypeOrArray::Single(expected) => {
                    if !Validator::check_type(expected, current) {
                        result.errors.push(ValidationError {
                            code: "INVALID_TYPE".to_string(),
                            message: format!("Expected type '{}'", expected),
                            path: self.path.to_string(),
                        });
                    }
                }
                crate::database::schema::SchemaTypeOrArray::Multiple(types) => {
                    let accepted = types
                        .iter()
                        .any(|candidate| Validator::check_type(candidate, current));
                    if !accepted {
                        result.errors.push(ValidationError {
                            code: "INVALID_TYPE".to_string(),
                            message: format!("Expected one of types {:?}", types),
                            path: self.path.to_string(),
                        });
                    }
                }
            }
        }

        if let Some(expected) = &self.schema.const_ {
            if crate::validator::util::equals(current, expected) {
                mark_fully_evaluated(&mut *result);
            } else {
                result.errors.push(ValidationError {
                    code: "CONST_VIOLATED".to_string(),
                    message: "Value does not match const".to_string(),
                    path: self.path.to_string(),
                });
            }
        }

        if let Some(allowed) = &self.schema.enum_ {
            let listed = allowed
                .iter()
                .any(|candidate| crate::validator::util::equals(current, candidate));
            if listed {
                mark_fully_evaluated(&mut *result);
            } else {
                result.errors.push(ValidationError {
                    code: "ENUM_MISMATCH".to_string(),
                    message: "Value is not in enum".to_string(),
                    path: self.path.to_string(),
                });
            }
        }

        Ok(true)
    }
}

View File

@ -0,0 +1,44 @@
use crate::validator::context::ValidationContext;
use crate::validator::error::ValidationError;
use crate::validator::result::ValidationResult;
impl<'a> ValidationContext<'a> {
    /// Applies the compiled `format` check of the current schema, if there is one.
    ///
    /// * `CompiledFormat::Func`: the function is invoked on the instance unless the
    ///   instance is an empty string; an `Err` from it is reported as
    ///   `FORMAT_MISMATCH` together with the error text.
    /// * `CompiledFormat::Regex`: only string instances are tested; a non-matching
    ///   string is reported as `FORMAT_MISMATCH`.
    ///
    /// Always returns `Ok(true)`.
    pub(crate) fn validate_format(
        &self,
        result: &mut ValidationResult,
    ) -> Result<bool, ValidationError> {
        let current = self.instance;
        let compiled = match &self.schema.compiled_format {
            Some(compiled) => compiled,
            None => return Ok(true),
        };

        match compiled {
            crate::database::schema::CompiledFormat::Func(check) => {
                // Empty strings bypass the function; non-string values do not.
                let is_empty_string = matches!(current.as_str(), Some(""));
                if !is_empty_string {
                    if let Err(e) = check(current) {
                        result.errors.push(ValidationError {
                            code: "FORMAT_MISMATCH".to_string(),
                            message: format!("Format error: {}", e),
                            path: self.path.to_string(),
                        });
                    }
                }
            }
            crate::database::schema::CompiledFormat::Regex(re) => {
                let mismatch = match current.as_str() {
                    Some(text) => !re.is_match(text),
                    None => false,
                };
                if mismatch {
                    result.errors.push(ValidationError {
                        code: "FORMAT_MISMATCH".to_string(),
                        message: "Format regex mismatch".to_string(),
                        path: self.path.to_string(),
                    });
                }
            }
        }

        Ok(true)
    }
}

View File

@ -0,0 +1,93 @@
use serde_json::Value;
use std::collections::HashSet;
use crate::validator::context::ValidationContext;
use crate::validator::error::ValidationError;
use crate::validator::result::ValidationResult;
pub mod array;
pub mod combinators;
pub mod conditionals;
pub mod core;
pub mod format;
pub mod numeric;
pub mod object;
pub mod polymorphism;
pub mod string;
impl<'a> ValidationContext<'a> {
    /// Runs every validation stage for this context and returns the collected result.
    ///
    /// The structural stages (`validate_depth`, `validate_always_fail`,
    /// `validate_family`, `validate_refs`) run first, in that order; as soon as one
    /// of them returns `Ok(false)` the result gathered so far is returned and the
    /// remaining stages are skipped. The other stages always run, in the order
    /// written below.
    ///
    /// # Errors
    ///
    /// Propagates the first `Err` returned by any stage.
    pub(crate) fn validate_scoped(&self) -> Result<ValidationResult, ValidationError> {
        let mut result = ValidationResult::new();

        // Structural limits: `&&` short-circuits, so a stage returning `false`
        // prevents the later gate stages from running.
        let proceed = self.validate_depth(&mut result)?
            && self.validate_always_fail(&mut result)?
            && self.validate_family(&mut result)?
            && self.validate_refs(&mut result)?;
        if !proceed {
            return Ok(result);
        }

        // Core type constraints
        self.validate_core(&mut result)?;
        self.validate_numeric(&mut result)?;
        self.validate_string(&mut result)?;
        self.validate_format(&mut result)?;

        // Complex structures
        self.validate_object(&mut result)?;
        self.validate_array(&mut result)?;

        // Combinators and conditionals
        self.validate_combinators(&mut result)?;
        self.validate_conditionals(&mut result)?;

        // State tracking: extensibility marking must precede the strictness check,
        // which reads the evaluated sets.
        self.validate_extensible(&mut result)?;
        self.validate_strictness(&mut result)?;

        Ok(result)
    }

    /// Fails with `RECURSION_LIMIT_EXCEEDED` once `depth` is greater than 100.
    ///
    /// Unlike the other stages, the error is returned as `Err` rather than being
    /// pushed onto the result, which is left untouched.
    fn validate_depth(&self, _result: &mut ValidationResult) -> Result<bool, ValidationError> {
        if self.depth > 100 {
            return Err(ValidationError {
                code: "RECURSION_LIMIT_EXCEEDED".to_string(),
                message: "Recursion limit exceeded".to_string(),
                path: self.path.to_string(),
            });
        }
        Ok(true)
    }

    /// Reports `FALSE_SCHEMA` and returns `Ok(false)` when the schema is flagged
    /// `always_fail`; returns `Ok(true)` otherwise.
    fn validate_always_fail(&self, result: &mut ValidationResult) -> Result<bool, ValidationError> {
        if !self.schema.always_fail {
            return Ok(true);
        }
        result.errors.push(ValidationError {
            code: "FALSE_SCHEMA".to_string(),
            message: "Schema is false".to_string(),
            path: self.path.to_string(),
        });
        // Returning `false` tells the caller to short-circuit.
        Ok(false)
    }

    /// In an extensible context, records every key of an object instance (or every
    /// index of an array instance) as evaluated.
    ///
    /// Always returns `Ok(true)`.
    fn validate_extensible(&self, result: &mut ValidationResult) -> Result<bool, ValidationError> {
        if !self.extensible {
            return Ok(true);
        }
        match (self.instance.as_object(), self.instance.as_array()) {
            (Some(obj), _) => result.evaluated_keys.extend(obj.keys().cloned()),
            (None, Some(arr)) => result.evaluated_indices.extend(0..arr.len()),
            (None, None) => {}
        }
        Ok(true)
    }
}

View File

@ -0,0 +1,61 @@
use crate::validator::context::ValidationContext;
use crate::validator::error::ValidationError;
use crate::validator::result::ValidationResult;
impl<'a> ValidationContext<'a> {
    /// Applies the numeric keywords of the current schema to the current instance.
    ///
    /// Instances for which `as_f64()` yields nothing are skipped. Otherwise the
    /// value is checked against `minimum`, `maximum`, `exclusive_minimum`,
    /// `exclusive_maximum` and `multiple_of`, and each violation is appended to
    /// `result.errors` with the matching `*_VIOLATED` code.
    ///
    /// The `multiple_of` check tolerates floating-point rounding in the division:
    /// the quotient may differ from the nearest integer by a few units of
    /// `f64::EPSILON` scaled to the quotient's magnitude. A fixed absolute
    /// `f64::EPSILON` would reject exact decimal multiples such as `0.3` of `0.1`,
    /// whose quotient evaluates to `2.9999999999999996`.
    ///
    /// Always returns `Ok(true)`.
    pub(crate) fn validate_numeric(
        &self,
        result: &mut ValidationResult,
    ) -> Result<bool, ValidationError> {
        let current = self.instance;
        if let Some(num) = current.as_f64() {
            if let Some(min) = self.schema.minimum {
                if num < min {
                    result.errors.push(ValidationError {
                        code: "MINIMUM_VIOLATED".to_string(),
                        message: format!("Value {} < min {}", num, min),
                        path: self.path.to_string(),
                    });
                }
            }
            if let Some(max) = self.schema.maximum {
                if num > max {
                    result.errors.push(ValidationError {
                        code: "MAXIMUM_VIOLATED".to_string(),
                        message: format!("Value {} > max {}", num, max),
                        path: self.path.to_string(),
                    });
                }
            }
            if let Some(ex_min) = self.schema.exclusive_minimum {
                if num <= ex_min {
                    result.errors.push(ValidationError {
                        code: "EXCLUSIVE_MINIMUM_VIOLATED".to_string(),
                        message: format!("Value {} <= ex_min {}", num, ex_min),
                        path: self.path.to_string(),
                    });
                }
            }
            if let Some(ex_max) = self.schema.exclusive_maximum {
                if num >= ex_max {
                    result.errors.push(ValidationError {
                        code: "EXCLUSIVE_MAXIMUM_VIOLATED".to_string(),
                        message: format!("Value {} >= ex_max {}", num, ex_max),
                        path: self.path.to_string(),
                    });
                }
            }
            if let Some(multiple_of) = self.schema.multiple_of {
                let quotient: f64 = num / multiple_of;
                // Rounding error of the division grows with the quotient, so the
                // tolerance is relative (never below the scale of 1.0). A non-finite
                // quotient (e.g. a zero divisor) makes the comparison false and so
                // reports no error, as before.
                let tolerance = 4.0 * f64::EPSILON * quotient.abs().max(1.0);
                if (quotient - quotient.round()).abs() > tolerance {
                    result.errors.push(ValidationError {
                        code: "MULTIPLE_OF_VIOLATED".to_string(),
                        message: format!("Value {} not multiple of {}", num, multiple_of),
                        path: self.path.to_string(),
                    });
                }
            }
        }
        Ok(true)
    }
}

View File

@ -0,0 +1,183 @@
use serde_json::Value;
use std::collections::HashSet;
use crate::validator::context::ValidationContext;
use crate::validator::error::ValidationError;
use crate::validator::result::ValidationResult;
impl<'a> ValidationContext<'a> {
    /// Applies the object keywords of the current schema to the current instance.
    ///
    /// Instances that are not JSON objects are skipped. For objects the following
    /// steps run in order:
    ///
    /// 1. Entity-bound type check: when the schema carries `compiled_allowed_types`
    ///    and the instance has a string `"type"` property, that value must be one of
    ///    the allowed types (`CONST_VIOLATED` otherwise); an allowed value marks
    ///    `"type"` as evaluated.
    /// 2. `min_properties` / `max_properties` / `required`.
    /// 3. `properties`, `compiled_pattern_properties` and `additional_properties`:
    ///    each matching property is validated in a derived context, merged into
    ///    `result` and recorded in `result.evaluated_keys`. A sub-schema that is a
    ///    reference is always validated non-extensibly.
    /// 4. `property_names`: every key is validated as a JSON string.
    ///
    /// # Errors
    ///
    /// Propagates any `Err` produced while validating a child context.
    /// Otherwise the return value is always `Ok(true)`.
    pub(crate) fn validate_object(
        &self,
        result: &mut ValidationResult,
    ) -> Result<bool, ValidationError> {
        let current = self.instance;
        if let Some(obj) = current.as_object() {
            // Entity Bound Implicit Type Validation
            if let Some(allowed_types) = &self.schema.obj.compiled_allowed_types {
                if let Some(type_str) = obj.get("type").and_then(|value| value.as_str()) {
                    if allowed_types.contains(type_str) {
                        // Ensure it passes strict mode
                        result.evaluated_keys.insert("type".to_string());
                    } else {
                        result.errors.push(ValidationError {
                            code: "CONST_VIOLATED".to_string(), // Aligning with original const override errors
                            message: format!(
                                "Type '{}' is not a valid descendant for this entity bound schema",
                                type_str
                            ),
                            path: format!("{}/type", self.path),
                        });
                    }
                }
            }

            if let Some(min) = self.schema.min_properties {
                if (obj.len() as f64) < min {
                    result.errors.push(ValidationError {
                        code: "MIN_PROPERTIES".to_string(),
                        message: "Too few properties".to_string(),
                        path: self.path.to_string(),
                    });
                }
            }
            if let Some(max) = self.schema.max_properties {
                if (obj.len() as f64) > max {
                    result.errors.push(ValidationError {
                        code: "MAX_PROPERTIES".to_string(),
                        message: "Too many properties".to_string(),
                        path: self.path.to_string(),
                    });
                }
            }

            if let Some(ref req) = self.schema.required {
                for field in req {
                    if !obj.contains_key(field) {
                        result.errors.push(ValidationError {
                            code: "REQUIRED_FIELD_MISSING".to_string(),
                            message: format!("Missing {}", field),
                            path: format!("{}/{}", self.path, field),
                        });
                    }
                }
            }

            if let Some(props) = &self.schema.properties {
                for (key, sub_schema) in props {
                    if let Some(child_instance) = obj.get(key) {
                        let new_path = format!("{}/{}", self.path, key);
                        // Referenced schemas are never validated extensibly.
                        let is_ref =
                            sub_schema.ref_string.is_some() || sub_schema.obj.compiled_ref.is_some();
                        let next_extensible = if is_ref { false } else { self.extensible };
                        let derived = self.derive(
                            sub_schema,
                            child_instance,
                            &new_path,
                            next_extensible,
                            false,
                        );
                        let mut item_res = derived.validate()?;

                        // Entity Bound Implicit Type Interception: an allowed
                        // descendant type overrides the const/enum declared on the
                        // `type` property. The enum check in this validator reports
                        // "ENUM_MISMATCH", so that code must be filtered as well;
                        // "ENUM_VIOLATED" is kept for backward compatibility.
                        if key == "type" {
                            if let Some(allowed_types) = &self.schema.obj.compiled_allowed_types {
                                if let Some(instance_type) = child_instance.as_str() {
                                    if allowed_types.contains(instance_type) {
                                        item_res.errors.retain(|e| {
                                            e.code != "CONST_VIOLATED"
                                                && e.code != "ENUM_MISMATCH"
                                                && e.code != "ENUM_VIOLATED"
                                        });
                                    }
                                }
                            }
                        }

                        result.merge(item_res);
                        result.evaluated_keys.insert(key.to_string());
                    }
                }
            }

            if let Some(ref compiled_pp) = self.schema.compiled_pattern_properties {
                for (compiled_re, sub_schema) in compiled_pp {
                    for (key, child_instance) in obj {
                        if compiled_re.0.is_match(key) {
                            let new_path = format!("{}/{}", self.path, key);
                            let is_ref = sub_schema.ref_string.is_some()
                                || sub_schema.obj.compiled_ref.is_some();
                            let next_extensible = if is_ref { false } else { self.extensible };
                            let derived = self.derive(
                                sub_schema,
                                child_instance,
                                &new_path,
                                next_extensible,
                                false,
                            );
                            let item_res = derived.validate()?;
                            result.merge(item_res);
                            result.evaluated_keys.insert(key.to_string());
                        }
                    }
                }
            }

            if let Some(ref additional_schema) = self.schema.additional_properties {
                for (key, child_instance) in obj {
                    // A property is "additional" only when neither `properties` nor
                    // any pattern property of this schema covers it.
                    let mut locally_matched = match &self.schema.properties {
                        Some(props) => props.contains_key(key),
                        None => false,
                    };
                    if !locally_matched {
                        if let Some(ref compiled_pp) = self.schema.compiled_pattern_properties {
                            for (compiled_re, _) in compiled_pp {
                                if compiled_re.0.is_match(key) {
                                    locally_matched = true;
                                    break;
                                }
                            }
                        }
                    }

                    if !locally_matched {
                        let new_path = format!("{}/{}", self.path, key);
                        let is_ref = additional_schema.ref_string.is_some()
                            || additional_schema.obj.compiled_ref.is_some();
                        let next_extensible = if is_ref { false } else { self.extensible };
                        let derived = self.derive(
                            additional_schema,
                            child_instance,
                            &new_path,
                            next_extensible,
                            false,
                        );
                        let item_res = derived.validate()?;
                        result.merge(item_res);
                        result.evaluated_keys.insert(key.to_string());
                    }
                }
            }

            if let Some(ref property_names) = self.schema.property_names {
                for key in obj.keys() {
                    // The key itself is validated, wrapped as a JSON string.
                    // NOTE(review): no path is handed to `ValidationContext::new`, so
                    // errors raised here presumably do not point at the offending
                    // property — confirm against the constructor.
                    let val_str = Value::String(key.to_string());
                    let ctx = ValidationContext::new(
                        self.schemas,
                        self.root,
                        property_names,
                        &val_str,
                        self.extensible,
                        self.reporter,
                    );
                    result.merge(ctx.validate()?);
                }
            }
        }
        Ok(true)
    }
}

View File

@ -0,0 +1,64 @@
use crate::validator::context::ValidationContext;
use crate::validator::error::ValidationError;
use crate::validator::result::ValidationResult;
impl<'a> ValidationContext<'a> {
    /// Verifies that a schema using `family` declares no other constraint.
    ///
    /// When `family` is set together with any of `type_`, `properties`, `required`,
    /// `additional_properties`, `items`, `ref_string`, `one_of`, `any_of`, `all_of`,
    /// `enum_` or `const_`, an `INVALID_SCHEMA` error is recorded and `Ok(false)` is
    /// returned so the caller can short-circuit. In every other case the result is
    /// `Ok(true)` and `result` is left untouched.
    pub(crate) fn validate_family(
        &self,
        result: &mut ValidationResult,
    ) -> Result<bool, ValidationError> {
        if self.schema.family.is_some() {
            let conflicts = self.schema.type_.is_some()
                || self.schema.properties.is_some()
                || self.schema.required.is_some()
                || self.schema.additional_properties.is_some()
                || self.schema.items.is_some()
                || self.schema.ref_string.is_some()
                || self.schema.one_of.is_some()
                || self.schema.any_of.is_some()
                || self.schema.all_of.is_some()
                || self.schema.enum_.is_some()
                || self.schema.const_.is_some();
            if conflicts {
                result.errors.push(ValidationError {
                    code: "INVALID_SCHEMA".to_string(),
                    message: "$family must be used exclusively without other constraints"
                        .to_string(),
                    path: self.path.to_string(),
                });
                // Short-circuit: the schema formulation is broken
                return Ok(false);
            }
        }
        // Family specific runtime validation will go here later if needed
        Ok(true)
    }

    /// Validates the instance against the schema referenced by `ref_string`.
    ///
    /// When the schema has a `ref_string` and a `compiled_ref`, the instance is
    /// validated against the compiled target in a derived context whose `root` is
    /// switched to that target, and the outcome is merged into `result`. A
    /// `ref_string` without a `compiled_ref` is reported as `REF_RESOLUTION_FAILED`.
    ///
    /// # Errors
    ///
    /// Propagates any `Err` produced while validating the referenced schema.
    /// Otherwise the return value is always `Ok(true)`.
    pub(crate) fn validate_refs(
        &self,
        result: &mut ValidationResult,
    ) -> Result<bool, ValidationError> {
        // 1. Core $ref logic fully transitioned to memory pointer resolutions.
        // Only the presence of the reference string matters here; the target is
        // taken from the pre-compiled pointer.
        if self.schema.ref_string.is_some() {
            if let Some(global_schema) = &self.schema.compiled_ref {
                let mut shadow = self.derive(
                    global_schema,
                    self.instance,
                    &self.path,
                    self.extensible,
                    false,
                );
                shadow.root = global_schema;
                result.merge(shadow.validate()?);
            } else {
                result.errors.push(ValidationError {
                    code: "REF_RESOLUTION_FAILED".to_string(),
                    message: "Reference pointer was not compiled inside Database graph"
                        .to_string(),
                    path: self.path.to_string(),
                });
            }
        }
        Ok(true)
    }
}

View File

@ -0,0 +1,53 @@
use crate::validator::context::ValidationContext;
use crate::validator::error::ValidationError;
use crate::validator::result::ValidationResult;
use regex::Regex;
impl<'a> ValidationContext<'a> {
    /// Applies the string keywords of the current schema to the current instance.
    ///
    /// Non-string instances are skipped. For strings:
    ///
    /// * `min_length` / `max_length` are compared against the number of `char`s,
    ///   reporting `MIN_LENGTH_VIOLATED` / `MAX_LENGTH_VIOLATED`.
    /// * The pattern is taken from `compiled_pattern` when present; otherwise the
    ///   raw `pattern` string is compiled on the spot. A mismatch is reported as
    ///   `PATTERN_VIOLATED`. A raw pattern that fails to compile is ignored.
    ///
    /// Always returns `Ok(true)`.
    pub(crate) fn validate_string(
        &self,
        result: &mut ValidationResult,
    ) -> Result<bool, ValidationError> {
        let text = match self.instance.as_str() {
            Some(text) => text,
            None => return Ok(true),
        };
        // Length is measured in `char`s, not bytes.
        let char_count = || text.chars().count() as f64;

        if let Some(lower) = self.schema.min_length {
            if char_count() < lower {
                result.errors.push(ValidationError {
                    code: "MIN_LENGTH_VIOLATED".to_string(),
                    message: format!("Length < min {}", lower),
                    path: self.path.to_string(),
                });
            }
        }

        if let Some(upper) = self.schema.max_length {
            if char_count() > upper {
                result.errors.push(ValidationError {
                    code: "MAX_LENGTH_VIOLATED".to_string(),
                    message: format!("Length > max {}", upper),
                    path: self.path.to_string(),
                });
            }
        }

        match (&self.schema.compiled_pattern, &self.schema.pattern) {
            // A pre-compiled pattern always takes precedence over the raw string.
            (Some(compiled_re), _) => {
                if !compiled_re.0.is_match(text) {
                    result.errors.push(ValidationError {
                        code: "PATTERN_VIOLATED".to_string(),
                        message: format!("Pattern mismatch {:?}", self.schema.pattern),
                        path: self.path.to_string(),
                    });
                }
            }
            (None, Some(pattern)) => {
                if let Ok(re) = Regex::new(pattern) {
                    if !re.is_match(text) {
                        result.errors.push(ValidationError {
                            code: "PATTERN_VIOLATED".to_string(),
                            message: format!("Pattern mismatch {}", pattern),
                            path: self.path.to_string(),
                        });
                    }
                }
            }
            (None, None) => {}
        }

        Ok(true)
    }
}