jspg progress

This commit is contained in:
2026-02-17 21:46:10 -05:00
parent 32ed463df8
commit 623c34c0bc
20 changed files with 3566 additions and 1094 deletions

View File

@ -1,7 +1,7 @@
use crate::schema::Schema;
use regex::Regex;
use serde_json::Value;
use std::collections::HashMap;
// use std::collections::HashMap;
use std::error::Error;
use std::sync::Arc;
@ -14,14 +14,6 @@ pub enum CompiledFormat {
Regex(Regex),
}
/// A fully compiled schema with a root node and a pre-calculated index map.
/// This allows O(1) lookup of any anchor or $id within the schema tree.
#[derive(Debug, Clone)]
pub struct CompiledSchema {
pub root: Arc<Schema>,
pub index: HashMap<String, Arc<Schema>>,
}
/// A newtype wrapper around a compiled [`Regex`] pattern.
///
/// The wrapped regex is the public tuple field, so callers reach it as `.0`
/// (for example `compiled.0.is_match(s)`).
#[derive(Debug, Clone)]
pub struct CompiledRegex(pub Regex);
@ -169,10 +161,10 @@ impl Compiler {
}
}
/// Recursively traverses the schema tree to build a map of all internal Anchors ($id) and JSON Pointers.
/// Recursively traverses the schema tree to build the local registry index.
fn compile_index(
schema: &Arc<Schema>,
index: &mut HashMap<String, Arc<Schema>>,
registry: &mut crate::registry::Registry,
parent_base: Option<String>,
pointer: json_pointer::JsonPointer<String, Vec<String>>,
) {
@ -186,15 +178,14 @@ impl Compiler {
} else {
format!("{}#{}", base, fragment)
};
index.insert(ptr_uri, schema.clone());
registry.insert(ptr_uri, schema.clone());
}
// 2. Determine Current Scope... (unchanged logic, just use pointer)
// 2. Determine Current Scope... (unchanged logic)
let mut current_base = parent_base.clone();
let mut child_pointer = pointer.clone();
if let Some(id) = &schema.obj.id {
// ... resolve ID logic ...
let mut new_base = None;
if let Ok(_) = url::Url::parse(id) {
new_base = Some(id.clone());
@ -209,40 +200,29 @@ impl Compiler {
}
if let Some(base) = new_base {
index.insert(base.clone(), schema.clone());
// println!("DEBUG: Compiling index for path: {}", base); // Added println
registry.insert(base.clone(), schema.clone());
current_base = Some(base);
child_pointer = json_pointer::JsonPointer::new(vec![]); // Reset
}
}
// 3. Index by Anchor (unchanged)
if let Some(anchor) = &schema.obj.anchor {
if let Some(base) = &current_base {
let anchor_uri = format!("{}#{}", base, anchor);
index.insert(anchor_uri, schema.clone());
}
}
// Index by Dynamic Anchor
if let Some(d_anchor) = &schema.obj.dynamic_anchor {
if let Some(base) = &current_base {
let anchor_uri = format!("{}#{}", base, d_anchor);
index.insert(anchor_uri.clone(), schema.clone());
println!("Indexed Dynamic Anchor: {}", anchor_uri);
}
}
// 3. Index by Anchor
if let Some(anchor) = &schema.obj.anchor {
if let Some(base) = &current_base {
let anchor_uri = format!("{}#{}", base, anchor);
index.insert(anchor_uri.clone(), schema.clone());
println!("Indexed Anchor: {}", anchor_uri);
registry.insert(anchor_uri, schema.clone());
}
}
// Index by Dynamic Anchor
if let Some(d_anchor) = &schema.obj.dynamic_anchor {
if let Some(base) = &current_base {
let anchor_uri = format!("{}#{}", base, d_anchor);
registry.insert(anchor_uri, schema.clone());
}
}
// ... (Const/Enum indexing skipped for brevity, relies on string)
// 4. Recurse
// 4. Recurse (unchanged logic structure, just passing registry)
if let Some(defs) = schema.defs.as_ref().or(schema.definitions.as_ref()) {
let segment = if schema.defs.is_some() {
"$defs"
@ -252,10 +232,9 @@ impl Compiler {
for (key, sub_schema) in defs {
let mut sub = child_pointer.clone();
sub.push(segment.to_string());
// Decode key to avoid double encoding by JsonPointer
let decoded_key = percent_encoding::percent_decode_str(key).decode_utf8_lossy();
sub.push(decoded_key.to_string());
Self::compile_index(sub_schema, index, current_base.clone(), sub);
Self::compile_index(sub_schema, registry, current_base.clone(), sub);
}
}
@ -264,14 +243,14 @@ impl Compiler {
let mut sub = child_pointer.clone();
sub.push("properties".to_string());
sub.push(key.to_string());
Self::compile_index(sub_schema, index, current_base.clone(), sub);
Self::compile_index(sub_schema, registry, current_base.clone(), sub);
}
}
if let Some(items) = &schema.items {
let mut sub = child_pointer.clone();
sub.push("items".to_string());
Self::compile_index(items, index, current_base.clone(), sub);
Self::compile_index(items, registry, current_base.clone(), sub);
}
if let Some(prefix_items) = &schema.prefix_items {
@ -279,7 +258,7 @@ impl Compiler {
let mut sub = child_pointer.clone();
sub.push("prefixItems".to_string());
sub.push(i.to_string());
Self::compile_index(sub_schema, index, current_base.clone(), sub);
Self::compile_index(sub_schema, registry, current_base.clone(), sub);
}
}
@ -288,7 +267,7 @@ impl Compiler {
let mut sub = child_pointer.clone();
sub.push("allOf".to_string());
sub.push(i.to_string());
Self::compile_index(sub_schema, index, current_base.clone(), sub);
Self::compile_index(sub_schema, registry, current_base.clone(), sub);
}
}
if let Some(any_of) = &schema.any_of {
@ -296,7 +275,7 @@ impl Compiler {
let mut sub = child_pointer.clone();
sub.push("anyOf".to_string());
sub.push(i.to_string());
Self::compile_index(sub_schema, index, current_base.clone(), sub);
Self::compile_index(sub_schema, registry, current_base.clone(), sub);
}
}
if let Some(one_of) = &schema.one_of {
@ -304,36 +283,36 @@ impl Compiler {
let mut sub = child_pointer.clone();
sub.push("oneOf".to_string());
sub.push(i.to_string());
Self::compile_index(sub_schema, index, current_base.clone(), sub);
Self::compile_index(sub_schema, registry, current_base.clone(), sub);
}
}
if let Some(not) = &schema.not {
let mut sub = child_pointer.clone();
sub.push("not".to_string());
Self::compile_index(not, index, current_base.clone(), sub);
Self::compile_index(not, registry, current_base.clone(), sub);
}
if let Some(if_) = &schema.if_ {
let mut sub = child_pointer.clone();
sub.push("if".to_string());
Self::compile_index(if_, index, current_base.clone(), sub);
Self::compile_index(if_, registry, current_base.clone(), sub);
}
if let Some(then_) = &schema.then_ {
let mut sub = child_pointer.clone();
sub.push("then".to_string());
Self::compile_index(then_, index, current_base.clone(), sub);
Self::compile_index(then_, registry, current_base.clone(), sub);
}
if let Some(else_) = &schema.else_ {
let mut sub = child_pointer.clone();
sub.push("else".to_string());
Self::compile_index(else_, index, current_base.clone(), sub);
Self::compile_index(else_, registry, current_base.clone(), sub);
}
if let Some(deps) = &schema.dependent_schemas {
for (key, sub_schema) in deps {
let mut sub = child_pointer.clone();
sub.push("dependentSchemas".to_string());
sub.push(key.to_string());
Self::compile_index(sub_schema, index, current_base.clone(), sub);
Self::compile_index(sub_schema, registry, current_base.clone(), sub);
}
}
if let Some(pp) = &schema.pattern_properties {
@ -341,55 +320,67 @@ impl Compiler {
let mut sub = child_pointer.clone();
sub.push("patternProperties".to_string());
sub.push(key.to_string());
Self::compile_index(sub_schema, index, current_base.clone(), sub);
Self::compile_index(sub_schema, registry, current_base.clone(), sub);
}
}
if let Some(contains) = &schema.contains {
let mut sub = child_pointer.clone();
sub.push("contains".to_string());
Self::compile_index(contains, index, current_base.clone(), sub);
Self::compile_index(contains, registry, current_base.clone(), sub);
}
if let Some(property_names) = &schema.property_names {
let mut sub = child_pointer.clone();
sub.push("propertyNames".to_string());
Self::compile_index(property_names, index, current_base.clone(), sub);
Self::compile_index(property_names, registry, current_base.clone(), sub);
}
}
/// Resolves a format string to a [`CompiledFormat`] (future optimization).
///
/// This is currently a placeholder: the `_format` argument is ignored and the
/// function always returns `None`.
pub fn compile_format(_format: &str) -> Option<CompiledFormat> {
None
}
pub fn compile(mut root_schema: Schema, root_id: Option<String>) -> CompiledSchema {
// 1. Compile in-place (formats/regexes)
pub fn compile(mut root_schema: Schema, root_id: Option<String>) -> Arc<Schema> {
// 1. Compile in-place (formats/regexes/normalization)
Self::compile_formats_and_regexes(&mut root_schema);
// Apply root_id override if schema ID is missing
if let Some(ref rid) = root_id {
if let Some(rid) = &root_id {
if root_schema.obj.id.is_none() {
root_schema.obj.id = Some(rid.clone());
}
}
// 2. Wrap in Arc
let root = Arc::new(root_schema);
let mut index = HashMap::new();
// 2. Build ID/Pointer Index
let mut registry = crate::registry::Registry::new();
// We need a temporary Arc to satisfy compile_index recursion
// But we are modifying root_schema.
// This is tricky. compile_index takes &Arc<Schema>.
// We should build the index first, THEN attach it.
let root = Arc::new(root_schema);
// Default base_uri to ""
let base_uri = root_id
.clone()
.or_else(|| root.obj.id.clone())
.or(Some("".to_string()));
// 3. Build ID/Pointer Index
// Default base_uri to "" so that pointers like "#/foo" are indexed even if no root ID exists
Self::compile_index(
&root,
&mut index,
root_id.clone().or(Some("".to_string())),
&mut registry,
base_uri,
json_pointer::JsonPointer::new(vec![]),
);
// Also ensure root id is indexed if present
if let Some(rid) = root_id {
index.insert(rid, root.clone());
registry.insert(rid, root.clone());
}
CompiledSchema { root, index }
// Now we need to attach this registry to the root schema.
// Since root is an Arc, we might need to recreate it if we can't mutate.
// Schema struct modifications require &mut.
let mut final_schema = Arc::try_unwrap(root).unwrap_or_else(|arc| (*arc).clone());
final_schema.obj.compiled_schemas = Some(Arc::new(registry));
Arc::new(final_schema)
}
}

View File

@ -1,4 +1,4 @@
use crate::compiler::CompiledSchema; // Changed from crate::schema::Schema
use crate::schema::Schema;
use lazy_static::lazy_static;
use std::collections::HashMap;
use std::sync::RwLock;
@ -9,8 +9,9 @@ lazy_static! {
use std::sync::Arc;
#[derive(Debug, Clone, Default)]
pub struct Registry {
pub schemas: HashMap<String, Arc<CompiledSchema>>, // Changed from Schema
pub schemas: HashMap<String, Arc<Schema>>,
}
impl Registry {
@ -20,14 +21,12 @@ impl Registry {
}
}
pub fn insert(&mut self, id: String, compiled: CompiledSchema) {
if self.schemas.contains_key(&id) {
panic!("Duplicate schema ID inserted into registry: '{}'", id);
}
self.schemas.insert(id, Arc::new(compiled));
pub fn insert(&mut self, id: String, schema: Arc<Schema>) {
// We allow overwriting for now to support re-compilation in tests/dev
self.schemas.insert(id, schema);
}
pub fn get(&self, id: &str) -> Option<Arc<CompiledSchema>> {
pub fn get(&self, id: &str) -> Option<Arc<Schema>> {
self.schemas.get(id).cloned()
}

View File

@ -34,7 +34,6 @@ pub struct SchemaObject {
#[serde(rename = "patternProperties")]
pub pattern_properties: Option<BTreeMap<String, Arc<Schema>>>,
pub required: Option<Vec<String>>,
// additionalProperties can be checks against a schema or boolean (handled by Schema wrapper)
// dependencies can be schema dependencies or property dependencies
pub dependencies: Option<BTreeMap<String, Dependency>>,
@ -88,7 +87,11 @@ pub struct SchemaObject {
pub format: Option<String>,
#[serde(rename = "enum")]
pub enum_: Option<Vec<Value>>, // `enum` is a reserved keyword in Rust
#[serde(default, rename = "const")]
#[serde(
default,
rename = "const",
deserialize_with = "crate::util::deserialize_some"
)]
pub const_: Option<Value>,
// Numeric Validation
@ -135,6 +138,8 @@ pub struct SchemaObject {
pub compiled_pattern: Option<crate::compiler::CompiledRegex>,
#[serde(skip)]
pub compiled_pattern_properties: Option<Vec<(crate::compiler::CompiledRegex, Arc<Schema>)>>,
#[serde(skip)]
pub compiled_schemas: Option<Arc<crate::registry::Registry>>,
}
#[derive(Debug, Clone, Serialize)]
@ -183,7 +188,7 @@ impl<'de> Deserialize<'de> for Schema {
always_fail: !b,
});
}
let obj: SchemaObject = serde_json::from_value(v).map_err(serde::de::Error::custom)?;
let obj: SchemaObject = serde_json::from_value(v.clone()).map_err(serde::de::Error::custom)?;
Ok(Schema {
obj,

File diff suppressed because it is too large Load Diff

View File

@ -39,10 +39,10 @@ where
pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> {
// Clear registry to ensure isolation
{
let mut registry = REGISTRY.write().unwrap();
registry.clear();
}
// {
// let mut registry = REGISTRY.write().unwrap();
// registry.clear();
// }
let content =
fs::read_to_string(path).unwrap_or_else(|_| panic!("Failed to read file: {}", path));
@ -56,8 +56,10 @@ pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> {
let group = &suite[index];
let mut failures = Vec::<String>::new();
let mut registry = crate::registry::Registry::new();
// Helper to register items with 'schemas'
let register_schemas = |items_val: Option<&Value>| {
let register_schemas = |registry: &mut crate::registry::Registry, items_val: Option<&Value>| {
if let Some(val) = items_val {
if let Value::Array(arr) = val {
for item in arr {
@ -69,8 +71,6 @@ pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> {
{
// Clone ID upfront to avoid borrow issues
if let Some(id_clone) = schema.obj.id.clone() {
let mut registry = REGISTRY.write().unwrap();
// Utilize the new compile method which handles strictness
let compiled =
crate::compiler::Compiler::compile(schema, Some(id_clone.clone()));
registry.insert(id_clone, compiled);
@ -118,7 +118,6 @@ pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> {
});
if let Ok(schema) = serde_json::from_value::<crate::schema::Schema>(schema_json) {
let mut registry = REGISTRY.write().unwrap();
let compiled = crate::compiler::Compiler::compile(schema, Some(id.clone()));
registry.insert(id, compiled);
}
@ -127,32 +126,40 @@ pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> {
}
// 2. Register items directly
register_schemas(group.enums.as_ref());
register_schemas(group.types.as_ref());
register_schemas(group.puncs.as_ref());
register_schemas(&mut registry, group.enums.as_ref());
register_schemas(&mut registry, group.types.as_ref());
register_schemas(&mut registry, group.puncs.as_ref());
// 3. Register root 'schemas' if present (generic test support)
// Some tests use a raw 'schema' or 'schemas' field at the group level
if let Some(schema_val) = &group.schema {
if let Ok(schema) = serde_json::from_value::<crate::schema::Schema>(schema_val.clone()) {
let id = schema
.obj
.id
.clone()
.or_else(|| {
// Fallback ID if none provided in schema
Some("root".to_string())
})
.unwrap();
match serde_json::from_value::<crate::schema::Schema>(schema_val.clone()) {
Ok(schema) => {
let id = schema
.obj
.id
.clone()
.or_else(|| {
// Fallback ID if none provided in schema
Some(format!("test:{}:{}", path, index))
})
.unwrap();
let mut registry = REGISTRY.write().unwrap();
let compiled = crate::compiler::Compiler::compile(schema, Some(id.clone()));
registry.insert(id, compiled);
let mut registry_ref = &mut registry;
let compiled = crate::compiler::Compiler::compile(schema, Some(id.clone()));
registry_ref.insert(id, compiled);
}
Err(e) => {
eprintln!(
"DEBUG: FAILED to deserialize group schema for index {}: {}",
index, e
);
}
}
}
// 4. Run Tests
for (test_index, test) in group.tests.iter().enumerate() {
for (_test_index, test) in group.tests.iter().enumerate() {
let mut schema_id = test.schema_id.clone();
// If no explicit schema_id, try to infer from the single schema in the group
@ -165,7 +172,7 @@ pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> {
}
}
if schema_id.is_none() {
schema_id = Some("root".to_string());
schema_id = Some(format!("test:{}:{}", path, index));
}
}
}
@ -186,7 +193,7 @@ pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> {
}
if let Some(sid) = schema_id {
let result = Validator::validate(&sid, &test.data);
let result = Validator::validate_with_registry(&sid, &test.data, &registry);
if !result.errors.is_empty() != !test.valid {
failures.push(format!(
@ -194,7 +201,7 @@ pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> {
group.description,
test.description,
test.valid,
!result.errors.is_empty(),
!result.errors.is_empty(), // "Got Invalid?"
result.errors
));
}

View File

@ -1,12 +1,10 @@
use crate::compiler::CompiledSchema;
use crate::registry::REGISTRY;
use crate::schema::Schema;
use percent_encoding;
use regex::Regex;
use serde_json::Value;
use std::collections::{BTreeMap, HashSet};
use std::sync::Arc;
use std::collections::HashSet;
#[derive(Debug, Clone)]
pub struct ValidationError {
@ -15,10 +13,9 @@ pub struct ValidationError {
pub path: String,
}
#[derive(Debug)]
pub enum ResolvedRef<'a> {
Local(&'a Schema),
External(Arc<CompiledSchema>, Arc<Schema>),
Global(&'a Schema, &'a Schema),
}
#[derive(Debug, Default, Clone)]
@ -46,7 +43,7 @@ impl ValidationResult {
pub struct ValidationContext<'a> {
// 1. Global (The Library)
pub root: &'a CompiledSchema,
pub root: &'a Schema,
// 2. The Instruction (The Rule)
pub schema: &'a Schema,
@ -63,16 +60,19 @@ pub struct ValidationContext<'a> {
pub overrides: HashSet<String>, // Keywords explicitly defined by callers that I should skip (Inherited Mask)
pub extensible: bool,
pub reporter: bool, // If true, we only report evaluated keys, don't enforce strictness
pub registry: &'a crate::registry::Registry,
}
impl<'a> ValidationContext<'a> {
pub fn new(
root: &'a CompiledSchema,
root: &'a Schema,
schema: &'a Schema,
current: &'a Value,
scope: &'a [String],
overrides: HashSet<String>,
extensible: bool,
reporter: bool,
registry: &'a crate::registry::Registry,
) -> Self {
let effective_extensible = schema.extensible.unwrap_or(extensible);
@ -85,7 +85,8 @@ impl<'a> ValidationContext<'a> {
scope,
overrides,
extensible: effective_extensible,
reporter: false,
reporter,
registry,
}
}
@ -111,6 +112,7 @@ impl<'a> ValidationContext<'a> {
overrides,
extensible: effective_extensible,
reporter,
registry: self.registry,
}
}
@ -161,6 +163,7 @@ impl<'a> ValidationContext<'a> {
overrides: self.overrides.clone(),
extensible: self.extensible,
reporter: self.reporter,
registry: self.registry,
};
return shadow.validate_scoped();
}
@ -191,7 +194,15 @@ impl<'a> ValidationContext<'a> {
// --- Helpers Groups ---
if let Some(ref_res) = self.validate_refs()? {
eprintln!(
"DEBUG: validate_refs returned {} errors",
ref_res.errors.len()
);
result.merge(ref_res);
eprintln!(
"DEBUG: result has {} errors after refs merge",
result.errors.len()
);
}
// 2. Core
@ -229,7 +240,7 @@ impl<'a> ValidationContext<'a> {
// --- Strictness Check ---
if !self.reporter {
self.check_strictness(&result)?;
self.check_strictness(&mut result);
}
Ok(result)
@ -255,7 +266,7 @@ impl<'a> ValidationContext<'a> {
}
let derived = self.derive(
&self.root.root,
self.root,
self.current,
self.path,
effective_scope,
@ -266,11 +277,11 @@ impl<'a> ValidationContext<'a> {
res.merge(derived.validate()?);
} else {
if let Some((resolved, matched_key)) =
Validator::resolve_ref(self.root, ref_string, current_base_resolved)
Validator::resolve_ref(self.root, ref_string, current_base_resolved, self.registry)
{
let (target_root, target_schema) = match resolved {
ResolvedRef::Local(s) => (self.root, s),
ResolvedRef::External(ref c, ref s) => (c.as_ref(), s.as_ref()),
ResolvedRef::Global(c, s) => (c, s),
};
// Scope Injection
@ -305,13 +316,14 @@ impl<'a> ValidationContext<'a> {
self.current,
scope_to_pass,
new_overrides,
false, // Reset extensibility for $ref (Default Strict)
false, // Reset extensibility for $ref (Default Strict)
self.reporter, // Propagate reporter state
self.registry,
);
// Manually set reporter/path/depth to continue trace
// Manually set path/depth to continue trace
let mut manual_ctx = target_ctx;
manual_ctx.path = self.path;
manual_ctx.depth = self.depth + 1;
manual_ctx.reporter = true;
let target_res = manual_ctx.validate()?;
@ -337,7 +349,8 @@ impl<'a> ValidationContext<'a> {
};
let mut resolved_target: Option<(ResolvedRef, String)> = None;
let local_resolution = Validator::resolve_ref(self.root, d_ref, current_base_resolved);
let local_resolution =
Validator::resolve_ref(self.root, d_ref, current_base_resolved, self.registry);
// Bookending
let is_bookended = if let Some((ResolvedRef::Local(s), _)) = &local_resolution {
@ -357,20 +370,22 @@ impl<'a> ValidationContext<'a> {
let key = format!("{}#{}", resource_base, anchor);
// Local
if let Some(s) = self.root.index.get(&key) {
if s.obj.dynamic_anchor.as_deref() == Some(anchor) {
resolved_target = Some((ResolvedRef::Local(s), key.clone()));
break;
if let Some(indexrs) = &self.root.obj.compiled_schemas {
if let Some(s) = indexrs.schemas.get(&key) {
if s.obj.dynamic_anchor.as_deref() == Some(anchor) {
resolved_target = Some((ResolvedRef::Local(s.as_ref()), key.clone()));
break;
}
}
}
// Global
if resolved_target.is_none() {
if let Ok(registry) = crate::registry::REGISTRY.read() {
if let Some(compiled) = registry.get(resource_base) {
if let Some(s) = compiled.index.get(&key) {
if let Some(compiled) = self.registry.schemas.get(resource_base) {
if let Some(indexrs) = &compiled.obj.compiled_schemas {
if let Some(s) = indexrs.schemas.get(&key) {
if s.obj.dynamic_anchor.as_deref() == Some(anchor) {
resolved_target = Some((
ResolvedRef::External(compiled.clone(), s.clone()),
ResolvedRef::Global(compiled.as_ref(), s.as_ref()),
key.clone(),
));
break;
@ -392,7 +407,7 @@ impl<'a> ValidationContext<'a> {
if let Some((resolved, matched_key)) = resolved_target {
let (target_root, target_schema) = match resolved {
ResolvedRef::Local(s) => (self.root, s),
ResolvedRef::External(ref c, ref s) => (c.as_ref(), s.as_ref()),
ResolvedRef::Global(root, s) => (root, s),
};
let resource_base = if let Some((base, _)) = matched_key.split_once('#') {
@ -402,7 +417,11 @@ impl<'a> ValidationContext<'a> {
};
let mut new_scope_buffer: Vec<String>;
let scope_to_pass = if target_schema.obj.id.is_none() {
let scope_to_pass = if let Some(ref tid) = target_schema.obj.id {
new_scope_buffer = effective_scope.to_vec();
new_scope_buffer.push(tid.clone());
&new_scope_buffer
} else {
if !resource_base.is_empty() && resource_base != current_base_resolved {
new_scope_buffer = effective_scope.to_vec();
new_scope_buffer.push(resource_base.to_string());
@ -410,8 +429,6 @@ impl<'a> ValidationContext<'a> {
} else {
effective_scope
}
} else {
effective_scope
};
// Calculate new overrides (Masking)
@ -427,11 +444,13 @@ impl<'a> ValidationContext<'a> {
scope_to_pass,
new_overrides,
false,
self.reporter, // Propagate reporter
self.registry,
);
let mut manual_ctx = target_ctx;
manual_ctx.path = self.path;
manual_ctx.depth = self.depth + 1;
manual_ctx.reporter = true;
// manual_ctx.reporter = true;
res.merge(manual_ctx.validate()?);
} else {
@ -443,84 +462,83 @@ impl<'a> ValidationContext<'a> {
}
}
if handled { Ok(Some(res)) } else { Ok(None) }
if handled {
// eprintln!("DEBUG: validate_refs returning Some with {} errors", res.errors.len());
Ok(Some(res))
} else {
Ok(None)
}
}
fn validate_core(&self, result: &mut ValidationResult) {
// Type
if !self.overrides.contains("type") {
if let Some(ref type_) = self.schema.type_ {
match type_ {
crate::schema::SchemaTypeOrArray::Single(t) => {
if !Validator::check_type(t, self.current) {
result.errors.push(ValidationError {
code: "INVALID_TYPE".to_string(),
message: format!("Expected type '{}'", t),
path: self.path.to_string(),
});
if let Some(ref type_) = self.schema.type_ {
match type_ {
crate::schema::SchemaTypeOrArray::Single(t) => {
if !Validator::check_type(t, self.current) {
result.errors.push(ValidationError {
code: "INVALID_TYPE".to_string(),
message: format!("Expected type '{}'", t),
path: self.path.to_string(),
});
}
}
crate::schema::SchemaTypeOrArray::Multiple(types) => {
let mut valid = false;
for t in types {
if Validator::check_type(t, self.current) {
valid = true;
break;
}
}
crate::schema::SchemaTypeOrArray::Multiple(types) => {
let mut valid = false;
for t in types {
if Validator::check_type(t, self.current) {
valid = true;
break;
}
}
if !valid {
result.errors.push(ValidationError {
code: "INVALID_TYPE".to_string(),
message: format!("Expected one of types {:?}", types),
path: self.path.to_string(),
});
}
if !valid {
result.errors.push(ValidationError {
code: "INVALID_TYPE".to_string(),
message: format!("Expected one of types {:?}", types),
path: self.path.to_string(),
});
}
}
}
}
// Const
if !self.overrides.contains("const") {
if let Some(ref const_val) = self.schema.const_ {
if !crate::util::equals(self.current, const_val) {
result.errors.push(ValidationError {
code: "CONST_VIOLATED".to_string(),
message: "Value does not match const".to_string(),
path: self.path.to_string(),
});
} else {
if let Some(obj) = self.current.as_object() {
result.evaluated_keys.extend(obj.keys().cloned());
} else if let Some(arr) = self.current.as_array() {
result.evaluated_indices.extend(0..arr.len());
}
if let Some(ref const_val) = self.schema.const_ {
if !crate::util::equals(self.current, const_val) {
result.errors.push(ValidationError {
code: "CONST_VIOLATED".to_string(),
message: "Value does not match const".to_string(),
path: self.path.to_string(),
});
} else {
if let Some(obj) = self.current.as_object() {
result.evaluated_keys.extend(obj.keys().cloned());
} else if let Some(arr) = self.current.as_array() {
result.evaluated_indices.extend(0..arr.len());
}
}
}
// Enum
if !self.overrides.contains("enum") {
if let Some(ref enum_vals) = self.schema.enum_ {
let mut found = false;
for val in enum_vals {
if crate::util::equals(self.current, val) {
found = true;
break;
}
if let Some(ref enum_vals) = self.schema.enum_ {
let mut found = false;
for val in enum_vals {
if crate::util::equals(self.current, val) {
found = true;
break;
}
if !found {
result.errors.push(ValidationError {
code: "ENUM_MISMATCH".to_string(),
message: "Value is not in enum".to_string(),
path: self.path.to_string(),
});
} else {
if let Some(obj) = self.current.as_object() {
result.evaluated_keys.extend(obj.keys().cloned());
} else if let Some(arr) = self.current.as_array() {
result.evaluated_indices.extend(0..arr.len());
}
}
if !found {
result.errors.push(ValidationError {
code: "ENUM_MISMATCH".to_string(),
message: "Value is not in enum".to_string(),
path: self.path.to_string(),
});
} else {
if let Some(obj) = self.current.as_object() {
result.evaluated_keys.extend(obj.keys().cloned());
} else if let Some(arr) = self.current.as_array() {
result.evaluated_indices.extend(0..arr.len());
}
}
}
@ -528,60 +546,50 @@ impl<'a> ValidationContext<'a> {
fn validate_numeric(&self, result: &mut ValidationResult) {
if let Some(num) = self.current.as_f64() {
if !self.overrides.contains("minimum") {
if let Some(min) = self.schema.minimum {
if num < min {
result.errors.push(ValidationError {
code: "MINIMUM_VIOLATED".to_string(),
message: format!("Value {} < min {}", num, min),
path: self.path.to_string(),
});
}
if let Some(min) = self.schema.minimum {
if num < min {
result.errors.push(ValidationError {
code: "MINIMUM_VIOLATED".to_string(),
message: format!("Value {} < min {}", num, min),
path: self.path.to_string(),
});
}
}
if !self.overrides.contains("maximum") {
if let Some(max) = self.schema.maximum {
if num > max {
result.errors.push(ValidationError {
code: "MAXIMUM_VIOLATED".to_string(),
message: format!("Value {} > max {}", num, max),
path: self.path.to_string(),
});
}
if let Some(max) = self.schema.maximum {
if num > max {
result.errors.push(ValidationError {
code: "MAXIMUM_VIOLATED".to_string(),
message: format!("Value {} > max {}", num, max),
path: self.path.to_string(),
});
}
}
if !self.overrides.contains("exclusiveMinimum") {
if let Some(ex_min) = self.schema.exclusive_minimum {
if num <= ex_min {
result.errors.push(ValidationError {
code: "EXCLUSIVE_MINIMUM_VIOLATED".to_string(),
message: format!("Value {} <= ex_min {}", num, ex_min),
path: self.path.to_string(),
});
}
if let Some(ex_min) = self.schema.exclusive_minimum {
if num <= ex_min {
result.errors.push(ValidationError {
code: "EXCLUSIVE_MINIMUM_VIOLATED".to_string(),
message: format!("Value {} <= ex_min {}", num, ex_min),
path: self.path.to_string(),
});
}
}
if !self.overrides.contains("exclusiveMaximum") {
if let Some(ex_max) = self.schema.exclusive_maximum {
if num >= ex_max {
result.errors.push(ValidationError {
code: "EXCLUSIVE_MAXIMUM_VIOLATED".to_string(),
message: format!("Value {} >= ex_max {}", num, ex_max),
path: self.path.to_string(),
});
}
if let Some(ex_max) = self.schema.exclusive_maximum {
if num >= ex_max {
result.errors.push(ValidationError {
code: "EXCLUSIVE_MAXIMUM_VIOLATED".to_string(),
message: format!("Value {} >= ex_max {}", num, ex_max),
path: self.path.to_string(),
});
}
}
if !self.overrides.contains("multipleOf") {
if let Some(multiple_of) = self.schema.multiple_of {
let val = num / multiple_of;
if (val - val.round()).abs() > f64::EPSILON {
result.errors.push(ValidationError {
code: "MULTIPLE_OF_VIOLATED".to_string(),
message: format!("Value {} not multiple of {}", num, multiple_of),
path: self.path.to_string(),
});
}
if let Some(multiple_of) = self.schema.multiple_of {
let val = num / multiple_of;
if (val - val.round()).abs() > f64::EPSILON {
result.errors.push(ValidationError {
code: "MULTIPLE_OF_VIOLATED".to_string(),
message: format!("Value {} not multiple of {}", num, multiple_of),
path: self.path.to_string(),
});
}
}
}
@ -589,81 +597,73 @@ impl<'a> ValidationContext<'a> {
fn validate_string(&self, result: &mut ValidationResult) {
if let Some(s) = self.current.as_str() {
if !self.overrides.contains("minLength") {
if let Some(min) = self.schema.min_length {
if (s.chars().count() as f64) < min {
result.errors.push(ValidationError {
code: "MIN_LENGTH_VIOLATED".to_string(),
message: format!("Length < min {}", min),
path: self.path.to_string(),
});
}
if let Some(min) = self.schema.min_length {
if (s.chars().count() as f64) < min {
result.errors.push(ValidationError {
code: "MIN_LENGTH_VIOLATED".to_string(),
message: format!("Length < min {}", min),
path: self.path.to_string(),
});
}
}
if !self.overrides.contains("maxLength") {
if let Some(max) = self.schema.max_length {
if (s.chars().count() as f64) > max {
result.errors.push(ValidationError {
code: "MAX_LENGTH_VIOLATED".to_string(),
message: format!("Length > max {}", max),
path: self.path.to_string(),
});
}
if let Some(max) = self.schema.max_length {
if (s.chars().count() as f64) > max {
result.errors.push(ValidationError {
code: "MAX_LENGTH_VIOLATED".to_string(),
message: format!("Length > max {}", max),
path: self.path.to_string(),
});
}
}
if !self.overrides.contains("pattern") {
if let Some(ref compiled_re) = self.schema.compiled_pattern {
if !compiled_re.0.is_match(s) {
if let Some(ref compiled_re) = self.schema.compiled_pattern {
if !compiled_re.0.is_match(s) {
result.errors.push(ValidationError {
code: "PATTERN_VIOLATED".to_string(),
message: format!("Pattern mismatch {:?}", self.schema.pattern),
path: self.path.to_string(),
});
}
} else if let Some(ref pattern) = self.schema.pattern {
if let Ok(re) = Regex::new(pattern) {
if !re.is_match(s) {
result.errors.push(ValidationError {
code: "PATTERN_VIOLATED".to_string(),
message: format!("Pattern mismatch {:?}", self.schema.pattern),
message: format!("Pattern mismatch {}", pattern),
path: self.path.to_string(),
});
}
} else if let Some(ref pattern) = self.schema.pattern {
if let Ok(re) = Regex::new(pattern) {
if !re.is_match(s) {
result.errors.push(ValidationError {
code: "PATTERN_VIOLATED".to_string(),
message: format!("Pattern mismatch {}", pattern),
path: self.path.to_string(),
});
}
}
}
}
}
}
fn validate_format(&self, result: &mut ValidationResult) {
if !self.overrides.contains("format") {
if let Some(ref compiled_fmt) = self.schema.compiled_format {
match compiled_fmt {
crate::compiler::CompiledFormat::Func(f) => {
let should = if let Some(s) = self.current.as_str() {
!s.is_empty()
} else {
true
};
if should {
if let Err(e) = f(self.current) {
result.errors.push(ValidationError {
code: "FORMAT_MISMATCH".to_string(),
message: format!("Format error: {}", e),
path: self.path.to_string(),
});
}
if let Some(ref compiled_fmt) = self.schema.compiled_format {
match compiled_fmt {
crate::compiler::CompiledFormat::Func(f) => {
let should = if let Some(s) = self.current.as_str() {
!s.is_empty()
} else {
true
};
if should {
if let Err(e) = f(self.current) {
result.errors.push(ValidationError {
code: "FORMAT_MISMATCH".to_string(),
message: format!("Format error: {}", e),
path: self.path.to_string(),
});
}
}
crate::compiler::CompiledFormat::Regex(re) => {
if let Some(s) = self.current.as_str() {
if !re.is_match(s) {
result.errors.push(ValidationError {
code: "FORMAT_MISMATCH".to_string(),
message: "Format regex mismatch".to_string(),
path: self.path.to_string(),
});
}
}
crate::compiler::CompiledFormat::Regex(re) => {
if let Some(s) = self.current.as_str() {
if !re.is_match(s) {
result.errors.push(ValidationError {
code: "FORMAT_MISMATCH".to_string(),
message: "Format regex mismatch".to_string(),
path: self.path.to_string(),
});
}
}
}
@ -733,7 +733,7 @@ impl<'a> ValidationContext<'a> {
self.scope,
HashSet::new(),
self.extensible,
true,
false,
);
result.merge(derived.validate()?);
}
@ -759,11 +759,10 @@ impl<'a> ValidationContext<'a> {
val,
&new_path,
self.scope,
HashSet::new(), // Property sub-schemas start fresh (no overrides passed down)
HashSet::new(),
next_extensible,
false, // Not reporter
false,
);
let item_res = derived.validate()?;
result.merge(item_res);
result.evaluated_keys.insert(key.clone());
@ -771,7 +770,6 @@ impl<'a> ValidationContext<'a> {
}
}
// 6. Pattern Properties
if let Some(ref compiled_pp) = self.schema.compiled_pattern_properties {
for (compiled_re, sub_schema) in compiled_pp {
for (key, val) in obj {
@ -825,6 +823,10 @@ impl<'a> ValidationContext<'a> {
}
}
}
// 8. Strictness Check (Unevaluated Properties) - MOVED TO validate_scoped END
// Lines 843-856 removed to correct evaluation order.
// if !self.extensible && !self.reporter { ... }
Ok(())
}
@ -872,7 +874,7 @@ impl<'a> ValidationContext<'a> {
// 3. Contains
if let Some(ref contains_schema) = self.schema.contains {
let mut match_count = 0;
let mut _match_count = 0;
for (i, param) in arr.iter().enumerate() {
let derived = self.derive(
contains_schema,
@ -881,16 +883,35 @@ impl<'a> ValidationContext<'a> {
self.scope,
HashSet::new(),
self.extensible,
true,
false,
);
let check = derived.validate()?;
if check.is_valid() {
match_count += 1;
_match_count += 1;
result.evaluated_indices.insert(i);
}
}
// ... (matches/min/max logic remains) ...
// Min Contains (Default 1)
let min = self.schema.min_contains.unwrap_or(1.0) as usize;
if _match_count < min {
result.errors.push(ValidationError {
code: "CONTAINS_VIOLATED".to_string(),
message: format!("Contains matches {} < min {}", _match_count, min),
path: self.path.to_string(),
});
}
// Max Contains
if let Some(max) = self.schema.max_contains {
if _match_count > max as usize {
result.errors.push(ValidationError {
code: "CONTAINS_VIOLATED".to_string(),
message: format!("Contains matches {} > max {}", _match_count, max),
path: self.path.to_string(),
});
}
}
}
// 4. Items (and PrefixItems)
@ -946,7 +967,8 @@ impl<'a> ValidationContext<'a> {
if let Some(ref all_of) = self.schema.all_of {
for sub in all_of {
let derived = self.derive_for_schema(sub, true); // Reporter (Fragment)
result.merge(derived.validate()?);
let res = derived.validate()?;
result.merge(res);
}
}
@ -1024,10 +1046,14 @@ impl<'a> ValidationContext<'a> {
let derived_if = self.derive_for_schema(if_schema, true);
let if_res = derived_if.validate()?;
// Always merge evaluated keys from IF per JSON Schema spec (it is evaluated regardless of result)
result.evaluated_keys.extend(if_res.evaluated_keys.clone());
result
.evaluated_indices
.extend(if_res.evaluated_indices.clone());
if if_res.is_valid() {
// IF passed -> Check THEN
result.merge(if_res);
if let Some(ref then_schema) = self.schema.then_ {
let derived_then = self.derive_for_schema(then_schema, true);
result.merge(derived_then.validate()?);
@ -1044,37 +1070,18 @@ impl<'a> ValidationContext<'a> {
Ok(())
}
fn check_strictness(&self, result: &ValidationResult) -> Result<(), ValidationError> {
fn check_strictness(&self, result: &mut ValidationResult) {
// Only check if strict (extensible = false)
// Also skip if reporter mode (collecting keys for composition/refs)
if self.extensible || self.reporter {
return Ok(());
return;
}
// 1. Unevaluated Properties
if let Some(obj) = self.current.as_object() {
for key in obj.keys() {
if !result.evaluated_keys.contains(key) {
// Implicit Shadowing: If a key is shadowed, we largely consider it "handled" by the child
// and thus it shouldn't trigger strictness violations in the parent.
// However, if the child defines it, it should have been validated (and thus in evaluated_keys)
// by the child's validation run.
// The Parent is running here.
// If the Parent has `const: entity`, and Child has `const: person`.
// Child validates `type`. `evaluated_keys` += `type`.
// Parent skips `type`. `evaluated_keys` does NOT add `type`.
// BUT `result` passed to Parent is merged from Child?
// NO. `validate_refs` creates a NEW scope/result context for the $ref,
// but it merges the *returned* result into the current result.
// SO `evaluated_keys` from Child SHOULD be present here if we merged them correctly.
// Wait, `derive` creates a fresh result? No, `validate` creates a fresh result.
// In `validate_refs`, we call `derived.validate()?` and `res.merge(derived.validate()?)`.
// `ValidationResult::merge` merges `evaluated_keys`.
// So if the Child validated the key, it is in `result.evaluated_keys`.
// So we don't need to check overrides here.
return Err(ValidationError {
if !result.evaluated_keys.contains(key) && !self.overrides.contains(key) {
result.errors.push(ValidationError {
code: "STRICT_PROPERTY_VIOLATION".to_string(),
message: format!("Unexpected property '{}'", key),
path: format!("{}/{}", self.path, key),
@ -1087,7 +1094,7 @@ impl<'a> ValidationContext<'a> {
if let Some(arr) = self.current.as_array() {
for i in 0..arr.len() {
if !result.evaluated_indices.contains(&i) {
return Err(ValidationError {
result.errors.push(ValidationError {
code: "STRICT_ITEM_VIOLATION".to_string(),
message: format!("Unexpected item at index {}", i),
path: format!("{}/{}", self.path, i),
@ -1095,8 +1102,6 @@ impl<'a> ValidationContext<'a> {
}
}
}
Ok(())
}
}
@ -1104,6 +1109,11 @@ pub struct Validator;
impl Validator {
pub fn check_type(t: &str, val: &Value) -> bool {
if let Value::String(s) = val {
if s.is_empty() {
return true;
}
}
match t {
"null" => val.is_null(),
"boolean" => val.is_boolean(),
@ -1117,46 +1127,85 @@ impl Validator {
}
pub fn resolve_ref<'a>(
root: &'a CompiledSchema,
root: &'a Schema,
ref_string: &str,
scope: &str,
registry: &'a crate::registry::Registry,
) -> Option<(ResolvedRef<'a>, String)> {
// 0. Fast path for local fragments (e.g., "#/definitions/foo")
// This is necessary when scope is not a valid URL (e.g. "root" in tests)
if ref_string.starts_with('#') {
if let Some(indexrs) = &root.obj.compiled_schemas {
eprintln!("DEBUG: Resolving local fragment '{}'", ref_string);
// println!("DEBUG: Resolving local fragment '{}'", ref_string);
// for k in indexrs.schemas.keys() {
// println!("DEBUG: Key in index: {}", k);
// }
if let Some(s) = indexrs.schemas.get(ref_string) {
return Some((ResolvedRef::Local(s.as_ref()), ref_string.to_string()));
}
} else {
// println!("DEBUG: No compiled_schemas index found on root!");
}
}
// 1. Try resolving against scope (Absolute or Relative)
if let Ok(base) = url::Url::parse(scope) {
if let Ok(joined) = base.join(ref_string) {
let joined_str = joined.to_string();
// Local
if let Some(s) = root.index.get(&joined_str) {
return Some((ResolvedRef::Local(s), joined_str));
if let Some(indexrs) = &root.obj.compiled_schemas {
if let Some(s) = indexrs.schemas.get(&joined_str) {
return Some((ResolvedRef::Local(s.as_ref()), joined_str));
}
}
// Fallback: Try decoding to match index keys that might not be fully encoded
if let Ok(decoded) = percent_encoding::percent_decode_str(&joined_str).decode_utf8() {
let decoded_str = decoded.to_string();
if decoded_str != joined_str {
if let Some(s) = root.index.get(&decoded_str) {
return Some((ResolvedRef::Local(s), decoded_str));
if let Some(indexrs) = &root.obj.compiled_schemas {
if let Some(s) = indexrs.schemas.get(&decoded_str) {
return Some((ResolvedRef::Local(s.as_ref()), decoded_str));
}
}
}
}
// Global
let resource_base = if let Some((base, _)) = joined_str.split_once('#') {
base
} else {
&joined_str
};
if let Some(s) = registry.schemas.get(&joined_str) {
return Some((ResolvedRef::Global(s.as_ref(), s.as_ref()), joined_str));
}
}
} else {
// Fallback for non-URI scopes (e.g. "root" in tests)
// If scope is just a string key, and ref starts with #, simple concat
if ref_string.starts_with('#') {
let joined_str = format!("{}{}", scope, ref_string);
if let Ok(registry) = REGISTRY.read() {
if let Some(compiled) = registry.get(resource_base) {
if let Some(s) = compiled.index.get(&joined_str) {
return Some((
ResolvedRef::External(compiled.clone(), s.clone()),
joined_str,
));
// Local
if let Some(indexrs) = &root.obj.compiled_schemas {
if let Some(s) = indexrs.schemas.get(&joined_str) {
return Some((ResolvedRef::Local(s.as_ref()), joined_str));
}
}
// Fallback: Try decoding to match index keys that might not be fully encoded
if let Ok(decoded) = percent_encoding::percent_decode_str(&joined_str).decode_utf8() {
let decoded_str = decoded.to_string();
if decoded_str != joined_str {
if let Some(indexrs) = &root.obj.compiled_schemas {
if let Some(s) = indexrs.schemas.get(&decoded_str) {
return Some((ResolvedRef::Local(s.as_ref()), decoded_str));
}
}
}
}
// Global
if let Some(s) = registry.schemas.get(&joined_str) {
return Some((ResolvedRef::Global(s.as_ref(), s.as_ref()), joined_str));
}
}
}
@ -1164,8 +1213,10 @@ impl Validator {
if let Ok(parsed) = url::Url::parse(ref_string) {
let absolute = parsed.to_string();
// Local
if let Some(s) = root.index.get(&absolute) {
return Some((ResolvedRef::Local(s), absolute));
if let Some(indexrs) = &root.obj.compiled_schemas {
if let Some(s) = indexrs.schemas.get(&absolute) {
return Some((ResolvedRef::Local(s.as_ref()), absolute));
}
}
// Global
@ -1174,10 +1225,11 @@ impl Validator {
} else {
&absolute
};
if let Ok(registry) = REGISTRY.read() {
if let Some(compiled) = registry.get(resource_base) {
if let Some(s) = compiled.index.get(&absolute) {
return Some((ResolvedRef::External(compiled.clone(), s.clone()), absolute));
if let Some(compiled) = registry.schemas.get(resource_base) {
if let Some(indexrs) = &compiled.obj.compiled_schemas {
if let Some(s) = indexrs.schemas.get(&absolute) {
return Some((ResolvedRef::Global(compiled.as_ref(), s.as_ref()), absolute));
}
}
}
@ -1185,35 +1237,45 @@ impl Validator {
// 3. Fallback: Try as simple string key (Global Registry)
// This supports legacy/JSPG-style IDs that are not valid URIs (e.g. "punc_person")
if let Ok(registry) = REGISTRY.read() {
if let Some(compiled) = registry.get(ref_string) {
if let Some(s) = compiled.index.get(ref_string) {
return Some((
ResolvedRef::External(compiled.clone(), s.clone()),
ref_string.to_string(),
));
}
}
if let Some(compiled) = registry.schemas.get(ref_string) {
eprintln!("DEBUG: Resolved Global Ref (fallback): {}", ref_string);
return Some((
ResolvedRef::Global(compiled.as_ref(), compiled.as_ref()),
ref_string.to_string(),
));
}
eprintln!(
"DEBUG: Failed to resolve ref: '{}' scope: '{}'",
ref_string, scope
);
None
}
/// Validates `instance` against the schema registered under `schema_id` in the
/// global `REGISTRY`.
///
/// Acquires a read guard on `REGISTRY` (the `unwrap` panics if `read()` returns
/// an error) and forwards to `Self::validate_with_registry`, returning its
/// `crate::drop::Drop` unchanged.
pub fn validate(schema_id: &str, instance: &Value) -> crate::drop::Drop {
// NOTE(review): `compiled_opt` is not read anywhere before this function's
// closing brace; it looks like a leftover from the pre-registry lookup path —
// confirm and drop it.
let compiled_opt = REGISTRY.read().unwrap().get(schema_id);
// Keep the guard in a local so the reference passed below stays valid for the call.
let registry = REGISTRY.read().unwrap();
Self::validate_with_registry(schema_id, instance, &registry)
}
if let Some(compiled) = compiled_opt {
let root_id = compiled.root.obj.id.clone().unwrap_or_default();
let scope = vec![root_id];
pub fn validate_with_registry(
schema_id: &str,
instance: &Value,
registry: &crate::registry::Registry,
) -> crate::drop::Drop {
if let Some(root) = registry.get(schema_id) {
let root_id = root.obj.id.clone().unwrap_or_default();
let scope = vec![root_id.clone()];
// Initial Context
let ctx = ValidationContext::new(
&compiled,
&compiled.root,
&root,
&root,
instance,
&scope,
HashSet::new(),
false,
false, // reporter = false (Default)
registry, // Use the passed registry
);
match ctx.validate() {