From 29c5160b4952e984b6057a3a33c65048b2c5cae0 Mon Sep 17 00:00:00 2001 From: Alex Groleau Date: Wed, 18 Feb 2026 13:45:40 -0500 Subject: [PATCH] jspg masking system installed --- GEMINI.md | 21 ++ src/drop.rs | 85 +++--- src/lib.rs | 126 +++++--- src/tests.rs | 24 ++ src/util.rs | 109 +++++-- src/validator.rs | 554 +++++++++++++++++++++--------------- tests/fixtures/masking.json | 171 +++++++++++ tests/tests.rs | 24 ++ 8 files changed, 789 insertions(+), 325 deletions(-) create mode 100644 tests/fixtures/masking.json diff --git a/GEMINI.md b/GEMINI.md index c49f80c..4f360a2 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -31,6 +31,17 @@ Loads and compiles the entire schema registry into the session's memory, atomica * Compiles schemas into validators. * **Returns**: `{"response": "success"}` or an error object. +### `mask_json_schema(schema_id text, instance jsonb) -> jsonb` + +Validates a JSON instance and returns a new JSON object with unknown properties removed (pruned) based on the schema. + +* **Inputs**: + * `schema_id`: The `$id` of the schema to mask against. + * `instance`: The JSON data to mask. +* **Returns**: + * On success: A `Drop` containing the **masked data**. + * On failure: A `Drop` containing validation errors. + ### `validate_json_schema(schema_id text, instance jsonb) -> jsonb` Validates a JSON instance against a pre-compiled schema. @@ -83,6 +94,16 @@ JSPG enforces a "Secure by Default" philosophy. All schemas are treated as if `u ### 4. Format Leniency for Empty Strings To simplify frontend form logic, the format validators for `uuid`, `date-time`, and `email` explicitly allow empty strings (`""`). This treats an empty string as "present but unset" rather than "invalid format". +### 5. Masking (Constructive Validation) +JSPG supports a "Constructive Validation" mode via `mask_json_schema`. This is designed for high-performance API responses where the schema dictates the exact shape of the returned data. + +* **Mechanism**: The validator traverses the instance against the schema. +* **Valid Fields**: Kept in the output. +* **Unknown/Extra Fields**: Silently removed (pruned) if `extensible: false` (default). +* **Invalid Fields**: Still trigger standard validation errors. + +This allows the database to return "raw" joined rows (e.g. `SELECT * FROM person JOIN organization ...`) and have JSPG automatically shape the result into the expected API response, removing any internal or unrelated columns not defined in the schema. + ## 🏗️ Architecture The extension is written in Rust using `pgrx` and structures its schema parser to mirror the Punc Generator's design: diff --git a/src/drop.rs b/src/drop.rs index a25a5a5..1b8c7ad 100644 --- a/src/drop.rs +++ b/src/drop.rs @@ -3,59 +3,66 @@ use serde_json::Value; #[derive(Debug, Serialize, Deserialize, Clone)] pub struct Drop { - // We don't need id, frequency, etc. for the validation result specifically, - // as they are added by the SQL wrapper. We just need to conform to the structure. - // The user said "Validator::validate always needs to return this drop type". - // So we should match it as closely as possible. - - #[serde(rename = "type")] - pub type_: String, // "drop" - - #[serde(skip_serializing_if = "Option::is_none")] - pub response: Option, - - #[serde(default)] - pub errors: Vec, + // We don't need id, frequency, etc. for the validation result specifically, + // as they are added by the SQL wrapper. We just need to conform to the structure. + // The user said "Validator::validate always needs to return this drop type". + // So we should match it as closely as possible. + #[serde(rename = "type")] + pub type_: String, // "drop" + + #[serde(skip_serializing_if = "Option::is_none")] + pub response: Option, + + #[serde(default)] + pub errors: Vec, } impl Drop { - pub fn new() -> Self { - Self { - type_: "drop".to_string(), - response: None, - errors: vec![], - } + pub fn new() -> Self { + Self { + type_: "drop".to_string(), + response: None, + errors: vec![], } + } - pub fn success() -> Self { - Self { - type_: "drop".to_string(), - response: Some(serde_json::json!({ "result": "success" })), // Or appropriate success response - errors: vec![], - } + pub fn success() -> Self { + Self { + type_: "drop".to_string(), + response: Some(serde_json::json!({ "result": "success" })), // Or appropriate success response + errors: vec![], } + } - pub fn with_errors(errors: Vec) -> Self { - Self { - type_: "drop".to_string(), - response: None, - errors, - } + pub fn success_with_val(val: Value) -> Self { + Self { + type_: "drop".to_string(), + response: Some(val), + errors: vec![], } + } + + pub fn with_errors(errors: Vec) -> Self { + Self { + type_: "drop".to_string(), + response: None, + errors, + } + } } #[derive(Debug, Serialize, Deserialize, Clone)] pub struct Error { - #[serde(skip_serializing_if = "Option::is_none")] - pub punc: Option, - pub code: String, - pub message: String, - pub details: ErrorDetails, + #[serde(skip_serializing_if = "Option::is_none")] + pub punc: Option, + pub code: String, + pub message: String, + pub details: ErrorDetails, } #[derive(Debug, Serialize, Deserialize, Clone)] pub struct ErrorDetails { - pub path: String, - // Extensions can be added here (package, cause, etc) - // For now, validator only provides path + pub path: String, + // Extensions can be added here (package, cause, etc) + // For now, validator only provides path } diff --git a/src/lib.rs b/src/lib.rs index 7c576d5..43eb40e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -110,6 +110,65 @@ fn cache_json_schemas(enums: JsonB, types: JsonB, puncs: JsonB) -> JsonB { JsonB(json!({ "response": "success" })) } +#[pg_extern(strict, parallel_safe)] +fn mask_json_schema(schema_id: &str, instance: JsonB) -> JsonB { + // 1. Acquire Snapshot + let validator_arc = { + let lock = GLOBAL_VALIDATOR.read().unwrap(); + lock.clone() + }; + + // 2. Validate (Lock-Free) + if let Some(validator) = validator_arc { + // We need a mutable copy of the value to mask it + let mut mutable_instance = instance.0.clone(); + + match validator.mask(schema_id, &mut mutable_instance) { + Ok(result) => { + // If valid, return the MASKED instance + if result.is_valid() { + let drop = crate::drop::Drop::success_with_val(mutable_instance); + JsonB(serde_json::to_value(drop).unwrap()) + } else { + // If invalid, return errors (Schema Validation Errors) + let errors: Vec = result + .errors + .into_iter() + .map(|e| crate::drop::Error { + punc: None, + code: e.code, + message: e.message, + details: crate::drop::ErrorDetails { path: e.path }, + }) + .collect(); + let drop = crate::drop::Drop::with_errors(errors); + JsonB(serde_json::to_value(drop).unwrap()) + } + } + Err(e) => { + // Schema Not Found or other fatal error + let error = crate::drop::Error { + punc: None, + code: e.code, + message: e.message, + details: crate::drop::ErrorDetails { path: e.path }, + }; + let drop = crate::drop::Drop::with_errors(vec![error]); + JsonB(serde_json::to_value(drop).unwrap()) + } + } + } else { + JsonB(json!({ + "punc": null, + "errors": [{ + "code": "VALIDATOR_NOT_INITIALIZED", + "message": "JSON Schemas have not been cached yet. Run cache_json_schemas()", + "details": { "path": "" } + }] + })) + } +} + #[pg_extern(strict, parallel_safe)] fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB { // 1. Acquire Snapshot @@ -120,8 +179,37 @@ fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB { // 2. Validate (Lock-Free) if let Some(validator) = validator_arc { - let drop = validator.validate(schema_id, &instance.0); - JsonB(serde_json::to_value(drop).unwrap()) + match validator.validate(schema_id, &instance.0) { + Ok(result) => { + if result.is_valid() { + let drop = crate::drop::Drop::success(); + JsonB(serde_json::to_value(drop).unwrap()) + } else { + let errors: Vec = result + .errors + .into_iter() + .map(|e| crate::drop::Error { + punc: None, + code: e.code, + message: e.message, + details: crate::drop::ErrorDetails { path: e.path }, + }) + .collect(); + let drop = crate::drop::Drop::with_errors(errors); + JsonB(serde_json::to_value(drop).unwrap()) + } + } + Err(e) => { + let error = crate::drop::Error { + punc: None, + code: e.code, + message: e.message, + details: crate::drop::ErrorDetails { path: e.path }, + }; + let drop = crate::drop::Drop::with_errors(vec![error]); + JsonB(serde_json::to_value(drop).unwrap()) + } + } } else { JsonB(json!({ "punc": null, @@ -136,34 +224,11 @@ fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB { #[pg_extern(strict, parallel_safe)] fn json_schema_cached(schema_id: &str) -> bool { - // Acquire Snapshot for safe read if let Some(validator) = GLOBAL_VALIDATOR.read().unwrap().as_ref() { - // We can expose a get/contains method on Validator or peek inside - // Since Validator owns Registry, we need a method there or hack it - // Let's assume Validator exposes a minimal check or we just check validity of that schema? - // Actually, registry access is private inside Validator now. - // We should add `has_schema` to Validator. - // For now, let's just cheat: Validate against it, if schema not found error, return false. - // Or better: Add `has_schema` to Validator. - // Let's do that in a follow up if needed, but for now we need a way. - // I'll add `has_schema` to Validator via a quick task or assume it exists? - // No, I just overwrote Validator without it. - // Better Logic: Try to validate "null" against it? - // No, simpler: Update Validator to expose has_schema. - // But I cannot call replace_validator now. - // Wait, I can try to access the public underlying registry if I expose it? - // Validator struct: `pub struct Validator { registry: Registry }`? - // No, keeping it opaque is better. - // Let's execute validate and check if error code is SCHEMA_NOT_FOUND. - let drop = validator.validate(schema_id, &serde_json::Value::Null); // Minimal payload - if !drop.errors.is_empty() { - for e in drop.errors { - if e.code == "SCHEMA_NOT_FOUND" { - return false; - } - } + match validator.validate(schema_id, &serde_json::Value::Null) { + Err(e) if e.code == "SCHEMA_NOT_FOUND" => false, + _ => true, } - true } else { false } @@ -178,13 +243,8 @@ fn clear_json_schemas() -> JsonB { #[pg_extern(strict, parallel_safe)] fn show_json_schemas() -> JsonB { - // Use _validator to suppress warning if let Some(_validator) = GLOBAL_VALIDATOR.read().unwrap().as_ref() { - // Debug dump - // We need Validator to expose len() or debug info? - // Or just return success for now as in original code. JsonB(json!({ "response": "success", "status": "active" })) - // Ideally: validator.registry_len() } else { JsonB(json!({ "response": "success", "status": "empty" })) } diff --git a/src/tests.rs b/src/tests.rs index 3b10636..fab586e 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -1097,6 +1097,30 @@ fn test_pattern_1() { crate::util::run_test_file_at_index(&path, 1).unwrap(); } +#[pg_test] +fn test_masking_0() { + let path = format!("{}/tests/fixtures/masking.json", env!("CARGO_MANIFEST_DIR")); + crate::util::run_test_file_at_index(&path, 0).unwrap(); +} + +#[pg_test] +fn test_masking_1() { + let path = format!("{}/tests/fixtures/masking.json", env!("CARGO_MANIFEST_DIR")); + crate::util::run_test_file_at_index(&path, 1).unwrap(); +} + +#[pg_test] +fn test_masking_2() { + let path = format!("{}/tests/fixtures/masking.json", env!("CARGO_MANIFEST_DIR")); + crate::util::run_test_file_at_index(&path, 2).unwrap(); +} + +#[pg_test] +fn test_masking_3() { + let path = format!("{}/tests/fixtures/masking.json", env!("CARGO_MANIFEST_DIR")); + crate::util::run_test_file_at_index(&path, 3).unwrap(); +} + #[pg_test] fn test_max_properties_0() { let path = format!("{}/tests/fixtures/maxProperties.json", env!("CARGO_MANIFEST_DIR")); diff --git a/src/util.rs b/src/util.rs index abfba1f..2e50080 100644 --- a/src/util.rs +++ b/src/util.rs @@ -183,15 +183,51 @@ pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> { if let Some(sid) = schema_id { let result = validator.validate(&sid, &test.data); - if !result.errors.is_empty() != !test.valid { - failures.push(format!( - "[{}] Test '{}' failed. Expected: {}, Got: {}. Errors: {:?}", - group.description, - test.description, - test.valid, - !result.errors.is_empty(), // "Got Invalid?" - result.errors - )); + let (got_valid, _errors) = match &result { + Ok(res) => (res.is_valid(), &res.errors), + Err(_e) => { + // If we encounter an execution error (e.g. Schema Not Found), + // we treat it as a test failure. + (false, &vec![]) + } + }; + + if let Some(expected) = &test.expected { + // Masking Test + let mut data_for_mask = test.data.clone(); + match validator.mask(&sid, &mut data_for_mask) { + Ok(_) => { + if !equals(&data_for_mask, expected) { + let msg = format!( + "Masking Test '{}' failed.\nExpected: {:?}\nGot: {:?}", + test.description, expected, data_for_mask + ); + eprintln!("{}", msg); + failures.push(msg); + } + } + Err(e) => { + let msg = format!( + "Masking Test '{}' failed with execution error: {:?}", + test.description, e + ); + eprintln!("{}", msg); + failures.push(msg); + } + } + } else { + // Standard Validation Test + if got_valid != test.valid { + let error_msg = match &result { + Ok(res) => format!("{:?}", res.errors), + Err(e) => format!("Execution Error: {:?}", e), + }; + + failures.push(format!( + "[{}] Test '{}' failed. Expected: {}, Got: {}. Errors: {}", + group.description, test.description, test.valid, got_valid, error_msg + )); + } } } else { failures.push(format!( @@ -310,28 +346,47 @@ pub fn run_test_file(path: &str) -> Result<(), String> { // Use explicit schema_id from test, or default to unique_id let schema_id = test.schema_id.as_deref().unwrap_or(&unique_id).to_string(); - let drop = validator.validate(&schema_id, &test.data); + let result = validator.validate(&schema_id, &test.data); if test.valid { - if !drop.errors.is_empty() { - let msg = format!( - "Test failed (expected valid): {}\nSchema: {:?}\nData: {:?}\nErrors: {:?}", - test.description, - group.schema, // We might need to find the actual schema used if schema_id is custom - test.data, - drop.errors - ); - eprintln!("{}", msg); - failures.push(msg); + match result { + Ok(res) => { + if !res.is_valid() { + let msg = format!( + "Test failed (expected valid): {}\nSchema: {:?}\nData: {:?}\nErrors: {:?}", + test.description, + group.schema, // We might need to find the actual schema used if schema_id is custom + test.data, + res.errors + ); + eprintln!("{}", msg); + failures.push(msg); + } + } + Err(e) => { + let msg = format!( + "Test failed (expected valid) but got execution error: {}\nSchema: {:?}\nData: {:?}\nError: {:?}", + test.description, group.schema, test.data, e + ); + eprintln!("{}", msg); + failures.push(msg); + } } } else { - if drop.errors.is_empty() { - let msg = format!( - "Test failed (expected invalid): {}\nSchema: {:?}\nData: {:?}\nErrors: (Empty)", - test.description, group.schema, test.data - ); - println!("{}", msg); - failures.push(msg); + match result { + Ok(res) => { + if res.is_valid() { + let msg = format!( + "Test failed (expected invalid): {}\nSchema: {:?}\nData: {:?}", + test.description, group.schema, test.data + ); + eprintln!("{}", msg); + failures.push(msg); + } + } + Err(_) => { + // Expected invalid, got error (which implies invalid/failure), so this is PASS. + } } } } diff --git a/src/validator.rs b/src/validator.rs index fc8293f..8bf3f2f 100644 --- a/src/validator.rs +++ b/src/validator.rs @@ -6,7 +6,7 @@ use regex::Regex; use serde_json::Value; use std::collections::HashSet; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, serde::Serialize)] pub struct ValidationError { pub code: String, pub message: String, @@ -18,10 +18,12 @@ pub enum ResolvedRef<'a> { Global(&'a Schema, &'a Schema), } -#[derive(Debug, Default, Clone)] +#[derive(Debug, Default, Clone, serde::Serialize)] pub struct ValidationResult { pub errors: Vec, + #[serde(skip)] pub evaluated_keys: HashSet, + #[serde(skip)] pub evaluated_indices: HashSet, } @@ -41,7 +43,110 @@ impl ValidationResult { } } -pub struct ValidationContext<'a> { +use std::ptr::NonNull; + +pub trait ValidationInstance<'a>: Copy + Clone { + fn as_value(&self) -> &'a Value; + fn child_at_key(&self, key: &str) -> Option; + fn child_at_index(&self, idx: usize) -> Option; + fn prune_object(&self, _keys: &HashSet) {} + fn prune_array(&self, _indices: &HashSet) {} +} + +#[derive(Clone, Copy)] +pub struct ReadOnlyInstance<'a>(pub &'a Value); + +impl<'a> ValidationInstance<'a> for ReadOnlyInstance<'a> { + fn as_value(&self) -> &'a Value { + self.0 + } + + fn child_at_key(&self, key: &str) -> Option { + self.0.get(key).map(ReadOnlyInstance) + } + + fn child_at_index(&self, idx: usize) -> Option { + self.0.get(idx).map(ReadOnlyInstance) + } +} + +#[derive(Clone, Copy)] +pub struct MutableInstance { + ptr: NonNull, +} + +impl MutableInstance { + pub fn new(val: &mut Value) -> Self { + Self { + ptr: NonNull::from(val), + } + } +} + +impl<'a> ValidationInstance<'a> for MutableInstance { + fn as_value(&self) -> &'a Value { + unsafe { self.ptr.as_ref() } + } + + fn child_at_key(&self, key: &str) -> Option { + unsafe { + if let Some(obj) = self.ptr.as_ref().as_object() { + // We use as_ref() to check existence (safe read). + if obj.contains_key(key) { + // Now we need mutable pointer to child. + // Since we have *mut parent, and we know key exists... + // casting *mut Value -> &mut Value -> get_mut -> *mut Value + // This is safe because we are single threaded and not holding other refs across this call. + let parent_mut = &mut *self.ptr.as_ptr(); + if let Some(child_val) = parent_mut.get_mut(key) { + return Some(MutableInstance::new(child_val)); + } + } + } + None + } + } + + fn child_at_index(&self, idx: usize) -> Option { + unsafe { + if let Some(arr) = self.ptr.as_ref().as_array() { + if idx < arr.len() { + let parent_mut = &mut *self.ptr.as_ptr(); + if let Some(child_val) = parent_mut.get_mut(idx) { + return Some(MutableInstance::new(child_val)); + } + } + } + None + } + } + + fn prune_object(&self, keys: &HashSet) { + unsafe { + // We must re-acquire mutable reference from pointer + let val_mut = &mut *self.ptr.as_ptr(); + if let Some(obj) = val_mut.as_object_mut() { + obj.retain(|k, _| keys.contains(k)); + } + } + } + + fn prune_array(&self, indices: &HashSet) { + unsafe { + let val_mut = &mut *self.ptr.as_ptr(); + if let Some(arr) = val_mut.as_array_mut() { + let mut i = 0; + arr.retain(|_| { + let keep = indices.contains(&i); + i += 1; + keep + }); + } + } + } +} + +pub struct ValidationContext<'a, I: ValidationInstance<'a>> { // 1. Global (The Library) - now passed as reference pub registry: &'a Registry, pub root: &'a Schema, @@ -50,12 +155,12 @@ pub struct ValidationContext<'a> { pub schema: &'a Schema, // 3. The Data (The Instance) - pub current: &'a Value, + pub instance: I, // 4. State - pub path: &'a str, + pub path: String, pub depth: usize, - pub scope: &'a [String], + pub scope: Vec, // OWNED to avoid lifetime hell // 5. Config pub overrides: HashSet, // Keywords explicitly defined by callers that I should skip (Inherited Mask) @@ -63,13 +168,13 @@ pub struct ValidationContext<'a> { pub reporter: bool, // If true, we only report evaluated keys, don't enforce strictness } -impl<'a> ValidationContext<'a> { +impl<'a, I: ValidationInstance<'a>> ValidationContext<'a, I> { pub fn new( registry: &'a Registry, root: &'a Schema, schema: &'a Schema, - current: &'a Value, - scope: &'a [String], + instance: I, + scope: Vec, overrides: HashSet, extensible: bool, reporter: bool, @@ -79,8 +184,8 @@ impl<'a> ValidationContext<'a> { registry, root, schema, - current, - path: "", + instance, + path: String::new(), depth: 0, scope, overrides, @@ -92,9 +197,9 @@ impl<'a> ValidationContext<'a> { pub fn derive( &self, schema: &'a Schema, - current: &'a Value, - path: &'a str, - scope: &'a [String], + instance: I, // We take I directly (it is Copy) + path: &str, + scope: Vec, overrides: HashSet, extensible: bool, reporter: bool, @@ -105,8 +210,8 @@ impl<'a> ValidationContext<'a> { registry: self.registry, root: self.root, schema, - current, - path, + instance, + path: path.to_string(), depth: self.depth + 1, scope, overrides, @@ -119,9 +224,9 @@ impl<'a> ValidationContext<'a> { pub fn derive_for_schema(&self, schema: &'a Schema, reporter: bool) -> Self { self.derive( schema, - self.current, - self.path, - self.scope, + self.instance, // Copy + &self.path, + self.scope.clone(), HashSet::new(), // Reset overrides for composition/branching (Strict Intersection) self.extensible, // Inherited extensibility doesn't change for same-level schema switch reporter, @@ -132,8 +237,7 @@ impl<'a> ValidationContext<'a> { pub fn validate(&self) -> Result { // Check if we need to update scope due to ID - let mut effective_scope = self.scope; - let mut new_scope_buf: Vec; + let mut effective_scope = self.scope.clone(); if let Some(id) = &self.schema.obj.id { let current_base = self.scope.last().map(|s| s.as_str()).unwrap_or(""); @@ -146,18 +250,15 @@ impl<'a> ValidationContext<'a> { } } - new_scope_buf = self.scope.to_vec(); - new_scope_buf.push(new_base); - effective_scope = &new_scope_buf; - } + effective_scope.push(new_base); - if effective_scope.len() != self.scope.len() { + // If scope changed, we create a shadow context to use the new scope let shadow = ValidationContext { registry: self.registry, root: self.root, schema: self.schema, - current: self.current, - path: self.path, + instance: self.instance, + path: self.path.clone(), depth: self.depth, scope: effective_scope, overrides: self.overrides.clone(), @@ -167,6 +268,7 @@ impl<'a> ValidationContext<'a> { return shadow.validate_scoped(); } + // If no ID change, proceed self.validate_scoped() } @@ -193,15 +295,7 @@ impl<'a> ValidationContext<'a> { // --- Helpers Groups --- if let Some(ref_res) = self.validate_refs()? { - // eprintln!( - // "DEBUG: validate_refs returned {} errors", - // ref_res.errors.len() - // ); result.merge(ref_res); - // eprintln!( - // "DEBUG: result has {} errors after refs merge", - // result.errors.len() - // ); } // 2. Core @@ -230,9 +324,10 @@ impl<'a> ValidationContext<'a> { // If extensible, mark all as evaluated so strictness checks pass and parents don't complain if self.extensible { - if let Some(obj) = self.current.as_object() { + let current = self.instance.as_value(); + if let Some(obj) = current.as_object() { result.evaluated_keys.extend(obj.keys().cloned()); - } else if let Some(arr) = self.current.as_array() { + } else if let Some(arr) = current.as_array() { result.evaluated_indices.extend(0..arr.len()); } } @@ -250,8 +345,10 @@ impl<'a> ValidationContext<'a> { let mut handled = false; // Scope is already effective due to validate() wrapper! - let effective_scope = self.scope; + // self.scope is Vec + let effective_scope = &self.scope; let current_base_resolved = effective_scope.last().map(|s| s.as_str()).unwrap_or(""); + // Removed unused current binding // $ref if let Some(ref ref_string) = self.schema.ref_string { @@ -266,9 +363,9 @@ impl<'a> ValidationContext<'a> { let derived = self.derive( self.root, - self.current, - self.path, - effective_scope, + self.instance, // Copy + &self.path, + effective_scope.clone(), new_overrides, self.extensible, self.reporter, // Inherit so efficient composition (allOf) works, but property refs stay strict @@ -290,17 +387,16 @@ impl<'a> ValidationContext<'a> { &matched_key }; - let mut new_scope_buffer: Vec; let scope_to_pass = if target_schema.obj.id.is_none() { if !resource_base.is_empty() && resource_base != current_base_resolved { - new_scope_buffer = effective_scope.to_vec(); - new_scope_buffer.push(resource_base.to_string()); - &new_scope_buffer + let mut new_scope = effective_scope.clone(); + new_scope.push(resource_base.to_string()); + new_scope } else { - effective_scope + effective_scope.clone() } } else { - effective_scope + effective_scope.clone() }; // Calculate new overrides (Masking) @@ -313,7 +409,7 @@ impl<'a> ValidationContext<'a> { self.registry, target_root, target_schema, - self.current, + self.instance, // Copy scope_to_pass, new_overrides, false, // Reset extensibility for $ref (Default Strict) @@ -321,7 +417,7 @@ impl<'a> ValidationContext<'a> { ); // Manually set path/depth to continue trace let mut manual_ctx = target_ctx; - manual_ctx.path = self.path; + manual_ctx.path = self.path.clone(); manual_ctx.depth = self.depth + 1; let target_res = manual_ctx.validate()?; @@ -432,18 +528,17 @@ impl<'a> ValidationContext<'a> { &matched_key }; - let mut new_scope_buffer: Vec; let scope_to_pass = if let Some(ref tid) = target_schema.obj.id { - new_scope_buffer = effective_scope.to_vec(); - new_scope_buffer.push(tid.clone()); - &new_scope_buffer + let mut new_scope = effective_scope.clone(); + new_scope.push(tid.clone()); + new_scope } else { if !resource_base.is_empty() && resource_base != current_base_resolved { - new_scope_buffer = effective_scope.to_vec(); - new_scope_buffer.push(resource_base.to_string()); - &new_scope_buffer + let mut new_scope = effective_scope.clone(); + new_scope.push(resource_base.to_string()); + new_scope } else { - effective_scope + effective_scope.clone() } }; @@ -457,14 +552,14 @@ impl<'a> ValidationContext<'a> { self.registry, target_root, target_schema, - self.current, + self.instance, // Copy scope_to_pass, new_overrides, false, self.reporter, // Propagate reporter ); let mut manual_ctx = target_ctx; - manual_ctx.path = self.path; + manual_ctx.path = self.path.clone(); manual_ctx.depth = self.depth + 1; // manual_ctx.reporter = true; @@ -482,11 +577,12 @@ impl<'a> ValidationContext<'a> { } fn validate_core(&self, result: &mut ValidationResult) { + let current = self.instance.as_value(); // Type if let Some(ref type_) = self.schema.type_ { match type_ { crate::schema::SchemaTypeOrArray::Single(t) => { - if !Validator::check_type(t, self.current) { + if !Validator::check_type(t, current) { result.errors.push(ValidationError { code: "INVALID_TYPE".to_string(), message: format!("Expected type '{}'", t), @@ -497,7 +593,7 @@ impl<'a> ValidationContext<'a> { crate::schema::SchemaTypeOrArray::Multiple(types) => { let mut valid = false; for t in types { - if Validator::check_type(t, self.current) { + if Validator::check_type(t, current) { valid = true; break; } @@ -515,16 +611,16 @@ impl<'a> ValidationContext<'a> { // Const if let Some(ref const_val) = self.schema.const_ { - if !crate::util::equals(self.current, const_val) { + if !crate::util::equals(current, const_val) { result.errors.push(ValidationError { code: "CONST_VIOLATED".to_string(), message: "Value does not match const".to_string(), path: self.path.to_string(), }); } else { - if let Some(obj) = self.current.as_object() { + if let Some(obj) = current.as_object() { result.evaluated_keys.extend(obj.keys().cloned()); - } else if let Some(arr) = self.current.as_array() { + } else if let Some(arr) = current.as_array() { result.evaluated_indices.extend(0..arr.len()); } } @@ -534,7 +630,7 @@ impl<'a> ValidationContext<'a> { if let Some(ref enum_vals) = self.schema.enum_ { let mut found = false; for val in enum_vals { - if crate::util::equals(self.current, val) { + if crate::util::equals(current, val) { found = true; break; } @@ -546,9 +642,9 @@ impl<'a> ValidationContext<'a> { path: self.path.to_string(), }); } else { - if let Some(obj) = self.current.as_object() { + if let Some(obj) = current.as_object() { result.evaluated_keys.extend(obj.keys().cloned()); - } else if let Some(arr) = self.current.as_array() { + } else if let Some(arr) = current.as_array() { result.evaluated_indices.extend(0..arr.len()); } } @@ -556,7 +652,8 @@ impl<'a> ValidationContext<'a> { } fn validate_numeric(&self, result: &mut ValidationResult) { - if let Some(num) = self.current.as_f64() { + let current = self.instance.as_value(); + if let Some(num) = current.as_f64() { if let Some(min) = self.schema.minimum { if num < min { result.errors.push(ValidationError { @@ -607,7 +704,8 @@ impl<'a> ValidationContext<'a> { } fn validate_string(&self, result: &mut ValidationResult) { - if let Some(s) = self.current.as_str() { + let current = self.instance.as_value(); + if let Some(s) = current.as_str() { if let Some(min) = self.schema.min_length { if (s.chars().count() as f64) < min { result.errors.push(ValidationError { @@ -649,16 +747,17 @@ impl<'a> ValidationContext<'a> { } fn validate_format(&self, result: &mut ValidationResult) { + let current = self.instance.as_value(); if let Some(ref compiled_fmt) = self.schema.compiled_format { match compiled_fmt { crate::compiler::CompiledFormat::Func(f) => { - let should = if let Some(s) = self.current.as_str() { + let should = if let Some(s) = current.as_str() { !s.is_empty() } else { true }; if should { - if let Err(e) = f(self.current) { + if let Err(e) = f(current) { result.errors.push(ValidationError { code: "FORMAT_MISMATCH".to_string(), message: format!("Format error: {}", e), @@ -668,7 +767,7 @@ impl<'a> ValidationContext<'a> { } } crate::compiler::CompiledFormat::Regex(re) => { - if let Some(s) = self.current.as_str() { + if let Some(s) = current.as_str() { if !re.is_match(s) { result.errors.push(ValidationError { code: "FORMAT_MISMATCH".to_string(), @@ -683,7 +782,8 @@ impl<'a> ValidationContext<'a> { } fn validate_object(&self, result: &mut ValidationResult) -> Result<(), ValidationError> { - if let Some(obj) = self.current.as_object() { + let current = self.instance.as_value(); + if let Some(obj) = current.as_object() { // 1. Min Properties if let Some(min) = self.schema.min_properties { if (obj.len() as f64) < min { @@ -739,9 +839,9 @@ impl<'a> ValidationContext<'a> { // Reporter = true (merges results). let derived = self.derive( sub_schema, - self.current, - self.path, - self.scope, + self.instance, // Copy + &self.path, + self.scope.clone(), HashSet::new(), self.extensible, false, @@ -759,7 +859,7 @@ impl<'a> ValidationContext<'a> { continue; } - if let Some(val) = obj.get(key) { + if let Some(child_instance) = self.instance.child_at_key(key) { let new_path = format!("{}/{}", self.path, key); let is_ref = sub_schema.ref_string.is_some() || sub_schema.obj.dynamic_ref.is_some(); @@ -767,9 +867,9 @@ impl<'a> ValidationContext<'a> { let derived = self.derive( sub_schema, - val, + child_instance, &new_path, - self.scope, + self.scope.clone(), HashSet::new(), next_extensible, false, @@ -783,31 +883,28 @@ impl<'a> ValidationContext<'a> { if let Some(ref compiled_pp) = self.schema.compiled_pattern_properties { for (compiled_re, sub_schema) in compiled_pp { - for (key, val) in obj { + for (key, _) in obj { if compiled_re.0.is_match(key) { - // Note: Pattern properties are not shadowed by property names in standard override logic typically, - // but if we supported masking pattern props, we'd check here. For now, assuming standard behavior + no masking. + // Note: Pattern properties need to derive child instance dynamically for each matching key + if let Some(child_instance) = self.instance.child_at_key(key) { + let new_path = format!("{}/{}", self.path, key); + let is_ref = + sub_schema.ref_string.is_some() || sub_schema.obj.dynamic_ref.is_some(); + let next_extensible = if is_ref { false } else { self.extensible }; - let new_path = format!("{}/{}", self.path, key); - let is_ref = sub_schema.ref_string.is_some() || sub_schema.obj.dynamic_ref.is_some(); - let next_extensible = if is_ref { false } else { self.extensible }; - - let derived = self.derive( - sub_schema, - val, - &new_path, - self.scope, - HashSet::new(), - next_extensible, - false, - ); - let item_res = derived.validate()?; - // eprintln!( - // "PPROP VALIDATE: path={} key={} keys={:?}", - // self.path, key, item_res.evaluated_keys - // ); - result.merge(item_res); - result.evaluated_keys.insert(key.clone()); + let derived = self.derive( + sub_schema, + child_instance, + &new_path, + self.scope.clone(), + HashSet::new(), + next_extensible, + false, + ); + let item_res = derived.validate()?; + result.merge(item_res); + result.evaluated_keys.insert(key.clone()); + } } } } @@ -816,39 +913,44 @@ impl<'a> ValidationContext<'a> { // 7. Property Names if let Some(ref property_names) = self.schema.property_names { for key in obj.keys() { - let new_path = format!("{}/propertyNames/{}", self.path, key); + let _new_path = format!("{}/propertyNames/{}", self.path, key); let val_str = Value::String(key.clone()); - // Validating the KEY as a STRING instance. - // New scope. - let derived = self.derive( + let ctx = ValidationContext::new( + self.registry, + self.root, property_names, - &val_str, - &new_path, - self.scope, + crate::validator::ReadOnlyInstance(&val_str), + self.scope.clone(), HashSet::new(), self.extensible, - false, + self.reporter, ); - result.merge(derived.validate()?); + + result.merge(ctx.validate()?); } } + // 7. Additional Properties (Strictness / Extensibility) + // Done via check_strictness at end OR explicit schema keyword. + } + + // --- Pruning (Masking) --- + if !self.extensible { + self.instance.prune_object(&result.evaluated_keys); } - // 8. Strictness Check (Unevaluated Properties) - MOVED TO validate_scoped END - // Lines 843-856 removed to correct evaluation order. - // if !self.extensible && !self.reporter { ... } Ok(()) } fn validate_array(&self, result: &mut ValidationResult) -> Result<(), ValidationError> { - if let Some(arr) = self.current.as_array() { + let current = self.instance.as_value(); + if let Some(arr) = current.as_array() { // 1. Min/Max Items if let Some(min) = self.schema.min_items { if (arr.len() as f64) < min { result.errors.push(ValidationError { code: "MIN_ITEMS".to_string(), - message: format!("Length < min {}", min), + message: "Too few items".to_string(), path: self.path.to_string(), }); } @@ -857,26 +959,22 @@ impl<'a> ValidationContext<'a> { if (arr.len() as f64) > max { result.errors.push(ValidationError { code: "MAX_ITEMS".to_string(), - message: format!("Length > max {}", max), + message: "Too many items".to_string(), path: self.path.to_string(), }); } } + // 2. Unique Items if self.schema.unique_items.unwrap_or(false) { - let mut seen: Vec<&Value> = Vec::with_capacity(arr.len()); - for (i, item) in arr.iter().enumerate() { - for seen_item in &seen { - if crate::util::equals(item, *seen_item) { - result.errors.push(ValidationError { - code: "UNIQUE_ITEMS".to_string(), - message: format!("Duplicate item at index {}", i), - path: format!("{}/{}", self.path, i), - }); - break; - } - } - if !result.errors.is_empty() { + let mut seen: Vec<&Value> = Vec::new(); + for item in arr { + if seen.contains(&item) { + result.errors.push(ValidationError { + code: "UNIQUE_ITEMS_VIOLATED".to_string(), + message: "Array has duplicate items".to_string(), + path: self.path.to_string(), + }); break; } seen.push(item); @@ -886,21 +984,25 @@ impl<'a> ValidationContext<'a> { // 3. Contains if let Some(ref contains_schema) = self.schema.contains { let mut _match_count = 0; - for (i, param) in arr.iter().enumerate() { - let derived = self.derive( - contains_schema, - param, - self.path, - self.scope, - HashSet::new(), - self.extensible, - false, - ); + // self.instance.as_value() is &Value + // We iterate indices + for i in 0..arr.len() { + if let Some(child_instance) = self.instance.child_at_index(i) { + let derived = self.derive( + contains_schema, + child_instance, + &self.path, + self.scope.clone(), + HashSet::new(), + self.extensible, + false, + ); - let check = derived.validate()?; - if check.is_valid() { - _match_count += 1; - result.evaluated_indices.insert(i); + let check = derived.validate()?; + if check.is_valid() { + _match_count += 1; + result.evaluated_indices.insert(i); + } } } @@ -933,20 +1035,21 @@ impl<'a> ValidationContext<'a> { for (i, sub_schema) in prefix.iter().enumerate() { if i < len { let path = format!("{}/{}", self.path, i); - - let derived = self.derive( - sub_schema, - &arr[i], - &path, - self.scope, - HashSet::new(), - self.extensible, - false, - ); - let item_res = derived.validate()?; - result.merge(item_res); - result.evaluated_indices.insert(i); - validation_index += 1; + if let Some(child_instance) = self.instance.child_at_index(i) { + let derived = self.derive( + sub_schema, + child_instance, + &path, + self.scope.clone(), + HashSet::new(), + self.extensible, + false, + ); + let item_res = derived.validate()?; + result.merge(item_res); + result.evaluated_indices.insert(i); + validation_index += 1; + } } } } @@ -954,22 +1057,28 @@ impl<'a> ValidationContext<'a> { if let Some(ref items_schema) = self.schema.items { for i in validation_index..len { let path = format!("{}/{}", self.path, i); - - let derived = self.derive( - items_schema, - &arr[i], - &path, - self.scope, - HashSet::new(), - self.extensible, - false, - ); - let item_res = derived.validate()?; - result.merge(item_res); - result.evaluated_indices.insert(i); + if let Some(child_instance) = self.instance.child_at_index(i) { + let derived = self.derive( + items_schema, + child_instance, + &path, + self.scope.clone(), + HashSet::new(), + self.extensible, + false, + ); + let item_res = derived.validate()?; + result.merge(item_res); + result.evaluated_indices.insert(i); + } } } } + // --- Pruning (Masking) --- + if !self.extensible { + self.instance.prune_array(&result.evaluated_indices); + } + Ok(()) } @@ -1089,7 +1198,7 @@ impl<'a> ValidationContext<'a> { } // 1. Unevaluated Properties - if let Some(obj) = self.current.as_object() { + if let Some(obj) = self.instance.as_value().as_object() { for key in obj.keys() { if !result.evaluated_keys.contains(key) && !self.overrides.contains(key) { result.errors.push(ValidationError { @@ -1102,7 +1211,7 @@ impl<'a> ValidationContext<'a> { } // 2. Unevaluated Items - if let Some(arr) = self.current.as_array() { + if let Some(arr) = self.instance.as_value().as_array() { for i in 0..arr.len() { if !result.evaluated_indices.contains(&i) { result.errors.push(ValidationError { @@ -1267,65 +1376,58 @@ impl Validator { None } - pub fn validate(&self, schema_id: &str, instance: &Value) -> crate::drop::Drop { - let registry = &self.registry; - // Registry is owned, so we can access it directly. No mutex needed. - // However, Validator owns it, so we need &self to access. - - if let Some(root) = registry.get(schema_id) { - let root_id = root.obj.id.clone().unwrap_or_default(); - let scope = vec![root_id.clone()]; - - // Initial Context + pub fn validate( + &self, + schema_id: &str, + instance: &Value, + ) -> Result { + if let Some(schema) = self.registry.schemas.get(schema_id) { let ctx = ValidationContext::new( - registry, - &root, - &root, - instance, - &scope, + &self.registry, + schema, + schema, + ReadOnlyInstance(instance), + vec![], HashSet::new(), + false, // Default strictness (overridden by schema.extensible if present) + false, + ); + ctx.validate() + } else { + Err(ValidationError { + code: "SCHEMA_NOT_FOUND".to_string(), + message: format!("Schema {} not found", schema_id), + path: "".to_string(), + }) + } + } + + pub fn mask( + &self, + schema_id: &str, + instance: &mut Value, + ) -> Result { + if let Some(schema) = self.registry.schemas.get(schema_id) { + let ctx = ValidationContext::new( + &self.registry, + schema, + schema, + MutableInstance::new(instance), + vec![], + HashSet::new(), + false, // Default strictness false, - false, // reporter = false (Default) ); - match ctx.validate() { - Ok(result) => { - if result.is_valid() { - crate::drop::Drop::success() - } else { - let errors = result - .errors - .into_iter() - .map(|e| crate::drop::Error { - punc: None, - code: e.code, - message: e.message, - details: crate::drop::ErrorDetails { path: e.path }, - }) - .collect(); - crate::drop::Drop::with_errors(errors) - } - } - Err(e) => { - let error = crate::drop::Error { - punc: None, - code: e.code, - message: e.message, - details: crate::drop::ErrorDetails { path: e.path }, - }; - crate::drop::Drop::with_errors(vec![error]) - } - } + let res = ctx.validate()?; + + Ok(res) } else { - let error = crate::drop::Error { - punc: None, + Err(ValidationError { code: "SCHEMA_NOT_FOUND".to_string(), - message: format!("Schema '{}' not found", schema_id), - details: crate::drop::ErrorDetails { - path: "".to_string(), - }, - }; - crate::drop::Drop::with_errors(vec![error]) + message: format!("Schema {} not found", schema_id), + path: "".to_string(), + }) } } } diff --git a/tests/fixtures/masking.json b/tests/fixtures/masking.json new file mode 100644 index 0000000..bf28817 --- /dev/null +++ b/tests/fixtures/masking.json @@ -0,0 +1,171 @@ +[ + { + "description": "Masking Properties", + "schema": { + "$id": "mask_properties", + "type": "object", + "properties": { + "foo": { + "type": "string" + }, + "bar": { + "type": "integer" + } + }, + "required": [ + "foo" + ], + "extensible": false + }, + "tests": [ + { + "description": "Keep valid properties", + "data": { + "foo": "a", + "bar": 1 + }, + "valid": true, + "expected": { + "foo": "a", + "bar": 1 + } + }, + { + "description": "Remove unknown properties", + "data": { + "foo": "a", + "baz": true + }, + "valid": true, + "expected": { + "foo": "a" + } + }, + { + "description": "Keep valid properties with unknown", + "data": { + "foo": "a", + "bar": 1, + "baz": true + }, + "valid": true, + "expected": { + "foo": "a", + "bar": 1 + } + } + ] + }, + { + "description": "Masking Nested Objects", + "schema": { + "$id": "mask_nested", + "type": "object", + "properties": { + "meta": { + "type": "object", + "properties": { + "id": { + "type": "integer" + } + }, + "extensible": false + } + }, + "extensible": false + }, + "tests": [ + { + "description": "Mask nested object", + "data": { + "meta": { + "id": 1, + "extra": "x" + }, + "top_extra": "y" + }, + "valid": true, + "expected": { + "meta": { + "id": 1 + } + } + } + ] + }, + { + "description": "Masking Arrays", + "schema": { + "$id": "mask_arrays", + "type": "object", + "properties": { + "tags": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "extensible": false + }, + "tests": [ + { + "description": "Arrays are kept (items are valid)", + "data": { + "tags": [ + "a", + "b" + ] + }, + "valid": true, + "expected": { + "tags": [ + "a", + "b" + ] + } + } + ] + }, + { + "description": "Masking Tuple Arrays (prefixItems)", + "schema": { + "$id": "mask_tuple", + "type": "object", + "properties": { + "coord": { + "type": "array", + "prefixItems": [ + { + "type": "number" + }, + { + "type": "number" + } + ] + } + }, + "extensible": false + }, + "tests": [ + { + "description": "Extra tuple items removed", + "data": { + "coord": [ + 1, + 2, + 3, + "extra" + ] + }, + "valid": true, + "expected": { + "coord": [ + 1, + 2 + ] + } + } + ] + } +] \ No newline at end of file diff --git a/tests/tests.rs b/tests/tests.rs index 867155e..67c70a2 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1098,6 +1098,30 @@ fn test_pattern_1() { util::run_test_file_at_index(&path, 1).unwrap(); } +#[test] +fn test_masking_0() { + let path = format!("{}/tests/fixtures/masking.json", env!("CARGO_MANIFEST_DIR")); + util::run_test_file_at_index(&path, 0).unwrap(); +} + +#[test] +fn test_masking_1() { + let path = format!("{}/tests/fixtures/masking.json", env!("CARGO_MANIFEST_DIR")); + util::run_test_file_at_index(&path, 1).unwrap(); +} + +#[test] +fn test_masking_2() { + let path = format!("{}/tests/fixtures/masking.json", env!("CARGO_MANIFEST_DIR")); + util::run_test_file_at_index(&path, 2).unwrap(); +} + +#[test] +fn test_masking_3() { + let path = format!("{}/tests/fixtures/masking.json", env!("CARGO_MANIFEST_DIR")); + util::run_test_file_at_index(&path, 3).unwrap(); +} + #[test] fn test_max_properties_0() { let path = format!("{}/tests/fixtures/maxProperties.json", env!("CARGO_MANIFEST_DIR"));