jspg masking system installed

This commit is contained in:
2026-02-18 13:45:40 -05:00
parent 944675d669
commit 29c5160b49
8 changed files with 789 additions and 325 deletions

View File

@ -31,6 +31,17 @@ Loads and compiles the entire schema registry into the session's memory, atomica
* Compiles schemas into validators.
* **Returns**: `{"response": "success"}` or an error object.
### `mask_json_schema(schema_id text, instance jsonb) -> jsonb`
Validates a JSON instance and returns a new JSON object with unknown properties removed (pruned) based on the schema.
* **Inputs**:
* `schema_id`: The `$id` of the schema to mask against.
* `instance`: The JSON data to mask.
* **Returns**:
* On success: A `Drop` containing the **masked data**.
* On failure: A `Drop` containing validation errors.
### `validate_json_schema(schema_id text, instance jsonb) -> jsonb`
Validates a JSON instance against a pre-compiled schema.
@ -83,6 +94,16 @@ JSPG enforces a "Secure by Default" philosophy. All schemas are treated as if `u
### 4. Format Leniency for Empty Strings
To simplify frontend form logic, the format validators for `uuid`, `date-time`, and `email` explicitly allow empty strings (`""`). This treats an empty string as "present but unset" rather than "invalid format".
### 5. Masking (Constructive Validation)
JSPG supports a "Constructive Validation" mode via `mask_json_schema`. This is designed for high-performance API responses where the schema dictates the exact shape of the returned data.
* **Mechanism**: The validator traverses the instance against the schema.
* **Valid Fields**: Kept in the output.
* **Unknown/Extra Fields**: Silently removed (pruned) if `extensible: false` (default).
* **Invalid Fields**: Still trigger standard validation errors; in that case the returned `Drop` carries the errors instead of the masked data.
This allows the database to return "raw" joined rows (e.g. `SELECT * FROM person JOIN organization ...`) and have JSPG automatically shape the result into the expected API response, removing any internal or unrelated columns not defined in the schema.
## 🏗️ Architecture
The extension is written in Rust using `pgrx` and structures its schema parser to mirror the Punc Generator's design:

View File

@ -3,59 +3,66 @@ use serde_json::Value;
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Drop {
// We don't need id, frequency, etc. for the validation result specifically,
// as they are added by the SQL wrapper. We just need to conform to the structure.
// The user said "Validator::validate always needs to return this drop type".
// So we should match it as closely as possible.
// We don't need id, frequency, etc. for the validation result specifically,
// as they are added by the SQL wrapper. We just need to conform to the structure.
// The user said "Validator::validate always needs to return this drop type".
// So we should match it as closely as possible.
#[serde(rename = "type")]
pub type_: String, // "drop"
#[serde(rename = "type")]
pub type_: String, // "drop"
#[serde(skip_serializing_if = "Option::is_none")]
pub response: Option<Value>,
#[serde(skip_serializing_if = "Option::is_none")]
pub response: Option<Value>,
#[serde(default)]
pub errors: Vec<Error>,
#[serde(default)]
pub errors: Vec<Error>,
}
impl Drop {
pub fn new() -> Self {
Self {
type_: "drop".to_string(),
response: None,
errors: vec![],
}
pub fn new() -> Self {
Self {
type_: "drop".to_string(),
response: None,
errors: vec![],
}
}
pub fn success() -> Self {
Self {
type_: "drop".to_string(),
response: Some(serde_json::json!({ "result": "success" })), // Or appropriate success response
errors: vec![],
}
pub fn success() -> Self {
Self {
type_: "drop".to_string(),
response: Some(serde_json::json!({ "result": "success" })), // Or appropriate success response
errors: vec![],
}
}
pub fn with_errors(errors: Vec<Error>) -> Self {
Self {
type_: "drop".to_string(),
response: None,
errors,
}
pub fn success_with_val(val: Value) -> Self {
Self {
type_: "drop".to_string(),
response: Some(val),
errors: vec![],
}
}
pub fn with_errors(errors: Vec<Error>) -> Self {
Self {
type_: "drop".to_string(),
response: None,
errors,
}
}
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Error {
#[serde(skip_serializing_if = "Option::is_none")]
pub punc: Option<String>,
pub code: String,
pub message: String,
pub details: ErrorDetails,
#[serde(skip_serializing_if = "Option::is_none")]
pub punc: Option<String>,
pub code: String,
pub message: String,
pub details: ErrorDetails,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ErrorDetails {
pub path: String,
// Extensions can be added here (package, cause, etc)
// For now, validator only provides path
pub path: String,
// Extensions can be added here (package, cause, etc)
// For now, validator only provides path
}

View File

@ -110,6 +110,65 @@ fn cache_json_schemas(enums: JsonB, types: JsonB, puncs: JsonB) -> JsonB {
JsonB(json!({ "response": "success" }))
}
#[pg_extern(strict, parallel_safe)]
fn mask_json_schema(schema_id: &str, instance: JsonB) -> JsonB {
// 1. Acquire Snapshot
let validator_arc = {
let lock = GLOBAL_VALIDATOR.read().unwrap();
lock.clone()
};
// 2. Validate (Lock-Free)
if let Some(validator) = validator_arc {
// We need a mutable copy of the value to mask it
let mut mutable_instance = instance.0.clone();
match validator.mask(schema_id, &mut mutable_instance) {
Ok(result) => {
// If valid, return the MASKED instance
if result.is_valid() {
let drop = crate::drop::Drop::success_with_val(mutable_instance);
JsonB(serde_json::to_value(drop).unwrap())
} else {
// If invalid, return errors (Schema Validation Errors)
let errors: Vec<crate::drop::Error> = result
.errors
.into_iter()
.map(|e| crate::drop::Error {
punc: None,
code: e.code,
message: e.message,
details: crate::drop::ErrorDetails { path: e.path },
})
.collect();
let drop = crate::drop::Drop::with_errors(errors);
JsonB(serde_json::to_value(drop).unwrap())
}
}
Err(e) => {
// Schema Not Found or other fatal error
let error = crate::drop::Error {
punc: None,
code: e.code,
message: e.message,
details: crate::drop::ErrorDetails { path: e.path },
};
let drop = crate::drop::Drop::with_errors(vec![error]);
JsonB(serde_json::to_value(drop).unwrap())
}
}
} else {
JsonB(json!({
"punc": null,
"errors": [{
"code": "VALIDATOR_NOT_INITIALIZED",
"message": "JSON Schemas have not been cached yet. Run cache_json_schemas()",
"details": { "path": "" }
}]
}))
}
}
#[pg_extern(strict, parallel_safe)]
fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB {
// 1. Acquire Snapshot
@ -120,8 +179,37 @@ fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB {
// 2. Validate (Lock-Free)
if let Some(validator) = validator_arc {
let drop = validator.validate(schema_id, &instance.0);
JsonB(serde_json::to_value(drop).unwrap())
match validator.validate(schema_id, &instance.0) {
Ok(result) => {
if result.is_valid() {
let drop = crate::drop::Drop::success();
JsonB(serde_json::to_value(drop).unwrap())
} else {
let errors: Vec<crate::drop::Error> = result
.errors
.into_iter()
.map(|e| crate::drop::Error {
punc: None,
code: e.code,
message: e.message,
details: crate::drop::ErrorDetails { path: e.path },
})
.collect();
let drop = crate::drop::Drop::with_errors(errors);
JsonB(serde_json::to_value(drop).unwrap())
}
}
Err(e) => {
let error = crate::drop::Error {
punc: None,
code: e.code,
message: e.message,
details: crate::drop::ErrorDetails { path: e.path },
};
let drop = crate::drop::Drop::with_errors(vec![error]);
JsonB(serde_json::to_value(drop).unwrap())
}
}
} else {
JsonB(json!({
"punc": null,
@ -136,34 +224,11 @@ fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB {
#[pg_extern(strict, parallel_safe)]
fn json_schema_cached(schema_id: &str) -> bool {
// Acquire Snapshot for safe read
if let Some(validator) = GLOBAL_VALIDATOR.read().unwrap().as_ref() {
// We can expose a get/contains method on Validator or peek inside
// Since Validator owns Registry, we need a method there or hack it
// Let's assume Validator exposes a minimal check or we just check validity of that schema?
// Actually, registry access is private inside Validator now.
// We should add `has_schema` to Validator.
// For now, let's just cheat: Validate against it, if schema not found error, return false.
// Or better: Add `has_schema` to Validator.
// Let's do that in a follow up if needed, but for now we need a way.
// I'll add `has_schema` to Validator via a quick task or assume it exists?
// No, I just overwrote Validator without it.
// Better Logic: Try to validate "null" against it?
// No, simpler: Update Validator to expose has_schema.
// But I cannot call replace_validator now.
// Wait, I can try to access the public underlying registry if I expose it?
// Validator struct: `pub struct Validator { registry: Registry }`?
// No, keeping it opaque is better.
// Let's execute validate and check if error code is SCHEMA_NOT_FOUND.
let drop = validator.validate(schema_id, &serde_json::Value::Null); // Minimal payload
if !drop.errors.is_empty() {
for e in drop.errors {
if e.code == "SCHEMA_NOT_FOUND" {
return false;
}
}
match validator.validate(schema_id, &serde_json::Value::Null) {
Err(e) if e.code == "SCHEMA_NOT_FOUND" => false,
_ => true,
}
true
} else {
false
}
@ -178,13 +243,8 @@ fn clear_json_schemas() -> JsonB {
#[pg_extern(strict, parallel_safe)]
fn show_json_schemas() -> JsonB {
// Use _validator to suppress warning
if let Some(_validator) = GLOBAL_VALIDATOR.read().unwrap().as_ref() {
// Debug dump
// We need Validator to expose len() or debug info?
// Or just return success for now as in original code.
JsonB(json!({ "response": "success", "status": "active" }))
// Ideally: validator.registry_len()
} else {
JsonB(json!({ "response": "success", "status": "empty" }))
}

View File

@ -1097,6 +1097,30 @@ fn test_pattern_1() {
crate::util::run_test_file_at_index(&path, 1).unwrap();
}
/// Runs entry 0 of the `masking.json` fixture through `run_test_file_at_index`.
#[pg_test]
fn test_masking_0() {
    // The fixture path is assembled at compile time from the crate root.
    let fixture = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/fixtures/masking.json");
    crate::util::run_test_file_at_index(fixture, 0).unwrap();
}
/// Runs entry 1 of the `masking.json` fixture through `run_test_file_at_index`.
#[pg_test]
fn test_masking_1() {
    // The fixture path is assembled at compile time from the crate root.
    let fixture = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/fixtures/masking.json");
    crate::util::run_test_file_at_index(fixture, 1).unwrap();
}
/// Runs entry 2 of the `masking.json` fixture through `run_test_file_at_index`.
#[pg_test]
fn test_masking_2() {
    // The fixture path is assembled at compile time from the crate root.
    let fixture = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/fixtures/masking.json");
    crate::util::run_test_file_at_index(fixture, 2).unwrap();
}
/// Runs entry 3 of the `masking.json` fixture through `run_test_file_at_index`.
#[pg_test]
fn test_masking_3() {
    // The fixture path is assembled at compile time from the crate root.
    let fixture = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/fixtures/masking.json");
    crate::util::run_test_file_at_index(fixture, 3).unwrap();
}
#[pg_test]
fn test_max_properties_0() {
let path = format!("{}/tests/fixtures/maxProperties.json", env!("CARGO_MANIFEST_DIR"));

View File

@ -183,15 +183,51 @@ pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> {
if let Some(sid) = schema_id {
let result = validator.validate(&sid, &test.data);
if !result.errors.is_empty() != !test.valid {
failures.push(format!(
"[{}] Test '{}' failed. Expected: {}, Got: {}. Errors: {:?}",
group.description,
test.description,
test.valid,
!result.errors.is_empty(), // "Got Invalid?"
result.errors
));
let (got_valid, _errors) = match &result {
Ok(res) => (res.is_valid(), &res.errors),
Err(_e) => {
// If we encounter an execution error (e.g. Schema Not Found),
// we treat it as a test failure.
(false, &vec![])
}
};
if let Some(expected) = &test.expected {
// Masking Test
let mut data_for_mask = test.data.clone();
match validator.mask(&sid, &mut data_for_mask) {
Ok(_) => {
if !equals(&data_for_mask, expected) {
let msg = format!(
"Masking Test '{}' failed.\nExpected: {:?}\nGot: {:?}",
test.description, expected, data_for_mask
);
eprintln!("{}", msg);
failures.push(msg);
}
}
Err(e) => {
let msg = format!(
"Masking Test '{}' failed with execution error: {:?}",
test.description, e
);
eprintln!("{}", msg);
failures.push(msg);
}
}
} else {
// Standard Validation Test
if got_valid != test.valid {
let error_msg = match &result {
Ok(res) => format!("{:?}", res.errors),
Err(e) => format!("Execution Error: {:?}", e),
};
failures.push(format!(
"[{}] Test '{}' failed. Expected: {}, Got: {}. Errors: {}",
group.description, test.description, test.valid, got_valid, error_msg
));
}
}
} else {
failures.push(format!(
@ -310,28 +346,47 @@ pub fn run_test_file(path: &str) -> Result<(), String> {
// Use explicit schema_id from test, or default to unique_id
let schema_id = test.schema_id.as_deref().unwrap_or(&unique_id).to_string();
let drop = validator.validate(&schema_id, &test.data);
let result = validator.validate(&schema_id, &test.data);
if test.valid {
if !drop.errors.is_empty() {
let msg = format!(
"Test failed (expected valid): {}\nSchema: {:?}\nData: {:?}\nErrors: {:?}",
test.description,
group.schema, // We might need to find the actual schema used if schema_id is custom
test.data,
drop.errors
);
eprintln!("{}", msg);
failures.push(msg);
match result {
Ok(res) => {
if !res.is_valid() {
let msg = format!(
"Test failed (expected valid): {}\nSchema: {:?}\nData: {:?}\nErrors: {:?}",
test.description,
group.schema, // We might need to find the actual schema used if schema_id is custom
test.data,
res.errors
);
eprintln!("{}", msg);
failures.push(msg);
}
}
Err(e) => {
let msg = format!(
"Test failed (expected valid) but got execution error: {}\nSchema: {:?}\nData: {:?}\nError: {:?}",
test.description, group.schema, test.data, e
);
eprintln!("{}", msg);
failures.push(msg);
}
}
} else {
if drop.errors.is_empty() {
let msg = format!(
"Test failed (expected invalid): {}\nSchema: {:?}\nData: {:?}\nErrors: (Empty)",
test.description, group.schema, test.data
);
println!("{}", msg);
failures.push(msg);
match result {
Ok(res) => {
if res.is_valid() {
let msg = format!(
"Test failed (expected invalid): {}\nSchema: {:?}\nData: {:?}",
test.description, group.schema, test.data
);
eprintln!("{}", msg);
failures.push(msg);
}
}
Err(_) => {
// Expected invalid, got error (which implies invalid/failure), so this is PASS.
}
}
}
}

View File

@ -6,7 +6,7 @@ use regex::Regex;
use serde_json::Value;
use std::collections::HashSet;
#[derive(Debug, Clone)]
#[derive(Debug, Clone, serde::Serialize)]
pub struct ValidationError {
pub code: String,
pub message: String,
@ -18,10 +18,12 @@ pub enum ResolvedRef<'a> {
Global(&'a Schema, &'a Schema),
}
#[derive(Debug, Default, Clone)]
#[derive(Debug, Default, Clone, serde::Serialize)]
pub struct ValidationResult {
pub errors: Vec<ValidationError>,
#[serde(skip)]
pub evaluated_keys: HashSet<String>,
#[serde(skip)]
pub evaluated_indices: HashSet<usize>,
}
@ -41,7 +43,110 @@ impl ValidationResult {
}
}
pub struct ValidationContext<'a> {
use std::ptr::NonNull;
/// Handle to the JSON instance being validated.
///
/// Handles are `Copy`, so they can be passed by value into derived contexts.
/// The pruning hooks default to no-ops; an implementation overrides them when
/// it wants the instance edited in place.
pub trait ValidationInstance<'a>: Copy {
    /// Borrows the JSON value this handle refers to.
    fn as_value(&self) -> &'a Value;

    /// Returns a handle to the child stored under `key`, if there is one.
    fn child_at_key(&self, key: &str) -> Option<Self>;

    /// Returns a handle to the child at position `idx`, if there is one.
    fn child_at_index(&self, idx: usize) -> Option<Self>;

    /// Pruning hook for objects; receives the set of keys. Does nothing by default.
    fn prune_object(&self, _keys: &HashSet<String>) {}

    /// Pruning hook for arrays; receives the set of indices. Does nothing by default.
    fn prune_array(&self, _indices: &HashSet<usize>) {}
}
/// Instance handle backed by a shared borrow of a JSON value.
///
/// It does not override the pruning hooks, so they stay the trait's no-ops.
#[derive(Clone, Copy)]
pub struct ReadOnlyInstance<'a>(pub &'a Value);

impl<'a> ValidationInstance<'a> for ReadOnlyInstance<'a> {
    /// Returns the wrapped reference unchanged.
    fn as_value(&self) -> &'a Value {
        let ReadOnlyInstance(value) = *self;
        value
    }

    /// Looks `key` up via `Value::get` and wraps the hit, if any.
    fn child_at_key(&self, key: &str) -> Option<Self> {
        let child = self.0.get(key)?;
        Some(ReadOnlyInstance(child))
    }

    /// Looks `idx` up via `Value::get` and wraps the hit, if any.
    fn child_at_index(&self, idx: usize) -> Option<Self> {
        let child = self.0.get(idx)?;
        Some(ReadOnlyInstance(child))
    }
}
/// Copyable instance handle that stores a raw, non-null pointer to a JSON value.
///
/// The struct has no lifetime parameter, so the borrow passed to
/// [`MutableInstance::new`] is not tracked by the type system.
/// NOTE(review): callers presumably must keep the pointed-to value alive and
/// un-moved while any copy of the handle is in use — confirm at call sites.
#[derive(Clone, Copy)]
pub struct MutableInstance {
    ptr: NonNull<Value>,
}

impl MutableInstance {
    /// Builds a handle pointing at `val`.
    pub fn new(val: &mut Value) -> Self {
        let ptr = NonNull::from(val);
        Self { ptr }
    }
}
// Pointer-backed implementation: children are returned as new pointer handles
// and the prune_* hooks edit the pointed-to value in place.
// NOTE(review): `'a` is a free lifetime on this impl (the struct has no
// lifetime parameter), so the references returned by `as_value` are not tied
// to any real borrow. Soundness therefore depends on how callers use the
// handle — confirm that no reference obtained from `as_value` is used after
// `child_at_*` or `prune_*` has created a mutable reference to the same value.
impl<'a> ValidationInstance<'a> for MutableInstance {
// Returns a shared reference to the pointed-to value with the caller-chosen
// lifetime `'a`.
fn as_value(&self) -> &'a Value {
// SAFETY (assumed, not enforced here): the pointer was created from a
// `&mut Value` in `MutableInstance::new`; the pointee must still be alive.
unsafe { self.ptr.as_ref() }
}
// Returns a handle to the member stored under `key`, or `None` when the value
// is not an object or has no such key.
fn child_at_key(&self, key: &str) -> Option<Self> {
unsafe {
if let Some(obj) = self.ptr.as_ref().as_object() {
// Existence is checked through a shared reference first.
if obj.contains_key(key) {
// The key exists, so re-borrow the parent mutably through the raw
// pointer and take a mutable reference to the child, which
// `MutableInstance::new` turns back into a pointer handle.
// Original author's rationale: single-threaded use, with no other
// references held across this call.
// NOTE(review): a caller still holding a borrow from `as_value` across
// this call would alias the mutable re-borrow — TODO confirm.
let parent_mut = &mut *self.ptr.as_ptr();
if let Some(child_val) = parent_mut.get_mut(key) {
return Some(MutableInstance::new(child_val));
}
}
}
None
}
}
// Returns a handle to the element at `idx`, or `None` when the value is not an
// array or `idx` is out of bounds.
fn child_at_index(&self, idx: usize) -> Option<Self> {
unsafe {
if let Some(arr) = self.ptr.as_ref().as_array() {
if idx < arr.len() {
// Same pattern as `child_at_key`: mutable re-borrow of the parent, then
// a pointer handle to the child element.
let parent_mut = &mut *self.ptr.as_ptr();
if let Some(child_val) = parent_mut.get_mut(idx) {
return Some(MutableInstance::new(child_val));
}
}
}
None
}
}
// Removes every member whose key is not in `keys`; does nothing when the value
// is not an object.
fn prune_object(&self, keys: &HashSet<String>) {
unsafe {
// Re-acquire a mutable reference from the raw pointer for the in-place edit.
let val_mut = &mut *self.ptr.as_ptr();
if let Some(obj) = val_mut.as_object_mut() {
obj.retain(|k, _| keys.contains(k));
}
}
}
// Removes every element whose position is not in `indices`; does nothing when
// the value is not an array.
fn prune_array(&self, indices: &HashSet<usize>) {
unsafe {
let val_mut = &mut *self.ptr.as_ptr();
if let Some(arr) = val_mut.as_array_mut() {
// `i` counts the elements visited by `retain`, i.e. each element's
// original position in the array.
let mut i = 0;
arr.retain(|_| {
let keep = indices.contains(&i);
i += 1;
keep
});
}
}
}
}
pub struct ValidationContext<'a, I: ValidationInstance<'a>> {
// 1. Global (The Library) - now passed as reference
pub registry: &'a Registry,
pub root: &'a Schema,
@ -50,12 +155,12 @@ pub struct ValidationContext<'a> {
pub schema: &'a Schema,
// 3. The Data (The Instance)
pub current: &'a Value,
pub instance: I,
// 4. State
pub path: &'a str,
pub path: String,
pub depth: usize,
pub scope: &'a [String],
pub scope: Vec<String>, // OWNED to avoid lifetime hell
// 5. Config
pub overrides: HashSet<String>, // Keywords explicitly defined by callers that I should skip (Inherited Mask)
@ -63,13 +168,13 @@ pub struct ValidationContext<'a> {
pub reporter: bool, // If true, we only report evaluated keys, don't enforce strictness
}
impl<'a> ValidationContext<'a> {
impl<'a, I: ValidationInstance<'a>> ValidationContext<'a, I> {
pub fn new(
registry: &'a Registry,
root: &'a Schema,
schema: &'a Schema,
current: &'a Value,
scope: &'a [String],
instance: I,
scope: Vec<String>,
overrides: HashSet<String>,
extensible: bool,
reporter: bool,
@ -79,8 +184,8 @@ impl<'a> ValidationContext<'a> {
registry,
root,
schema,
current,
path: "",
instance,
path: String::new(),
depth: 0,
scope,
overrides,
@ -92,9 +197,9 @@ impl<'a> ValidationContext<'a> {
pub fn derive(
&self,
schema: &'a Schema,
current: &'a Value,
path: &'a str,
scope: &'a [String],
instance: I, // We take I directly (it is Copy)
path: &str,
scope: Vec<String>,
overrides: HashSet<String>,
extensible: bool,
reporter: bool,
@ -105,8 +210,8 @@ impl<'a> ValidationContext<'a> {
registry: self.registry,
root: self.root,
schema,
current,
path,
instance,
path: path.to_string(),
depth: self.depth + 1,
scope,
overrides,
@ -119,9 +224,9 @@ impl<'a> ValidationContext<'a> {
pub fn derive_for_schema(&self, schema: &'a Schema, reporter: bool) -> Self {
self.derive(
schema,
self.current,
self.path,
self.scope,
self.instance, // Copy
&self.path,
self.scope.clone(),
HashSet::new(), // Reset overrides for composition/branching (Strict Intersection)
self.extensible, // Inherited extensibility doesn't change for same-level schema switch
reporter,
@ -132,8 +237,7 @@ impl<'a> ValidationContext<'a> {
pub fn validate(&self) -> Result<ValidationResult, ValidationError> {
// Check if we need to update scope due to ID
let mut effective_scope = self.scope;
let mut new_scope_buf: Vec<String>;
let mut effective_scope = self.scope.clone();
if let Some(id) = &self.schema.obj.id {
let current_base = self.scope.last().map(|s| s.as_str()).unwrap_or("");
@ -146,18 +250,15 @@ impl<'a> ValidationContext<'a> {
}
}
new_scope_buf = self.scope.to_vec();
new_scope_buf.push(new_base);
effective_scope = &new_scope_buf;
}
effective_scope.push(new_base);
if effective_scope.len() != self.scope.len() {
// If scope changed, we create a shadow context to use the new scope
let shadow = ValidationContext {
registry: self.registry,
root: self.root,
schema: self.schema,
current: self.current,
path: self.path,
instance: self.instance,
path: self.path.clone(),
depth: self.depth,
scope: effective_scope,
overrides: self.overrides.clone(),
@ -167,6 +268,7 @@ impl<'a> ValidationContext<'a> {
return shadow.validate_scoped();
}
// If no ID change, proceed
self.validate_scoped()
}
@ -193,15 +295,7 @@ impl<'a> ValidationContext<'a> {
// --- Helpers Groups ---
if let Some(ref_res) = self.validate_refs()? {
// eprintln!(
// "DEBUG: validate_refs returned {} errors",
// ref_res.errors.len()
// );
result.merge(ref_res);
// eprintln!(
// "DEBUG: result has {} errors after refs merge",
// result.errors.len()
// );
}
// 2. Core
@ -230,9 +324,10 @@ impl<'a> ValidationContext<'a> {
// If extensible, mark all as evaluated so strictness checks pass and parents don't complain
if self.extensible {
if let Some(obj) = self.current.as_object() {
let current = self.instance.as_value();
if let Some(obj) = current.as_object() {
result.evaluated_keys.extend(obj.keys().cloned());
} else if let Some(arr) = self.current.as_array() {
} else if let Some(arr) = current.as_array() {
result.evaluated_indices.extend(0..arr.len());
}
}
@ -250,8 +345,10 @@ impl<'a> ValidationContext<'a> {
let mut handled = false;
// Scope is already effective due to validate() wrapper!
let effective_scope = self.scope;
// self.scope is Vec<String>
let effective_scope = &self.scope;
let current_base_resolved = effective_scope.last().map(|s| s.as_str()).unwrap_or("");
// Removed unused current binding
// $ref
if let Some(ref ref_string) = self.schema.ref_string {
@ -266,9 +363,9 @@ impl<'a> ValidationContext<'a> {
let derived = self.derive(
self.root,
self.current,
self.path,
effective_scope,
self.instance, // Copy
&self.path,
effective_scope.clone(),
new_overrides,
self.extensible,
self.reporter, // Inherit so efficient composition (allOf) works, but property refs stay strict
@ -290,17 +387,16 @@ impl<'a> ValidationContext<'a> {
&matched_key
};
let mut new_scope_buffer: Vec<String>;
let scope_to_pass = if target_schema.obj.id.is_none() {
if !resource_base.is_empty() && resource_base != current_base_resolved {
new_scope_buffer = effective_scope.to_vec();
new_scope_buffer.push(resource_base.to_string());
&new_scope_buffer
let mut new_scope = effective_scope.clone();
new_scope.push(resource_base.to_string());
new_scope
} else {
effective_scope
effective_scope.clone()
}
} else {
effective_scope
effective_scope.clone()
};
// Calculate new overrides (Masking)
@ -313,7 +409,7 @@ impl<'a> ValidationContext<'a> {
self.registry,
target_root,
target_schema,
self.current,
self.instance, // Copy
scope_to_pass,
new_overrides,
false, // Reset extensibility for $ref (Default Strict)
@ -321,7 +417,7 @@ impl<'a> ValidationContext<'a> {
);
// Manually set path/depth to continue trace
let mut manual_ctx = target_ctx;
manual_ctx.path = self.path;
manual_ctx.path = self.path.clone();
manual_ctx.depth = self.depth + 1;
let target_res = manual_ctx.validate()?;
@ -432,18 +528,17 @@ impl<'a> ValidationContext<'a> {
&matched_key
};
let mut new_scope_buffer: Vec<String>;
let scope_to_pass = if let Some(ref tid) = target_schema.obj.id {
new_scope_buffer = effective_scope.to_vec();
new_scope_buffer.push(tid.clone());
&new_scope_buffer
let mut new_scope = effective_scope.clone();
new_scope.push(tid.clone());
new_scope
} else {
if !resource_base.is_empty() && resource_base != current_base_resolved {
new_scope_buffer = effective_scope.to_vec();
new_scope_buffer.push(resource_base.to_string());
&new_scope_buffer
let mut new_scope = effective_scope.clone();
new_scope.push(resource_base.to_string());
new_scope
} else {
effective_scope
effective_scope.clone()
}
};
@ -457,14 +552,14 @@ impl<'a> ValidationContext<'a> {
self.registry,
target_root,
target_schema,
self.current,
self.instance, // Copy
scope_to_pass,
new_overrides,
false,
self.reporter, // Propagate reporter
);
let mut manual_ctx = target_ctx;
manual_ctx.path = self.path;
manual_ctx.path = self.path.clone();
manual_ctx.depth = self.depth + 1;
// manual_ctx.reporter = true;
@ -482,11 +577,12 @@ impl<'a> ValidationContext<'a> {
}
fn validate_core(&self, result: &mut ValidationResult) {
let current = self.instance.as_value();
// Type
if let Some(ref type_) = self.schema.type_ {
match type_ {
crate::schema::SchemaTypeOrArray::Single(t) => {
if !Validator::check_type(t, self.current) {
if !Validator::check_type(t, current) {
result.errors.push(ValidationError {
code: "INVALID_TYPE".to_string(),
message: format!("Expected type '{}'", t),
@ -497,7 +593,7 @@ impl<'a> ValidationContext<'a> {
crate::schema::SchemaTypeOrArray::Multiple(types) => {
let mut valid = false;
for t in types {
if Validator::check_type(t, self.current) {
if Validator::check_type(t, current) {
valid = true;
break;
}
@ -515,16 +611,16 @@ impl<'a> ValidationContext<'a> {
// Const
if let Some(ref const_val) = self.schema.const_ {
if !crate::util::equals(self.current, const_val) {
if !crate::util::equals(current, const_val) {
result.errors.push(ValidationError {
code: "CONST_VIOLATED".to_string(),
message: "Value does not match const".to_string(),
path: self.path.to_string(),
});
} else {
if let Some(obj) = self.current.as_object() {
if let Some(obj) = current.as_object() {
result.evaluated_keys.extend(obj.keys().cloned());
} else if let Some(arr) = self.current.as_array() {
} else if let Some(arr) = current.as_array() {
result.evaluated_indices.extend(0..arr.len());
}
}
@ -534,7 +630,7 @@ impl<'a> ValidationContext<'a> {
if let Some(ref enum_vals) = self.schema.enum_ {
let mut found = false;
for val in enum_vals {
if crate::util::equals(self.current, val) {
if crate::util::equals(current, val) {
found = true;
break;
}
@ -546,9 +642,9 @@ impl<'a> ValidationContext<'a> {
path: self.path.to_string(),
});
} else {
if let Some(obj) = self.current.as_object() {
if let Some(obj) = current.as_object() {
result.evaluated_keys.extend(obj.keys().cloned());
} else if let Some(arr) = self.current.as_array() {
} else if let Some(arr) = current.as_array() {
result.evaluated_indices.extend(0..arr.len());
}
}
@ -556,7 +652,8 @@ impl<'a> ValidationContext<'a> {
}
fn validate_numeric(&self, result: &mut ValidationResult) {
if let Some(num) = self.current.as_f64() {
let current = self.instance.as_value();
if let Some(num) = current.as_f64() {
if let Some(min) = self.schema.minimum {
if num < min {
result.errors.push(ValidationError {
@ -607,7 +704,8 @@ impl<'a> ValidationContext<'a> {
}
fn validate_string(&self, result: &mut ValidationResult) {
if let Some(s) = self.current.as_str() {
let current = self.instance.as_value();
if let Some(s) = current.as_str() {
if let Some(min) = self.schema.min_length {
if (s.chars().count() as f64) < min {
result.errors.push(ValidationError {
@ -649,16 +747,17 @@ impl<'a> ValidationContext<'a> {
}
fn validate_format(&self, result: &mut ValidationResult) {
let current = self.instance.as_value();
if let Some(ref compiled_fmt) = self.schema.compiled_format {
match compiled_fmt {
crate::compiler::CompiledFormat::Func(f) => {
let should = if let Some(s) = self.current.as_str() {
let should = if let Some(s) = current.as_str() {
!s.is_empty()
} else {
true
};
if should {
if let Err(e) = f(self.current) {
if let Err(e) = f(current) {
result.errors.push(ValidationError {
code: "FORMAT_MISMATCH".to_string(),
message: format!("Format error: {}", e),
@ -668,7 +767,7 @@ impl<'a> ValidationContext<'a> {
}
}
crate::compiler::CompiledFormat::Regex(re) => {
if let Some(s) = self.current.as_str() {
if let Some(s) = current.as_str() {
if !re.is_match(s) {
result.errors.push(ValidationError {
code: "FORMAT_MISMATCH".to_string(),
@ -683,7 +782,8 @@ impl<'a> ValidationContext<'a> {
}
fn validate_object(&self, result: &mut ValidationResult) -> Result<(), ValidationError> {
if let Some(obj) = self.current.as_object() {
let current = self.instance.as_value();
if let Some(obj) = current.as_object() {
// 1. Min Properties
if let Some(min) = self.schema.min_properties {
if (obj.len() as f64) < min {
@ -739,9 +839,9 @@ impl<'a> ValidationContext<'a> {
// Reporter = true (merges results).
let derived = self.derive(
sub_schema,
self.current,
self.path,
self.scope,
self.instance, // Copy
&self.path,
self.scope.clone(),
HashSet::new(),
self.extensible,
false,
@ -759,7 +859,7 @@ impl<'a> ValidationContext<'a> {
continue;
}
if let Some(val) = obj.get(key) {
if let Some(child_instance) = self.instance.child_at_key(key) {
let new_path = format!("{}/{}", self.path, key);
let is_ref = sub_schema.ref_string.is_some() || sub_schema.obj.dynamic_ref.is_some();
@ -767,9 +867,9 @@ impl<'a> ValidationContext<'a> {
let derived = self.derive(
sub_schema,
val,
child_instance,
&new_path,
self.scope,
self.scope.clone(),
HashSet::new(),
next_extensible,
false,
@ -783,31 +883,28 @@ impl<'a> ValidationContext<'a> {
if let Some(ref compiled_pp) = self.schema.compiled_pattern_properties {
for (compiled_re, sub_schema) in compiled_pp {
for (key, val) in obj {
for (key, _) in obj {
if compiled_re.0.is_match(key) {
// Note: Pattern properties are not shadowed by property names in standard override logic typically,
// but if we supported masking pattern props, we'd check here. For now, assuming standard behavior + no masking.
// Note: Pattern properties need to derive child instance dynamically for each matching key
if let Some(child_instance) = self.instance.child_at_key(key) {
let new_path = format!("{}/{}", self.path, key);
let is_ref =
sub_schema.ref_string.is_some() || sub_schema.obj.dynamic_ref.is_some();
let next_extensible = if is_ref { false } else { self.extensible };
let new_path = format!("{}/{}", self.path, key);
let is_ref = sub_schema.ref_string.is_some() || sub_schema.obj.dynamic_ref.is_some();
let next_extensible = if is_ref { false } else { self.extensible };
let derived = self.derive(
sub_schema,
val,
&new_path,
self.scope,
HashSet::new(),
next_extensible,
false,
);
let item_res = derived.validate()?;
// eprintln!(
// "PPROP VALIDATE: path={} key={} keys={:?}",
// self.path, key, item_res.evaluated_keys
// );
result.merge(item_res);
result.evaluated_keys.insert(key.clone());
let derived = self.derive(
sub_schema,
child_instance,
&new_path,
self.scope.clone(),
HashSet::new(),
next_extensible,
false,
);
let item_res = derived.validate()?;
result.merge(item_res);
result.evaluated_keys.insert(key.clone());
}
}
}
}
@ -816,39 +913,44 @@ impl<'a> ValidationContext<'a> {
// 7. Property Names
if let Some(ref property_names) = self.schema.property_names {
for key in obj.keys() {
let new_path = format!("{}/propertyNames/{}", self.path, key);
let _new_path = format!("{}/propertyNames/{}", self.path, key);
let val_str = Value::String(key.clone());
// Validating the KEY as a STRING instance.
// New scope.
let derived = self.derive(
let ctx = ValidationContext::new(
self.registry,
self.root,
property_names,
&val_str,
&new_path,
self.scope,
crate::validator::ReadOnlyInstance(&val_str),
self.scope.clone(),
HashSet::new(),
self.extensible,
false,
self.reporter,
);
result.merge(derived.validate()?);
result.merge(ctx.validate()?);
}
}
// 7. Additional Properties (Strictness / Extensibility)
// Done via check_strictness at end OR explicit schema keyword.
}
// --- Pruning (Masking) ---
if !self.extensible {
self.instance.prune_object(&result.evaluated_keys);
}
// 8. Strictness Check (Unevaluated Properties) - MOVED TO validate_scoped END
// Lines 843-856 removed to correct evaluation order.
// if !self.extensible && !self.reporter { ... }
Ok(())
}
fn validate_array(&self, result: &mut ValidationResult) -> Result<(), ValidationError> {
if let Some(arr) = self.current.as_array() {
let current = self.instance.as_value();
if let Some(arr) = current.as_array() {
// 1. Min/Max Items
if let Some(min) = self.schema.min_items {
if (arr.len() as f64) < min {
result.errors.push(ValidationError {
code: "MIN_ITEMS".to_string(),
message: format!("Length < min {}", min),
message: "Too few items".to_string(),
path: self.path.to_string(),
});
}
@ -857,26 +959,22 @@ impl<'a> ValidationContext<'a> {
if (arr.len() as f64) > max {
result.errors.push(ValidationError {
code: "MAX_ITEMS".to_string(),
message: format!("Length > max {}", max),
message: "Too many items".to_string(),
path: self.path.to_string(),
});
}
}
// 2. Unique Items
if self.schema.unique_items.unwrap_or(false) {
let mut seen: Vec<&Value> = Vec::with_capacity(arr.len());
for (i, item) in arr.iter().enumerate() {
for seen_item in &seen {
if crate::util::equals(item, *seen_item) {
result.errors.push(ValidationError {
code: "UNIQUE_ITEMS".to_string(),
message: format!("Duplicate item at index {}", i),
path: format!("{}/{}", self.path, i),
});
break;
}
}
if !result.errors.is_empty() {
let mut seen: Vec<&Value> = Vec::new();
for item in arr {
if seen.contains(&item) {
result.errors.push(ValidationError {
code: "UNIQUE_ITEMS_VIOLATED".to_string(),
message: "Array has duplicate items".to_string(),
path: self.path.to_string(),
});
break;
}
seen.push(item);
@ -886,21 +984,25 @@ impl<'a> ValidationContext<'a> {
// 3. Contains
if let Some(ref contains_schema) = self.schema.contains {
let mut _match_count = 0;
for (i, param) in arr.iter().enumerate() {
let derived = self.derive(
contains_schema,
param,
self.path,
self.scope,
HashSet::new(),
self.extensible,
false,
);
// self.instance.as_value() is &Value
// We iterate indices
for i in 0..arr.len() {
if let Some(child_instance) = self.instance.child_at_index(i) {
let derived = self.derive(
contains_schema,
child_instance,
&self.path,
self.scope.clone(),
HashSet::new(),
self.extensible,
false,
);
let check = derived.validate()?;
if check.is_valid() {
_match_count += 1;
result.evaluated_indices.insert(i);
let check = derived.validate()?;
if check.is_valid() {
_match_count += 1;
result.evaluated_indices.insert(i);
}
}
}
@ -933,20 +1035,21 @@ impl<'a> ValidationContext<'a> {
for (i, sub_schema) in prefix.iter().enumerate() {
if i < len {
let path = format!("{}/{}", self.path, i);
let derived = self.derive(
sub_schema,
&arr[i],
&path,
self.scope,
HashSet::new(),
self.extensible,
false,
);
let item_res = derived.validate()?;
result.merge(item_res);
result.evaluated_indices.insert(i);
validation_index += 1;
if let Some(child_instance) = self.instance.child_at_index(i) {
let derived = self.derive(
sub_schema,
child_instance,
&path,
self.scope.clone(),
HashSet::new(),
self.extensible,
false,
);
let item_res = derived.validate()?;
result.merge(item_res);
result.evaluated_indices.insert(i);
validation_index += 1;
}
}
}
}
@ -954,22 +1057,28 @@ impl<'a> ValidationContext<'a> {
if let Some(ref items_schema) = self.schema.items {
for i in validation_index..len {
let path = format!("{}/{}", self.path, i);
let derived = self.derive(
items_schema,
&arr[i],
&path,
self.scope,
HashSet::new(),
self.extensible,
false,
);
let item_res = derived.validate()?;
result.merge(item_res);
result.evaluated_indices.insert(i);
if let Some(child_instance) = self.instance.child_at_index(i) {
let derived = self.derive(
items_schema,
child_instance,
&path,
self.scope.clone(),
HashSet::new(),
self.extensible,
false,
);
let item_res = derived.validate()?;
result.merge(item_res);
result.evaluated_indices.insert(i);
}
}
}
}
// --- Pruning (Masking) ---
if !self.extensible {
self.instance.prune_array(&result.evaluated_indices);
}
Ok(())
}
@ -1089,7 +1198,7 @@ impl<'a> ValidationContext<'a> {
}
// 1. Unevaluated Properties
if let Some(obj) = self.current.as_object() {
if let Some(obj) = self.instance.as_value().as_object() {
for key in obj.keys() {
if !result.evaluated_keys.contains(key) && !self.overrides.contains(key) {
result.errors.push(ValidationError {
@ -1102,7 +1211,7 @@ impl<'a> ValidationContext<'a> {
}
// 2. Unevaluated Items
if let Some(arr) = self.current.as_array() {
if let Some(arr) = self.instance.as_value().as_array() {
for i in 0..arr.len() {
if !result.evaluated_indices.contains(&i) {
result.errors.push(ValidationError {
@ -1267,65 +1376,58 @@ impl Validator {
None
}
pub fn validate(&self, schema_id: &str, instance: &Value) -> crate::drop::Drop {
let registry = &self.registry;
// Registry is owned, so we can access it directly. No mutex needed.
// However, Validator owns it, so we need &self to access.
if let Some(root) = registry.get(schema_id) {
let root_id = root.obj.id.clone().unwrap_or_default();
let scope = vec![root_id.clone()];
// Initial Context
/// Validates `instance` against the schema stored under `schema_id`.
///
/// The instance is taken by shared reference and handed to the
/// `ValidationContext` wrapped in `ReadOnlyInstance`.
///
/// # Errors
/// Returns a `ValidationError` with code `SCHEMA_NOT_FOUND` and an empty
/// `path` when `self.registry.schemas` has no entry for `schema_id`.
/// Otherwise the outcome of `ctx.validate()` is returned unchanged.
pub fn validate(
&self,
schema_id: &str,
instance: &Value,
) -> Result<ValidationResult, ValidationError> {
if let Some(schema) = self.registry.schemas.get(schema_id) {
let ctx = ValidationContext::new(
// NOTE(review): the next five arguments (`registry` through `&scope`) use
// names that are not bound anywhere in this function as shown; they look
// like leftovers from an earlier version of this call - confirm which
// argument lines are current before relying on this listing.
registry,
&root,
&root,
instance,
&scope,
&self.registry,
schema,
schema,
ReadOnlyInstance(instance),
vec![],
HashSet::new(),
false, // Default strictness (overridden by schema.extensible if present)
false,
);
ctx.validate()
} else {
// Unknown schema id: reported as an error value rather than a panic.
Err(ValidationError {
code: "SCHEMA_NOT_FOUND".to_string(),
message: format!("Schema {} not found", schema_id),
path: "".to_string(),
})
}
}
/// Validates `instance` against the schema stored under `schema_id`, giving
/// the validator mutable access to the instance.
///
/// The caller's `&mut Value` is wrapped in `MutableInstance::new(instance)`
/// before being passed to `ValidationContext::new`.
/// NOTE(review): presumably this wrapper is what lets the `prune_object` /
/// `prune_array` calls made during validation remove unevaluated keys and
/// indices in place - confirm against the `MutableInstance` implementation.
///
/// # Errors
/// Returns a `ValidationError` with code `SCHEMA_NOT_FOUND` and an empty
/// `path` when `self.registry.schemas` has no entry for `schema_id`; an `Err`
/// from `ctx.validate()` is propagated with `?`.
pub fn mask(
&self,
schema_id: &str,
instance: &mut Value,
) -> Result<ValidationResult, ValidationError> {
if let Some(schema) = self.registry.schemas.get(schema_id) {
let ctx = ValidationContext::new(
&self.registry,
schema,
schema,
MutableInstance::new(instance),
vec![],
HashSet::new(),
false, // Default strictness
false,
false, // reporter = false (Default)
);
// NOTE(review): the `match` below produces `crate::drop::Drop` values, which
// does not fit the declared `Result<ValidationResult, ValidationError>`
// return type, and `ctx.validate()` is called again right after it. This
// looks like residue of an earlier implementation - confirm.
match ctx.validate() {
Ok(result) => {
if result.is_valid() {
crate::drop::Drop::success()
} else {
let errors = result
.errors
.into_iter()
.map(|e| crate::drop::Error {
punc: None,
code: e.code,
message: e.message,
details: crate::drop::ErrorDetails { path: e.path },
})
.collect();
crate::drop::Drop::with_errors(errors)
}
}
Err(e) => {
let error = crate::drop::Error {
punc: None,
code: e.code,
message: e.message,
details: crate::drop::ErrorDetails { path: e.path },
};
crate::drop::Drop::with_errors(vec![error])
}
}
// Run validation once; a hard failure short-circuits via `?`.
let res = ctx.validate()?;
Ok(res)
} else {
// NOTE(review): two error constructions are interleaved below (a
// `crate::drop::Error` and a `ValidationError`); only the `ValidationError`
// fits the declared return type - confirm which lines are current.
let error = crate::drop::Error {
punc: None,
Err(ValidationError {
code: "SCHEMA_NOT_FOUND".to_string(),
message: format!("Schema '{}' not found", schema_id),
details: crate::drop::ErrorDetails {
path: "".to_string(),
},
};
crate::drop::Drop::with_errors(vec![error])
message: format!("Schema {} not found", schema_id),
path: "".to_string(),
})
}
}
}

171
tests/fixtures/masking.json vendored Normal file
View File

@ -0,0 +1,171 @@
[
{
"description": "Masking Properties",
"schema": {
"$id": "mask_properties",
"type": "object",
"properties": {
"foo": {
"type": "string"
},
"bar": {
"type": "integer"
}
},
"required": [
"foo"
],
"extensible": false
},
"tests": [
{
"description": "Keep valid properties",
"data": {
"foo": "a",
"bar": 1
},
"valid": true,
"expected": {
"foo": "a",
"bar": 1
}
},
{
"description": "Remove unknown properties",
"data": {
"foo": "a",
"baz": true
},
"valid": true,
"expected": {
"foo": "a"
}
},
{
"description": "Keep valid properties with unknown",
"data": {
"foo": "a",
"bar": 1,
"baz": true
},
"valid": true,
"expected": {
"foo": "a",
"bar": 1
}
}
]
},
{
"description": "Masking Nested Objects",
"schema": {
"$id": "mask_nested",
"type": "object",
"properties": {
"meta": {
"type": "object",
"properties": {
"id": {
"type": "integer"
}
},
"extensible": false
}
},
"extensible": false
},
"tests": [
{
"description": "Mask nested object",
"data": {
"meta": {
"id": 1,
"extra": "x"
},
"top_extra": "y"
},
"valid": true,
"expected": {
"meta": {
"id": 1
}
}
}
]
},
{
"description": "Masking Arrays",
"schema": {
"$id": "mask_arrays",
"type": "object",
"properties": {
"tags": {
"type": "array",
"items": {
"type": "string"
}
}
},
"extensible": false
},
"tests": [
{
"description": "Arrays are kept (items are valid)",
"data": {
"tags": [
"a",
"b"
]
},
"valid": true,
"expected": {
"tags": [
"a",
"b"
]
}
}
]
},
{
"description": "Masking Tuple Arrays (prefixItems)",
"schema": {
"$id": "mask_tuple",
"type": "object",
"properties": {
"coord": {
"type": "array",
"prefixItems": [
{
"type": "number"
},
{
"type": "number"
}
]
}
},
"extensible": false
},
"tests": [
{
"description": "Extra tuple items removed",
"data": {
"coord": [
1,
2,
3,
"extra"
]
},
"valid": true,
"expected": {
"coord": [
1,
2
]
}
}
]
}
]

View File

@ -1098,6 +1098,30 @@ fn test_pattern_1() {
util::run_test_file_at_index(&path, 1).unwrap();
}
/// Runs entry 0 of `tests/fixtures/masking.json` through the shared fixture
/// runner (presumably the "Masking Properties" group - confirm the runner
/// indexes the fixture's top-level array).
#[test]
fn test_masking_0() {
    let manifest_dir = env!("CARGO_MANIFEST_DIR");
    let fixture_path = format!("{}/tests/fixtures/masking.json", manifest_dir);
    util::run_test_file_at_index(&fixture_path, 0).unwrap();
}
/// Runs entry 1 of `tests/fixtures/masking.json` through the shared fixture
/// runner (presumably the "Masking Nested Objects" group - confirm the runner
/// indexes the fixture's top-level array).
#[test]
fn test_masking_1() {
    let manifest_dir = env!("CARGO_MANIFEST_DIR");
    let fixture_path = format!("{}/tests/fixtures/masking.json", manifest_dir);
    util::run_test_file_at_index(&fixture_path, 1).unwrap();
}
/// Runs entry 2 of `tests/fixtures/masking.json` through the shared fixture
/// runner (presumably the "Masking Arrays" group - confirm the runner indexes
/// the fixture's top-level array).
#[test]
fn test_masking_2() {
    let manifest_dir = env!("CARGO_MANIFEST_DIR");
    let fixture_path = format!("{}/tests/fixtures/masking.json", manifest_dir);
    util::run_test_file_at_index(&fixture_path, 2).unwrap();
}
/// Runs entry 3 of `tests/fixtures/masking.json` through the shared fixture
/// runner (presumably the "Masking Tuple Arrays (prefixItems)" group - confirm
/// the runner indexes the fixture's top-level array).
#[test]
fn test_masking_3() {
    let manifest_dir = env!("CARGO_MANIFEST_DIR");
    let fixture_path = format!("{}/tests/fixtures/masking.json", manifest_dir);
    util::run_test_file_at_index(&fixture_path, 3).unwrap();
}
#[test]
fn test_max_properties_0() {
let path = format!("{}/tests/fixtures/maxProperties.json", env!("CARGO_MANIFEST_DIR"));