From 92c0a6fc0b5837db99df6e71d3b1d7cbdb47516b Mon Sep 17 00:00:00 2001 From: Alex Groleau Date: Wed, 11 Jun 2025 19:38:46 -0400 Subject: [PATCH] even more jspg improved error handling, missing some codes before --- rustfmt.toml | 1 + src/lib.rs | 456 ++++++++++++++++++++++++++++----------------------- 2 files changed, 254 insertions(+), 203 deletions(-) create mode 100644 rustfmt.toml diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..6f2e075 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1 @@ +tab_spaces = 2 \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 0fc2b5d..8ff8842 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,16 +2,26 @@ use pgrx::*; pg_module_magic!(); -use serde_json::{json, Value}; -use std::{collections::HashMap, sync::RwLock}; -use boon::{Compiler, Schemas, ValidationError, SchemaIndex, CompileError}; +use boon::{CompileError, Compiler, ErrorKind, SchemaIndex, Schemas, ValidationError}; use lazy_static::lazy_static; +use serde_json::{json, Value}; +use std::collections::hash_map::Entry; +use std::{collections::HashMap, sync::RwLock}; struct BoonCache { schemas: Schemas, id_to_index: HashMap, } +// Structure to hold error information without lifetimes +#[derive(Debug)] +struct Error { + path: String, + code: String, + message: String, + cause: String, +} + lazy_static! { static ref SCHEMA_CACHE: RwLock = RwLock::new(BoonCache { schemas: Schemas::new(), @@ -30,6 +40,7 @@ fn cache_json_schema(schema_id: &str, schema: JsonB, strict: bool) -> JsonB { apply_strict_validation(&mut schema_value); } + // Create the boon compiler and enable format assertions let mut compiler = Compiler::new(); compiler.enable_format_assertions(); @@ -59,9 +70,9 @@ fn cache_json_schema(schema_id: &str, schema: JsonB, strict: bool) -> JsonB { CompileError::ValidationError { url: _url, src } => { // Collect leaf errors from the meta-schema validation failure let mut error_list = Vec::new(); - collect_validation_errors(src, &mut error_list); + collect_errors(src, &mut error_list); // Filter and format errors properly - no instance for schema compilation - format_drop_errors(error_list, &schema_value) + format_errors(error_list, &schema_value) } _ => { // Other compilation errors @@ -83,33 +94,31 @@ fn cache_json_schema(schema_id: &str, schema: JsonB, strict: bool) -> JsonB { // Helper function to recursively apply strict validation to all objects in a schema fn apply_strict_validation(schema: &mut Value) { match schema { - Value::Object(map) => { - // If this is an object type schema, add additionalProperties: false - if let Some(Value::String(t)) = map.get("type") { - if t == "object" && !map.contains_key("additionalProperties") { - map.insert("additionalProperties".to_string(), Value::Bool(false)); - } - } - - // Recurse into all properties - for (_, value) in map.iter_mut() { - apply_strict_validation(value); - } + Value::Object(map) => { + // If this is an object type schema, add additionalProperties: false + if let Some(Value::String(t)) = map.get("type") { + if t == "object" && !map.contains_key("additionalProperties") { + map.insert("additionalProperties".to_string(), Value::Bool(false)); + } } - Value::Array(arr) => { - // Recurse into array items - for item in arr.iter_mut() { - apply_strict_validation(item); - } + // Recurse into all properties + for (_, value) in map.iter_mut() { + apply_strict_validation(value); } - _ => {} + } + Value::Array(arr) => { + // Recurse into array items + for item in arr.iter_mut() { + apply_strict_validation(item); + } + } + _ => {} } } #[pg_extern(strict, parallel_safe)] fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB { let cache = SCHEMA_CACHE.read().unwrap(); - // Lookup uses the original schema_id match cache.id_to_index.get(schema_id) { None => JsonB(json!({ @@ -127,10 +136,9 @@ fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB { Ok(_) => JsonB(json!({ "response": "success" })), Err(validation_error) => { let mut error_list = Vec::new(); - collect_validation_errors(&validation_error, &mut error_list); - let errors = format_drop_errors(error_list, &instance_value); - - JsonB(json!({ "errors": errors })) + collect_errors(&validation_error, &mut error_list); + let errors = format_errors(error_list, &instance_value); + JsonB(json!({ "errors": errors })) } } } @@ -138,201 +146,244 @@ fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB { } // Recursively collects validation errors -fn collect_validation_errors(error: &ValidationError, errors_list: &mut Vec<(String, String, String)>) { +fn collect_errors(error: &ValidationError, errors_list: &mut Vec) { // Check if this is a structural error that we should skip - let error_message = format!("{}", error.kind); - let is_structural = error_message == "validation failed" || - error_message == "allOf failed" || - error_message == "anyOf failed" || - error_message == "not failed" || - error_message.starts_with("oneOf failed"); - + let is_structural = matches!( + &error.kind, + ErrorKind::Group | ErrorKind::AllOf | ErrorKind::AnyOf | ErrorKind::Not | ErrorKind::OneOf(_) + ); + if error.causes.is_empty() && !is_structural { // This is a leaf error that's not structural - // Format just the error kind, not the whole validation error - let message = format!("{}", error.kind); + let original_message = format!("{}", error.kind); + let (error_code, human_message) = convert_error_kind(&error.kind); - errors_list.push(( - error.instance_location.to_string(), - error.schema_url.to_string(), - message - )); + errors_list.push(Error { + path: error.instance_location.to_string(), + code: error_code, + message: human_message, + cause: original_message, + }); } else { // Recurse into causes for cause in &error.causes { - collect_validation_errors(cause, errors_list); + collect_errors(cause, errors_list); } } } +// Convert ErrorKind to error code and human message +fn convert_error_kind(kind: &ErrorKind) -> (String, String) { + match kind { + ErrorKind::Type { .. } => ( + "TYPE_MISMATCH".to_string(), + "Field type does not match the expected type".to_string(), + ), + ErrorKind::Required { .. } => ( + "REQUIRED_FIELD_MISSING".to_string(), + "Required field is missing".to_string(), + ), + ErrorKind::DependentRequired { .. } => ( + "DEPENDENT_REQUIRED_MISSING".to_string(), + "Dependent required fields are missing".to_string(), + ), + ErrorKind::Dependency { .. } => ( + "DEPENDENCY_FAILED".to_string(), + "Dependency requirement not met".to_string(), + ), + ErrorKind::Enum { .. } => ( + "ENUM_VIOLATED".to_string(), + "Value is not one of the allowed options".to_string(), + ), + ErrorKind::Const { .. } => ( + "CONST_VIOLATED".to_string(), + "Value does not match the required constant".to_string(), + ), + ErrorKind::MinLength { .. } => ( + "MIN_LENGTH_VIOLATED".to_string(), + "Field length is below the minimum required".to_string(), + ), + ErrorKind::MaxLength { .. } => ( + "MAX_LENGTH_VIOLATED".to_string(), + "Field length exceeds the maximum allowed".to_string(), + ), + ErrorKind::Pattern { .. } => ( + "PATTERN_VIOLATED".to_string(), + "Value does not match the required pattern".to_string(), + ), + ErrorKind::Minimum { .. } => ( + "MINIMUM_VIOLATED".to_string(), + "Value is below the minimum allowed".to_string(), + ), + ErrorKind::Maximum { .. } => ( + "MAXIMUM_VIOLATED".to_string(), + "Value exceeds the maximum allowed".to_string(), + ), + ErrorKind::ExclusiveMinimum { .. } => ( + "EXCLUSIVE_MINIMUM_VIOLATED".to_string(), + "Value must be greater than the minimum".to_string(), + ), + ErrorKind::ExclusiveMaximum { .. } => ( + "EXCLUSIVE_MAXIMUM_VIOLATED".to_string(), + "Value must be less than the maximum".to_string(), + ), + ErrorKind::MultipleOf { .. } => ( + "MULTIPLE_OF_VIOLATED".to_string(), + "Value is not a multiple of the required factor".to_string(), + ), + ErrorKind::MinItems { .. } => ( + "MIN_ITEMS_VIOLATED".to_string(), + "Array has fewer items than required".to_string(), + ), + ErrorKind::MaxItems { .. } => ( + "MAX_ITEMS_VIOLATED".to_string(), + "Array has more items than allowed".to_string(), + ), + ErrorKind::UniqueItems { .. } => ( + "UNIQUE_ITEMS_VIOLATED".to_string(), + "Array contains duplicate items".to_string(), + ), + ErrorKind::MinProperties { .. } => ( + "MIN_PROPERTIES_VIOLATED".to_string(), + "Object has fewer properties than required".to_string(), + ), + ErrorKind::MaxProperties { .. } => ( + "MAX_PROPERTIES_VIOLATED".to_string(), + "Object has more properties than allowed".to_string(), + ), + ErrorKind::AdditionalProperties { .. } => ( + "ADDITIONAL_PROPERTIES_NOT_ALLOWED".to_string(), + "Object contains properties that are not allowed".to_string(), + ), + ErrorKind::AdditionalItems { .. } => ( + "ADDITIONAL_ITEMS_NOT_ALLOWED".to_string(), + "Array contains additional items that are not allowed".to_string(), + ), + ErrorKind::Format { want, .. } => ( + "FORMAT_INVALID".to_string(), + format!("Invalid {} format", want), + ), + ErrorKind::PropertyName { .. } => ( + "INVALID_PROPERTY_NAME".to_string(), + "Property name is invalid".to_string(), + ), + ErrorKind::Contains => ( + "CONTAINS_FAILED".to_string(), + "No items match the required schema".to_string(), + ), + ErrorKind::MinContains { .. } => ( + "MIN_CONTAINS_VIOLATED".to_string(), + "Too few items match the required schema".to_string(), + ), + ErrorKind::MaxContains { .. } => ( + "MAX_CONTAINS_VIOLATED".to_string(), + "Too many items match the required schema".to_string(), + ), + ErrorKind::ContentEncoding { .. } => ( + "CONTENT_ENCODING_INVALID".to_string(), + "Content encoding is invalid".to_string(), + ), + ErrorKind::ContentMediaType { .. } => ( + "CONTENT_MEDIA_TYPE_INVALID".to_string(), + "Content media type is invalid".to_string(), + ), + ErrorKind::FalseSchema => ( + "FALSE_SCHEMA".to_string(), + "Schema validation always fails".to_string(), + ), + ErrorKind::Not => ( + "NOT_VIOLATED".to_string(), + "Value matched a schema it should not match".to_string(), + ), + ErrorKind::RefCycle { .. } => ( + "REFERENCE_CYCLE".to_string(), + "Schema contains a reference cycle".to_string(), + ), + ErrorKind::Reference { .. } => ( + "REFERENCE_FAILED".to_string(), + "Reference validation failed".to_string(), + ), + ErrorKind::Schema { .. } => ( + "SCHEMA_FAILED".to_string(), + "Schema validation failed".to_string(), + ), + ErrorKind::ContentSchema => ( + "CONTENT_SCHEMA_FAILED".to_string(), + "Content schema validation failed".to_string(), + ), + // These shouldn't appear as leaf errors due to is_structural check + ErrorKind::Group => ( + "VALIDATION_FAILED".to_string(), + "Validation failed".to_string(), + ), + ErrorKind::AllOf => ( + "ALL_OF_VIOLATED".to_string(), + "Value does not match all required schemas".to_string(), + ), + ErrorKind::AnyOf => ( + "ANY_OF_VIOLATED".to_string(), + "Value does not match any of the allowed schemas".to_string(), + ), + ErrorKind::OneOf(_) => ( + "ONE_OF_VIOLATED".to_string(), + "Value must match exactly one schema".to_string(), + ), + } +} + // Formats errors according to DropError structure -fn format_drop_errors(raw_errors: Vec<(String, String, String)>, instance: &Value) -> Vec { - use std::collections::HashMap; - use std::collections::hash_map::Entry; - - // We don't filter structural paths from instance paths anymore - // because instance paths shouldn't contain these segments anyway - // The issue was likely with schema paths, not instance paths - let plausible_errors = raw_errors; - - // 2. Deduplicate by instance_path and format as DropError - let mut unique_errors: HashMap = HashMap::new(); - for (instance_path, _schema_path, message) in plausible_errors { - if let Entry::Vacant(entry) = unique_errors.entry(instance_path.clone()) { - // Convert message to error code and make it human readable - let (code, human_message) = enhance_error_message(&message); - - // Extract the failing value from the instance - let failing_value = extract_value_at_path(instance, &instance_path); - - entry.insert(json!({ - "code": code, - "message": human_message, - "details": { - "path": instance_path, - "context": failing_value, - "cause": message // Original error message - } - })); - } +fn format_errors(errors: Vec, instance: &Value) -> Vec { + // Deduplicate by instance_path and format as DropError + let mut unique_errors: HashMap = HashMap::new(); + for error in errors { + if let Entry::Vacant(entry) = unique_errors.entry(error.path.clone()) { + // Extract the failing value from the instance + let failing_value = extract_value_at_path(instance, &error.path); + entry.insert(json!({ + "code": error.code, + "message": error.message, + "details": { + "path": error.path, + "context": failing_value, + "cause": error.cause + } + })); } + } - unique_errors.into_values().collect() + unique_errors.into_values().collect() } // Helper function to extract value at a JSON pointer path fn extract_value_at_path(instance: &Value, path: &str) -> Value { - let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); - let mut current = instance; - - for part in parts { - match current { - Value::Object(map) => { - if let Some(value) = map.get(part) { - current = value; - } else { - return Value::Null; - } - } - Value::Array(arr) => { - if let Ok(index) = part.parse::() { - if let Some(value) = arr.get(index) { - current = value; - } else { - return Value::Null; - } - } else { - return Value::Null; - } - } - _ => return Value::Null, + let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); + let mut current = instance; + + for part in parts { + match current { + Value::Object(map) => { + if let Some(value) = map.get(part) { + current = value; + } else { + return Value::Null; } + } + Value::Array(arr) => { + if let Ok(index) = part.parse::() { + if let Some(value) = arr.get(index) { + current = value; + } else { + return Value::Null; + } + } else { + return Value::Null; + } + } + _ => return Value::Null, } - - current.clone() -} + } -// Helper to convert validation messages to error codes and human-readable messages -fn enhance_error_message(message: &str) -> (String, String) { - // Match exact boon error message patterns - let trimmed = message.trim(); - - if trimmed.contains("value must be one of") { - ("ENUM_VIOLATED".to_string(), - "Value is not one of the allowed options".to_string()) - } else if trimmed.contains("length must be >=") && trimmed.contains("but got") { - ("MIN_LENGTH_VIOLATED".to_string(), - "Field length is below the minimum required".to_string()) - } else if trimmed.contains("length must be <=") && trimmed.contains("but got") { - ("MAX_LENGTH_VIOLATED".to_string(), - "Field length exceeds the maximum allowed".to_string()) - } else if trimmed.contains("must be >=") && trimmed.contains("but got") { - ("MINIMUM_VIOLATED".to_string(), - "Value is below the minimum allowed".to_string()) - } else if trimmed.contains("must be <=") && trimmed.contains("but got") { - ("MAXIMUM_VIOLATED".to_string(), - "Value exceeds the maximum allowed".to_string()) - } else if trimmed.contains("must be >") && trimmed.contains("but got") { - ("EXCLUSIVE_MINIMUM_VIOLATED".to_string(), - "Value must be greater than the minimum".to_string()) - } else if trimmed.contains("must be <") && trimmed.contains("but got") { - ("EXCLUSIVE_MAXIMUM_VIOLATED".to_string(), - "Value must be less than the maximum".to_string()) - } else if trimmed.contains("does not match pattern") { - ("PATTERN_VIOLATED".to_string(), - "Value does not match the required pattern".to_string()) - } else if trimmed.contains("missing properties") { - ("REQUIRED_FIELD_MISSING".to_string(), - "Required field is missing".to_string()) - } else if trimmed.contains("want") && trimmed.contains("but got") { - ("TYPE_MISMATCH".to_string(), - "Field type does not match the expected type".to_string()) - } else if trimmed.starts_with("value must be") && !trimmed.contains("one of") { - ("CONST_VIOLATED".to_string(), - "Value does not match the required constant".to_string()) - } else if trimmed.contains("is not valid") && trimmed.contains(":") { - ("FORMAT_INVALID".to_string(), - extract_format_message(trimmed)) - } else if trimmed.contains("items at") && trimmed.contains("are equal") { - ("UNIQUE_ITEMS_VIOLATED".to_string(), - "Array contains duplicate items".to_string()) - } else if trimmed.contains("additionalProperties") && trimmed.contains("not allowed") { - ("ADDITIONAL_PROPERTIES_NOT_ALLOWED".to_string(), - "Object contains properties that are not allowed".to_string()) - } else if trimmed.contains("is not multipleOf") { - ("MULTIPLE_OF_VIOLATED".to_string(), - "Value is not a multiple of the required factor".to_string()) - } else if trimmed.contains("minimum") && trimmed.contains("properties required") { - ("MIN_PROPERTIES_VIOLATED".to_string(), - "Object has fewer properties than required".to_string()) - } else if trimmed.contains("maximum") && trimmed.contains("properties required") { - ("MAX_PROPERTIES_VIOLATED".to_string(), - "Object has more properties than allowed".to_string()) - } else if trimmed.contains("minimum") && trimmed.contains("items required") { - ("MIN_ITEMS_VIOLATED".to_string(), - "Array has fewer items than required".to_string()) - } else if trimmed.contains("maximum") && trimmed.contains("items required") { - ("MAX_ITEMS_VIOLATED".to_string(), - "Array has more items than allowed".to_string()) - } else if trimmed == "false schema" { - ("FALSE_SCHEMA".to_string(), - "Schema validation always fails".to_string()) - } else if trimmed == "not failed" { - ("NOT_VIOLATED".to_string(), - "Value matched a schema it should not match".to_string()) - } else if trimmed == "allOf failed" { - ("ALL_OF_VIOLATED".to_string(), - "Value does not match all required schemas".to_string()) - } else if trimmed == "anyOf failed" { - ("ANY_OF_VIOLATED".to_string(), - "Value does not match any of the allowed schemas".to_string()) - } else if trimmed.contains("oneOf failed") { - ("ONE_OF_VIOLATED".to_string(), - "Value must match exactly one schema".to_string()) - } else if trimmed == "validation failed" { - ("VALIDATION_FAILED".to_string(), - "Validation failed".to_string()) - } else { - // For any unmatched patterns, try to provide a generic human-readable message - // while preserving the original error in details.cause - ("VALIDATION_FAILED".to_string(), - "Validation failed".to_string()) - } -} - -// Extract a better format message -fn extract_format_message(message: &str) -> String { - if message.contains("date-time") { - "Invalid date-time format".to_string() - } else if message.contains("email") { - "Invalid email format".to_string() - } else if message.contains("uri") { - "Invalid URI format".to_string() - } else if message.contains("uuid") { - "Invalid UUID format".to_string() - } else { - "Invalid format".to_string() - } + current.clone() } #[pg_extern(strict, parallel_safe)] @@ -373,7 +424,6 @@ pub mod pg_test { } } - #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests {