use pgrx::*; pg_module_magic!(); use serde_json::{json, Value}; use std::{collections::HashMap, sync::RwLock}; use boon::{Compiler, Schemas, ValidationError, SchemaIndex, CompileError}; use lazy_static::lazy_static; struct BoonCache { schemas: Schemas, id_to_index: HashMap, } lazy_static! { static ref SCHEMA_CACHE: RwLock = RwLock::new(BoonCache { schemas: Schemas::new(), id_to_index: HashMap::new(), }); } #[pg_extern(strict)] fn cache_json_schema(schema_id: &str, schema: JsonB) -> JsonB { let mut cache = SCHEMA_CACHE.write().unwrap(); let schema_value: Value = schema.0; let schema_path = format!("urn:{}", schema_id); let mut compiler = Compiler::new(); compiler.enable_format_assertions(); // Use schema_path when adding the resource if let Err(e) = compiler.add_resource(&schema_path, schema_value.clone()) { return JsonB(json!({ "errors": [{ "code": "SCHEMA_RESOURCE_ADD_FAILED", "message": format!("Failed to add schema resource '{}'", schema_id), "details": { "path": schema_path, "cause": format!("{}", e) } }] })); } // Use schema_path when compiling match compiler.compile(&schema_path, &mut cache.schemas) { Ok(sch_index) => { // Store the index using the original schema_id as the key cache.id_to_index.insert(schema_id.to_string(), sch_index); JsonB(json!({ "response": "success" })) } Err(e) => { let errors = match &e { CompileError::ValidationError { url: _url, src } => { // Collect leaf errors from the meta-schema validation failure let mut error_list = Vec::new(); collect_validation_errors(src, &mut error_list); // Filter and format errors properly - no instance for schema compilation format_drop_errors(error_list, &schema_value) } _ => { // Other compilation errors vec![json!({ "code": "SCHEMA_COMPILATION_FAILED", "message": format!("Schema '{}' compilation failed", schema_id), "details": { "path": schema_path, "cause": format!("{:?}", e) } })] } }; JsonB(json!({ "errors": errors })) } } } #[pg_extern(strict, parallel_safe)] fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB { let cache = SCHEMA_CACHE.read().unwrap(); // Lookup uses the original schema_id match cache.id_to_index.get(schema_id) { None => JsonB(json!({ "errors": [{ "code": "SCHEMA_NOT_FOUND", "message": format!("Schema '{}' not found in cache", schema_id), "details": { "cause": "Schema must be cached before validation" } }] })), Some(sch_index) => { let instance_value: Value = instance.0; match cache.schemas.validate(&instance_value, *sch_index) { Ok(_) => JsonB(json!({ "response": "success" })), Err(validation_error) => { let mut error_list = Vec::new(); collect_validation_errors(&validation_error, &mut error_list); let errors = format_drop_errors(error_list, &instance_value); JsonB(json!({ "errors": errors })) } } } } } // Recursively collects validation errors fn collect_validation_errors(error: &ValidationError, errors_list: &mut Vec<(String, String, String)>) { // Check if this is a structural error that we should skip let error_message = format!("{}", error.kind); let is_structural = error_message == "validation failed" || error_message == "allOf failed" || error_message == "anyOf failed" || error_message == "not failed" || error_message.starts_with("oneOf failed"); if error.causes.is_empty() && !is_structural { // This is a leaf error that's not structural // Format just the error kind, not the whole validation error let message = format!("{}", error.kind); errors_list.push(( error.instance_location.to_string(), error.schema_url.to_string(), message )); } else { // Recurse into causes for cause in &error.causes { collect_validation_errors(cause, errors_list); } } } // Formats errors according to DropError structure fn format_drop_errors(raw_errors: Vec<(String, String, String)>, instance: &Value) -> Vec { use std::collections::HashMap; use std::collections::hash_map::Entry; // We don't filter structural paths from instance paths anymore // because instance paths shouldn't contain these segments anyway // The issue was likely with schema paths, not instance paths let plausible_errors = raw_errors; // 2. Deduplicate by instance_path and format as DropError let mut unique_errors: HashMap = HashMap::new(); for (instance_path, schema_path, message) in plausible_errors { if let Entry::Vacant(entry) = unique_errors.entry(instance_path.clone()) { // Convert message to error code and make it human readable let (code, human_message) = enhance_error_message(&message); // Extract the failing value from the instance let failing_value = extract_value_at_path(instance, &instance_path); entry.insert(json!({ "code": code, "message": human_message, "details": { "path": schema_path, "context": json!({ "instance_path": instance_path, "failing_value": failing_value }), "cause": message // Original error message } })); } } unique_errors.into_values().collect() } // Helper function to extract value at a JSON pointer path fn extract_value_at_path(instance: &Value, path: &str) -> Value { let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); let mut current = instance; for part in parts { match current { Value::Object(map) => { if let Some(value) = map.get(part) { current = value; } else { return Value::Null; } } Value::Array(arr) => { if let Ok(index) = part.parse::() { if let Some(value) = arr.get(index) { current = value; } else { return Value::Null; } } else { return Value::Null; } } _ => return Value::Null, } } current.clone() } // Helper to convert validation messages to error codes and human-readable messages fn enhance_error_message(message: &str) -> (String, String) { // Match exact boon error message patterns let trimmed = message.trim(); if trimmed.contains("value must be one of") { ("ENUM_VIOLATED".to_string(), "Value is not one of the allowed options".to_string()) } else if trimmed.contains("length must be >=") && trimmed.contains("but got") { ("MIN_LENGTH_VIOLATED".to_string(), "Field length is below the minimum required".to_string()) } else if trimmed.contains("length must be <=") && trimmed.contains("but got") { ("MAX_LENGTH_VIOLATED".to_string(), "Field length exceeds the maximum allowed".to_string()) } else if trimmed.contains("must be >=") && trimmed.contains("but got") { ("MINIMUM_VIOLATED".to_string(), "Value is below the minimum allowed".to_string()) } else if trimmed.contains("must be <=") && trimmed.contains("but got") { ("MAXIMUM_VIOLATED".to_string(), "Value exceeds the maximum allowed".to_string()) } else if trimmed.contains("must be >") && trimmed.contains("but got") { ("EXCLUSIVE_MINIMUM_VIOLATED".to_string(), "Value must be greater than the minimum".to_string()) } else if trimmed.contains("must be <") && trimmed.contains("but got") { ("EXCLUSIVE_MAXIMUM_VIOLATED".to_string(), "Value must be less than the maximum".to_string()) } else if trimmed.contains("does not match pattern") { ("PATTERN_VIOLATED".to_string(), "Value does not match the required pattern".to_string()) } else if trimmed.contains("missing properties") { ("REQUIRED_FIELD_MISSING".to_string(), "Required field is missing".to_string()) } else if trimmed.contains("want") && trimmed.contains("but got") { ("TYPE_MISMATCH".to_string(), "Field type does not match the expected type".to_string()) } else if trimmed.starts_with("value must be") && !trimmed.contains("one of") { ("CONST_VIOLATED".to_string(), "Value does not match the required constant".to_string()) } else if trimmed.contains("is not valid") && trimmed.contains(":") { ("FORMAT_INVALID".to_string(), extract_format_message(trimmed)) } else if trimmed.contains("items at") && trimmed.contains("are equal") { ("UNIQUE_ITEMS_VIOLATED".to_string(), "Array contains duplicate items".to_string()) } else if trimmed.contains("additionalProperties") && trimmed.contains("not allowed") { ("ADDITIONAL_PROPERTIES_NOT_ALLOWED".to_string(), "Object contains properties that are not allowed".to_string()) } else if trimmed.contains("is not multipleOf") { ("MULTIPLE_OF_VIOLATED".to_string(), "Value is not a multiple of the required factor".to_string()) } else if trimmed.contains("minimum") && trimmed.contains("properties required") { ("MIN_PROPERTIES_VIOLATED".to_string(), "Object has fewer properties than required".to_string()) } else if trimmed.contains("maximum") && trimmed.contains("properties required") { ("MAX_PROPERTIES_VIOLATED".to_string(), "Object has more properties than allowed".to_string()) } else if trimmed.contains("minimum") && trimmed.contains("items required") { ("MIN_ITEMS_VIOLATED".to_string(), "Array has fewer items than required".to_string()) } else if trimmed.contains("maximum") && trimmed.contains("items required") { ("MAX_ITEMS_VIOLATED".to_string(), "Array has more items than allowed".to_string()) } else if trimmed == "false schema" { ("FALSE_SCHEMA".to_string(), "Schema validation always fails".to_string()) } else if trimmed == "not failed" { ("NOT_VIOLATED".to_string(), "Value matched a schema it should not match".to_string()) } else if trimmed == "allOf failed" { ("ALL_OF_VIOLATED".to_string(), "Value does not match all required schemas".to_string()) } else if trimmed == "anyOf failed" { ("ANY_OF_VIOLATED".to_string(), "Value does not match any of the allowed schemas".to_string()) } else if trimmed.contains("oneOf failed") { ("ONE_OF_VIOLATED".to_string(), "Value must match exactly one schema".to_string()) } else if trimmed == "validation failed" { ("VALIDATION_FAILED".to_string(), "Validation failed".to_string()) } else { // For any unmatched patterns, try to provide a generic human-readable message // while preserving the original error in details.cause ("VALIDATION_FAILED".to_string(), "Validation failed".to_string()) } } // Extract a better format message fn extract_format_message(message: &str) -> String { if message.contains("date-time") { "Invalid date-time format".to_string() } else if message.contains("email") { "Invalid email format".to_string() } else if message.contains("uri") { "Invalid URI format".to_string() } else if message.contains("uuid") { "Invalid UUID format".to_string() } else { "Invalid format".to_string() } } #[pg_extern(strict, parallel_safe)] fn json_schema_cached(schema_id: &str) -> bool { let cache = SCHEMA_CACHE.read().unwrap(); cache.id_to_index.contains_key(schema_id) } #[pg_extern(strict)] fn clear_json_schemas() -> JsonB { let mut cache = SCHEMA_CACHE.write().unwrap(); *cache = BoonCache { schemas: Schemas::new(), id_to_index: HashMap::new(), }; JsonB(json!({ "response": "success" })) } #[pg_extern(strict, parallel_safe)] fn show_json_schemas() -> JsonB { let cache = SCHEMA_CACHE.read().unwrap(); let ids: Vec = cache.id_to_index.keys().cloned().collect(); JsonB(json!({ "response": ids })) } /// This module is required by `cargo pgrx test` invocations. /// It must be visible at the root of your extension crate. #[cfg(test)] pub mod pg_test { pub fn setup(_options: Vec<&str>) { // perform one-off initialization when the pg_test framework starts } #[must_use] pub fn postgresql_conf_options() -> Vec<&'static str> { // return any postgresql.conf settings that are required for your tests vec![] } } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { include!("tests.rs"); }