use pgrx::*; pg_module_magic!(); use serde_json::{json, Value}; use std::{collections::HashMap, sync::RwLock}; use boon::{Compiler, Schemas, ValidationError, SchemaIndex, CompileError}; use lazy_static::lazy_static; struct BoonCache { schemas: Schemas, id_to_index: HashMap, } lazy_static! { static ref SCHEMA_CACHE: RwLock = RwLock::new(BoonCache { schemas: Schemas::new(), id_to_index: HashMap::new(), }); } #[pg_extern(strict)] fn cache_json_schema(schema_id: &str, schema: JsonB) -> JsonB { let mut cache = SCHEMA_CACHE.write().unwrap(); let schema_value: Value = schema.0; let mut compiler = Compiler::new(); compiler.enable_format_assertions(); if let Err(e) = compiler.add_resource(schema_id, schema_value) { return JsonB(json!({ "success": false, "error": { "kind": "SchemaResourceError", "message": format!("Failed to add schema resource: {}", e), "schema_id": schema_id } })); } match compiler.compile(schema_id, &mut cache.schemas) { Ok(sch_index) => { cache.id_to_index.insert(schema_id.to_string(), sch_index); JsonB(json!({ "success": true })) } Err(e) => { // Enhance error reporting by matching on the CompileError variant let error_details = match &e { CompileError::ValidationError { url, src } => { // Metaschema validation failed - provide more detail json!({ "kind": "SchemaCompilationError", "sub_kind": "ValidationError", // Explicitly state it's a metaschema validation error "message": format!("Schema failed validation against its metaschema: {}", src), "schema_id": schema_id, "failed_at_url": url, "validation_details": format!("{:?}", src), // Include full debug info of the validation error }) } // Handle other potential compilation errors _ => { let error_type = format!("{:?}", e).split('(').next().unwrap_or("Unknown").to_string(); json!({ "kind": "SchemaCompilationError", "sub_kind": error_type, // e.g., "InvalidJsonPointer", "UnsupportedUrlScheme" "message": format!("Schema compilation failed: {}", e), "schema_id": schema_id, "details": format!("{:?}", e), // Generic debug info }) } }; JsonB(json!({ "success": false, "error": error_details })) } } } #[pg_extern(strict, parallel_safe)] fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB { let cache = SCHEMA_CACHE.read().unwrap(); match cache.id_to_index.get(schema_id) { None => JsonB(json!({ "success": false, "error": { "kind": "SchemaNotFound", "message": format!("Schema with id '{}' not found in cache", schema_id) } })), Some(sch_index) => { let instance_value: Value = instance.0; match cache.schemas.validate(&instance_value, *sch_index) { Ok(_) => JsonB(json!({ "success": true })), Err(validation_error) => { let error = format_validation_error(&validation_error); JsonB(json!({ "success": false, "error": error })) } } } } } fn format_validation_error(error: &ValidationError) -> Value { json!({ "instance_path": error.instance_location.to_string(), "schema_path": error.schema_url.to_string(), "kind": format!("{:?}", error.kind), "message": format!("{}", error), "error": error .causes .iter() .map(format_validation_error) .collect::>() }) } #[pg_extern(strict, parallel_safe)] fn json_schema_cached(schema_id: &str) -> bool { let cache = SCHEMA_CACHE.read().unwrap(); cache.id_to_index.contains_key(schema_id) } #[pg_extern(strict)] fn clear_json_schemas() { let mut cache = SCHEMA_CACHE.write().unwrap(); *cache = BoonCache { schemas: Schemas::new(), id_to_index: HashMap::new(), }; } #[pg_extern(strict, parallel_safe)] fn show_json_schemas() -> Vec { let cache = SCHEMA_CACHE.read().unwrap(); let ids: Vec = cache.id_to_index.keys().cloned().collect(); ids } #[pg_schema] #[cfg(any(test, feature = "pg_test"))] mod tests { use pgrx::*; use pgrx::pg_test; use super::*; use serde_json::json; fn jsonb(val: Value) -> JsonB { JsonB(val) } fn setup_test() { clear_json_schemas(); } #[pg_test] fn test_cache_and_validate_json_schema() { setup_test(); let schema_id = "my_schema"; let schema = json!({ "type": "object", "properties": { "name": { "type": "string" }, "age": { "type": "integer", "minimum": 0 } }, "required": ["name", "age"] }); let valid_instance = json!({ "name": "Alice", "age": 30 }); let invalid_instance_type = json!({ "name": "Bob", "age": -5 }); let invalid_instance_missing = json!({ "name": "Charlie" }); let cache_result = cache_json_schema(schema_id, jsonb(schema.clone())); assert!(cache_result.0["success"].as_bool().unwrap()); let valid_result = validate_json_schema(schema_id, jsonb(valid_instance)); assert!(valid_result.0["success"].as_bool().unwrap()); let invalid_result_type = validate_json_schema(schema_id, jsonb(invalid_instance_type)); assert!(!invalid_result_type.0["success"].as_bool().unwrap()); let error_obj_type = invalid_result_type.0.get("error").expect("Expected top-level 'error' object"); let causes_age = error_obj_type.get("error").and_then(Value::as_array).expect("Expected nested 'error' array (causes)"); assert!(!causes_age.is_empty(), "Expected causes for invalid age"); let first_cause_age = &causes_age[0]; assert!(first_cause_age["kind"].as_str().unwrap().contains("Minimum"), "Kind '{}' should contain Minimum", first_cause_age["kind"]); let msg = first_cause_age["message"].as_str().unwrap_or(""); assert!(msg.contains("must be >=0"), "Error message mismatch for age minimum: {}", msg); let invalid_result_missing = validate_json_schema(schema_id, jsonb(invalid_instance_missing)); assert!(!invalid_result_missing.0["success"].as_bool().unwrap()); let error_obj_missing = invalid_result_missing.0.get("error").expect("Expected top-level 'error' object"); let causes_missing = error_obj_missing.get("error").and_then(Value::as_array).expect("Expected nested 'error' array (causes) for missing"); assert!(!causes_missing.is_empty(), "Expected causes for missing age"); let first_cause_missing = &causes_missing[0]; assert!(first_cause_missing["kind"].as_str().unwrap().contains("Required")); let msg_missing = first_cause_missing["message"].as_str().unwrap_or(""); assert!(msg_missing.contains("missing properties 'age'"), "Error message mismatch for missing 'age': {}", msg_missing); assert!(first_cause_missing["instance_path"] == "", "Expected empty instance path for missing field"); let non_existent_id = "non_existent_schema"; let invalid_schema_result = validate_json_schema(non_existent_id, jsonb(json!({}))); assert!(!invalid_schema_result.0["success"].as_bool().unwrap()); let schema_not_found_error = invalid_schema_result.0 .get("error") // Top level error object .expect("Expected top-level 'error' object for schema not found"); assert_eq!(schema_not_found_error["kind"], "SchemaNotFound"); assert!(schema_not_found_error["message"].as_str().unwrap().contains(non_existent_id)); } #[pg_test] fn test_validate_json_schema_not_cached() { setup_test(); let instance = json!({ "foo": "bar" }); let result = validate_json_schema("non_existent_schema", jsonb(instance)); assert!(!result.0["success"].as_bool().unwrap()); let error_obj = result.0.get("error").expect("Expected top-level 'error' object"); assert_eq!(error_obj["kind"], "SchemaNotFound"); assert!(error_obj["message"].as_str().unwrap().contains("non_existent_schema")); } #[pg_test] fn test_cache_invalid_json_schema() { setup_test(); let schema_id = "invalid_schema"; let invalid_schema_json = "{\"type\": \"string\" \"maxLength\": 5}"; let invalid_schema_value: Result = serde_json::from_str(invalid_schema_json); assert!(invalid_schema_value.is_err(), "Test setup assumes invalid JSON string"); let schema_representing_invalid = json!({ "type": 123 }); let result = cache_json_schema(schema_id, jsonb(schema_representing_invalid.clone())); assert!(!result.0["success"].as_bool().unwrap()); let error_obj = result.0.get("error").expect("Expected top-level 'error' object for compilation failure"); assert_eq!(error_obj.get("kind").and_then(Value::as_str), Some("SchemaCompilationError")); assert_eq!(error_obj.get("sub_kind").and_then(Value::as_str), Some("ValidationError"), "Expected sub_kind 'ValidationError' for metaschema failure"); assert!(error_obj.get("message").and_then(Value::as_str).is_some(), "Expected 'message' field in error object"); assert!(error_obj["message"].as_str().unwrap().contains("Schema failed validation against its metaschema"), "Error message mismatch"); assert_eq!(error_obj.get("schema_id").and_then(Value::as_str), Some(schema_id)); let failed_at_url = error_obj.get("failed_at_url").and_then(Value::as_str).expect("Expected 'failed_at_url' string"); assert!(failed_at_url.ends_with(&format!("{}#", schema_id)), "failed_at_url ('{}') should end with schema_id + '#' ('{}#')", failed_at_url, schema_id); assert!(error_obj.get("validation_details").and_then(Value::as_str).is_some(), "Expected 'validation_details' field"); } #[pg_test] fn test_validate_json_schema_detailed_validation_errors() { setup_test(); let schema_id = "detailed_schema"; let schema = json!({ "type": "object", "properties": { "address": { "type": "object", "properties": { "street": { "type": "string" }, "city": { "type": "string", "maxLength": 10 } }, "required": ["street", "city"] } }, "required": ["address"] }); let invalid_instance = json!({ "address": { "street": 123, "city": "Supercalifragilisticexpialidocious" } }); assert!(cache_json_schema(schema_id, jsonb(schema.clone())).0["success"].as_bool().unwrap()); let result = validate_json_schema(schema_id, jsonb(invalid_instance)); assert!(!result.0["success"].as_bool().unwrap()); let error_obj = result.0.get("error").expect("Expected top-level 'error' object"); let causes = error_obj.get("error").and_then(Value::as_array).expect("Expected nested 'error' array (causes)"); assert!(causes.len() >= 2, "Expected at least 2 detailed causes"); let street_error = causes.iter().find(|e| e["instance_path"] == "/address/street").expect("Missing street error"); assert!(street_error["kind"].as_str().unwrap().contains("Type"), "Kind '{}' should contain Type", street_error["kind"]); let street_msg = street_error["message"].as_str().unwrap_or("null"); assert!(street_msg.contains("want string, but got number"), "Street message mismatch: {}", street_msg); let city_error = causes.iter().find(|e| e["instance_path"] == "/address/city").expect("Missing city error"); assert!(city_error["kind"].as_str().unwrap().contains("MaxLength"), "Kind '{}' should contain MaxLength", city_error["kind"]); let city_msg = city_error["message"].as_str().unwrap_or("null"); assert!(city_msg.contains("length must be <=10"), "City message mismatch: {}", city_msg); assert_eq!(causes.len(), 2, "Expected exactly 2 errors (street type, city length)"); } #[pg_test] fn test_validate_json_schema_oneof_validation_errors() { setup_test(); let schema_id = "oneof_schema"; let schema = json!({ "oneOf": [ { "type": "object", "properties": { "string_prop": { "type": "string", "maxLength": 5 } }, "required": ["string_prop"] }, { "type": "object", "properties": { "number_prop": { "type": "number", "minimum": 10 } }, "required": ["number_prop"] } ] }); cache_json_schema(schema_id, jsonb(schema)); let invalid_string_instance = json!({ "string_prop": "toolongstring" }); let result_invalid_string = validate_json_schema(schema_id, jsonb(invalid_string_instance)); assert!(!result_invalid_string.0["success"].as_bool().unwrap()); let error_obj_string = result_invalid_string.0.get("error").expect("Expected top-level 'error' object"); assert!(error_obj_string["kind"].as_str().unwrap().contains("Schema"), "Top level kind '{}' should contain Schema for OneOf failure", error_obj_string["kind"]); assert!(error_obj_string["message"].as_str().unwrap().contains("oneOf failed, none matched"), "OneOf message mismatch: {}", error_obj_string["message"]); // Final adjustment let causes_string = error_obj_string.get("error").and_then(Value::as_array).expect("Expected nested 'error' array (causes)"); assert_eq!(causes_string.len(), 1, "Expected one cause for oneOf failure (string)"); let nested_causes_string = causes_string[0].get("error").and_then(Value::as_array).expect("Expected deeper nested causes for string oneOf"); assert_eq!(nested_causes_string.len(), 2, "Expected two nested causes for string oneOf"); let string_schema_fail = nested_causes_string.iter().find(|c| c["schema_path"].as_str().unwrap().ends_with("/oneOf/0/properties/string_prop")).expect("Missing nested cause for string schema"); assert_eq!(string_schema_fail["instance_path"].as_str().unwrap(), "/string_prop", "Instance path should be /string_prop"); assert!(string_schema_fail["kind"].as_str().unwrap().contains("MaxLength"), "Nested string cause kind should be MaxLength"); let number_schema_fail = nested_causes_string.iter().find(|c| c["schema_path"].as_str().unwrap().ends_with("/oneOf/1")).expect("Missing nested cause for number schema"); assert_eq!(number_schema_fail["instance_path"].as_str().unwrap(), "", "Instance path for branch 2 type mismatch should be empty"); assert!(number_schema_fail["kind"].as_str().unwrap().contains("Required"), "Nested number cause kind should be Required"); let invalid_number_instance = json!({ "number_prop": 5 }); let result_invalid_number = validate_json_schema(schema_id, jsonb(invalid_number_instance)); assert!(!result_invalid_number.0["success"].as_bool().unwrap()); let error_obj_number = result_invalid_number.0.get("error").expect("Expected top-level 'error' object"); assert!(error_obj_number["kind"].as_str().unwrap().contains("Schema"), "Top level kind '{}' should contain Schema for OneOf failure", error_obj_number["kind"]); assert!(error_obj_number["message"].as_str().unwrap().contains("oneOf failed, none matched"), "OneOf message mismatch: {}", error_obj_number["message"]); // Final adjustment let causes_number = error_obj_number.get("error").and_then(Value::as_array).expect("Expected nested 'error' array (causes)"); assert_eq!(causes_number.len(), 1, "Expected one cause for oneOf failure (number)"); let nested_causes_number = causes_number[0].get("error").and_then(Value::as_array).expect("Expected deeper nested causes for number oneOf"); assert_eq!(nested_causes_number.len(), 2, "Expected two nested causes for number oneOf"); let string_schema_fail_num = nested_causes_number.iter().find(|c| c["schema_path"].as_str().unwrap().ends_with("/oneOf/0")).expect("Missing nested cause for string schema (number case)"); assert_eq!(string_schema_fail_num["instance_path"].as_str().unwrap(), "", "Instance path for branch 1 type mismatch should be empty"); assert!(string_schema_fail_num["kind"].as_str().unwrap().contains("Required"), "Nested string cause kind should be Required (number case)"); let number_schema_fail_num = nested_causes_number.iter().find(|c| c["schema_path"].as_str().unwrap().ends_with("/oneOf/1/properties/number_prop")).expect("Missing nested cause for number schema (number case)"); assert_eq!(number_schema_fail_num["instance_path"].as_str().unwrap(), "/number_prop", "Instance path should be /number_prop (number case)"); assert!(number_schema_fail_num["kind"].as_str().unwrap().contains("Minimum"), "Nested number cause kind should be Minimum (number case)"); let invalid_bool_instance = json!({ "other_prop": true }); let result_invalid_bool = validate_json_schema(schema_id, jsonb(invalid_bool_instance)); assert!(!result_invalid_bool.0["success"].as_bool().unwrap()); let error_obj_bool = result_invalid_bool.0.get("error").expect("Expected top-level 'error' object"); assert!(error_obj_bool["kind"].as_str().unwrap().contains("Schema"), "Top level kind '{}' should contain Schema for OneOf failure", error_obj_bool["kind"]); assert!(error_obj_bool["message"].as_str().unwrap().contains("oneOf failed, none matched"), "OneOf message mismatch: {}", error_obj_bool["message"]); // Final adjustment let causes_bool = error_obj_bool.get("error").and_then(Value::as_array).expect("Expected nested 'error' array (causes)"); assert_eq!(causes_bool.len(), 1, "Expected one cause for oneOf failure (bool)"); let nested_causes_bool = causes_bool[0].get("error").and_then(Value::as_array).expect("Expected deeper nested causes for bool oneOf"); assert_eq!(nested_causes_bool.len(), 2, "Expected two nested causes for bool oneOf"); let bool_fail_0 = nested_causes_bool.iter().find(|c| c["schema_path"].as_str().unwrap().ends_with("/oneOf/0")).expect("Missing nested cause for branch 0 type fail"); assert_eq!(bool_fail_0["instance_path"].as_str().unwrap(), "", "Instance path for branch 0 type fail should be empty"); assert!(bool_fail_0["kind"].as_str().unwrap().contains("Required"), "Nested bool cause 0 kind should be Required"); let bool_fail_1 = nested_causes_bool.iter().find(|c| c["schema_path"].as_str().unwrap().ends_with("/oneOf/1")).expect("Missing nested cause for branch 1 type fail"); assert_eq!(bool_fail_1["instance_path"].as_str().unwrap(), "", "Instance path for branch 1 type fail should be empty"); assert!(bool_fail_1["kind"].as_str().unwrap().contains("Required"), "Nested bool cause 1 kind should be Required"); } #[pg_test] fn test_clear_json_schemas() { setup_test(); let schema_id = "schema_to_clear"; let schema = json!({ "type": "string" }); cache_json_schema(schema_id, jsonb(schema.clone())); let show_result1 = show_json_schemas(); assert!(show_result1.contains(&schema_id.to_string())); clear_json_schemas(); let show_result2 = show_json_schemas(); assert!(show_result2.is_empty()); let instance = json!("test"); let validate_result = validate_json_schema(schema_id, jsonb(instance)); assert!(!validate_result.0["success"].as_bool().unwrap()); let error_obj = validate_result.0.get("error").expect("Expected top-level 'error' object"); assert_eq!(error_obj["kind"], "SchemaNotFound"); assert!(error_obj["message"].as_str().unwrap().contains(schema_id)); } #[pg_test] fn test_show_json_schemas() { setup_test(); let schema_id1 = "schema1"; let schema_id2 = "schema2"; let schema = json!({ "type": "boolean" }); cache_json_schema(schema_id1, jsonb(schema.clone())); cache_json_schema(schema_id2, jsonb(schema.clone())); let result = show_json_schemas(); assert!(result.contains(&schema_id1.to_string())); assert!(result.contains(&schema_id2.to_string())); } } #[cfg(test)] pub mod pg_test { pub fn setup(_options: Vec<&str>) { // perform one-off initialization when the pg_test framework starts } pub fn postgresql_conf_options() -> Vec<&'static str> { // return any postgresql.conf settings that are required for your tests vec![] } }