From 23235d4b9dbb14ad39af6d467195f15d6f2ca5b1 Mon Sep 17 00:00:00 2001 From: Alex Groleau Date: Mon, 14 Apr 2025 18:08:36 -0400 Subject: [PATCH] -m switched to boon --- Cargo.lock | 27 +++ Cargo.toml | 2 + src/lib.rs | 543 ++++++++++++++++++++++++++++++++++------------------- 3 files changed, 380 insertions(+), 192 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7818a57..19e691f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -68,6 +68,12 @@ version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" +[[package]] +name = "appendlist" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e149dc73cd30538307e7ffa2acd3d2221148eaeed4871f246657b1c3eaa1cbd2" + [[package]] name = "async-trait" version = "0.1.88" @@ -177,6 +183,26 @@ dependencies = [ "generic-array", ] +[[package]] +name = "boon" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baa187da765010b70370368c49f08244b1ae5cae1d5d33072f76c8cb7112fe3e" +dependencies = [ + "ahash", + "appendlist", + "base64", + "fluent-uri", + "idna", + "once_cell", + "percent-encoding", + "regex", + "regex-syntax", + "serde", + "serde_json", + "url", +] + [[package]] name = "borrow-or-share" version = "0.2.2" @@ -1015,6 +1041,7 @@ dependencies = [ name = "jspg" version = "0.1.0" dependencies = [ + "boon", "jsonschema", "lazy_static", "pgrx", diff --git a/Cargo.toml b/Cargo.toml index c6dfd89..5e07655 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" jsonschema = "0.29.1" lazy_static = "1.5.0" +boon = "0.6.1" [dev-dependencies] pgrx-tests = "0.14.0" @@ -22,6 +23,7 @@ path = "src/bin/pgrx_embed.rs" [features] pg17 = ["pgrx/pg17", "pgrx-tests/pg17" ] +# Local feature flag used by `cargo pgrx test` pg_test = [] [profile.dev] diff --git a/src/lib.rs b/src/lib.rs index 
5faf72e..9fdddfe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,237 +1,396 @@ use pgrx::*; -use jsonschema::{Draft, Validator}; -use serde_json::json; -use std::collections::HashMap; -use std::sync::RwLock; -use lazy_static::lazy_static; pg_module_magic!(); -// Global, thread-safe schema cache using the correct Validator type +use serde_json::{json, Value}; +use std::{collections::HashMap, sync::RwLock}; +use boon::{Compiler, Schemas, ValidationError, SchemaIndex}; +use lazy_static::lazy_static; + +struct BoonCache { + schemas: Schemas, + id_to_index: HashMap<String, SchemaIndex>, +} + lazy_static! { - static ref SCHEMA_CACHE: RwLock<HashMap<String, Validator>> = RwLock::new(HashMap::new()); + static ref SCHEMA_CACHE: RwLock<BoonCache> = RwLock::new(BoonCache { + schemas: Schemas::new(), + id_to_index: HashMap::new() + }); } -// Cache a schema explicitly with a provided ID -#[pg_extern(immutable, strict, parallel_safe)] +#[pg_extern(strict)] fn cache_schema(schema_id: &str, schema: JsonB) -> JsonB { - let schema_value = schema.0; + let mut cache = SCHEMA_CACHE.write().unwrap(); + let schema_value: Value = schema.0; - // Compile the schema using the builder pattern - match jsonschema::options() - .with_draft(Draft::Draft7) - .should_validate_formats(true) - .build(&schema_value) - { - Ok(compiled_schema) => { - // If compilation succeeds, add it to the cache - let mut cache = SCHEMA_CACHE.write().unwrap(); - cache.insert(schema_id.to_string(), compiled_schema); - JsonB(json!({ "success": true, "id": schema_id })) + let mut compiler = Compiler::new(); + compiler.enable_format_assertions(); + + let schema_url = format!("urn:jspg:{}", schema_id); + + if let Err(e) = compiler.add_resource(&schema_url, schema_value) { + return JsonB(json!({ + "success": false, + "error": format!("Failed to add schema resource '{}': {}", schema_id, e) + })); } - Err(e) => { - // If compilation fails, return an error - JsonB(json!({ - "success": false, - "error": format!("Failed to compile schema '{}': {}", schema_id, e) - })) + + match 
compiler.compile(&schema_url, &mut cache.schemas) { + Ok(sch_index) => { + cache.id_to_index.insert(schema_id.to_string(), sch_index); + JsonB(json!({ + "success": true, + "schema_id": schema_id, + "message": "Schema cached successfully." + })) + } + Err(e) => JsonB(json!({ + "success": false, + "schema_id": schema_id, + "error": format!("Schema compilation failed: {}", e) + })), } - } } -// Check if a schema is cached -#[pg_extern(immutable, strict, parallel_safe)] -fn schema_cached(schema_id: &str) -> bool { - SCHEMA_CACHE.read().unwrap().contains_key(schema_id) -} - -// Validate JSONB instance against a cached schema by ID -#[pg_extern(immutable, strict, parallel_safe)] +#[pg_extern(strict, parallel_safe)] fn validate_schema(schema_id: &str, instance: JsonB) -> JsonB { - let cache = SCHEMA_CACHE.read().unwrap(); - let compiled_schema: &Validator = match cache.get(schema_id) { - Some(schema) => schema, - None => { - // Return the 'schema not cached' error in the standard object format - let error_msg = format!("Schema ID '{}' not cached", schema_id); - return JsonB(json!({ - "valid": false, - "errors": [json!({ - "kind": "SchemaNotFound", // Custom kind for this case - "error": error_msg - })] - })); - } - }; - - let instance_value = instance.0; - // Use iter_errors() to get all validation errors - let errors_iterator = compiled_schema.iter_errors(&instance_value); - - // Collect errors into a vector first to check if any exist - let collected_errors_result: Vec<_> = errors_iterator.collect(); - - if collected_errors_result.is_empty() { - // No errors found, validation passed - JsonB(json!({ "valid": true })) - } else { - // Errors found, format them - let error_details = collect_all_errors(collected_errors_result.into_iter()); - JsonB(json!({ - "valid": false, - "errors": error_details - })) - } -} - -fn format_validation_error(error: &jsonschema::ValidationError) -> serde_json::Value { - json!({ - "instance_path": error.instance_path.to_string(), - 
"schema_path": error.schema_path.to_string(), - "kind": format!("{:?}", error.kind), - "error": error.to_string() - }) -} - -// Simplified: Collects all validation errors by formatting each one. -// Assumes the iterator provided by iter_errors() gives all necessary detail. -fn collect_all_errors<'a>( - errors: impl Iterator<Item = jsonschema::ValidationError<'a>>, -) -> Vec<serde_json::Value> { - errors.map(|e| format_validation_error(&e)).collect() -} - -// Show the IDs of all schemas currently in the cache -#[pg_extern(immutable, parallel_safe)] -fn show_schema_cache() -> Vec<String> { let cache = SCHEMA_CACHE.read().unwrap(); - cache.keys().cloned().collect() + + match cache.id_to_index.get(schema_id) { + None => JsonB(json!({ + "success": false, + "errors": [{ + "kind": "SchemaNotFound", + "message": format!("Schema with id '{}' not found in cache", schema_id) + }] + })), + Some(sch_index) => { + let instance_value: Value = instance.0; + match cache.schemas.validate(&instance_value, *sch_index) { + Ok(_) => JsonB(json!({ "success": true })), + Err(validation_error) => { + let error_details = format_boon_errors(&validation_error); + JsonB(json!({ + "success": false, + "errors": [error_details] + })) + } + } + } + } } -// Clear the entire schema cache explicitly -#[pg_extern(immutable, parallel_safe)] -fn clear_schema_cache() -> bool { - SCHEMA_CACHE.write().unwrap().clear(); - true +fn format_boon_errors(error: &ValidationError) -> Value { + json!({ + "instance_path": error.instance_location.to_string(), + "schema_path": error.schema_url.to_string(), + "kind": format!("{:?}", error.kind), + "message": format!("{}", error), + "causes": error + .causes + .iter() + .map(format_boon_errors) + .collect::<Vec<_>>() + }) +} + +#[pg_extern(strict)] +fn clear_schema_cache() -> JsonB { + let mut cache = SCHEMA_CACHE.write().unwrap(); + *cache = BoonCache { + schemas: Schemas::new(), + id_to_index: HashMap::new() + }; + JsonB(json!({ + "success": true, + "message": "Schema cache cleared." 
+ })) +} + +#[pg_extern(strict, parallel_safe)] +fn show_schema_cache() -> JsonB { + let cache = SCHEMA_CACHE.read().unwrap(); + let ids: Vec<&String> = cache.id_to_index.keys().collect(); + JsonB(json!({ + "cached_schema_ids": ids + })) } #[pg_schema] #[cfg(any(test, feature = "pg_test"))] mod tests { - use pgrx::prelude::*; - use serde_json::json; - use pgrx::JsonB; // Import JsonB specifically for tests + use pgrx::*; + use pgrx::pg_test; + use super::*; + use serde_json::json; - // Helper to clear cache before tests that need it - fn setup_test() { - crate::clear_schema_cache(); - } + fn jsonb(val: Value) -> JsonB { + JsonB(val) + } - #[pg_test] - fn test_cache_and_validate_schema() { - setup_test(); - assert!(crate::cache_schema( - "test_schema", - JsonB(json!({ "type": "object" })) - ).0["success"] == json!(true)); - assert!(crate::schema_cached("test_schema")); + fn setup_test() { + clear_schema_cache(); + } - let result_valid = crate::validate_schema("test_schema", JsonB(json!({ "foo": "bar" }))); - assert_eq!(result_valid.0["valid"], true); + #[pg_test] + fn test_cache_and_validate_schema() { + setup_test(); + let schema_id = "my_schema"; + let schema = json!({ + "type": "object", + "properties": { + "name": { "type": "string" }, + "age": { "type": "integer", "minimum": 0 } + }, + "required": ["name", "age"] + }); + let valid_instance = json!({ "name": "Alice", "age": 30 }); + let invalid_instance_type = json!({ "name": "Bob", "age": -5 }); + let invalid_instance_missing = json!({ "name": "Charlie" }); - let result_invalid = crate::validate_schema("test_schema", JsonB(json!(42))); - assert_eq!(result_invalid.0["valid"], false); - assert!(result_invalid.0["errors"][0]["error"].as_str().unwrap().contains("is not of type \"object\"")); - } + let cache_result = cache_schema(schema_id, jsonb(schema.clone())); + assert!(cache_result.0["success"].as_bool().unwrap()); - #[pg_test] - fn test_schema_not_cached() { - setup_test(); - let result = 
crate::validate_schema("unknown_schema", JsonB(json!({}))); - assert_eq!(result.0["valid"], false); - assert!(result.0["errors"][0]["error"].as_str().unwrap().contains("not cached")); - } + let valid_result = validate_schema(schema_id, jsonb(valid_instance)); + assert!(valid_result.0["success"].as_bool().unwrap()); - #[pg_test] - fn test_clear_schema_cache() { - setup_test(); - crate::cache_schema("clear_test", JsonB(json!({ "type": "object" }))); - assert!(crate::schema_cached("clear_test")); + let invalid_result_type = validate_schema(schema_id, jsonb(invalid_instance_type)); + assert!(!invalid_result_type.0["success"].as_bool().unwrap()); - crate::clear_schema_cache(); - assert!(!crate::schema_cached("clear_test")); - } + // --- Assertions for invalid_result_type --- - #[pg_test] - fn test_invalid_schema_cache() { - setup_test(); - // Attempt to cache an invalid schema definition - let result = crate::cache_schema( - "bad_schema", - JsonB(json!({ "type": "unknown_type" })) - ); - assert!(result.0["success"] == json!(false), "Caching an invalid schema should fail"); - assert!(!crate::schema_cached("bad_schema")); - } + // Get top-level errors + let top_level_errors = invalid_result_type.0["errors"].as_array().expect("Top-level 'errors' should be an array"); + assert_eq!(top_level_errors.len(), 1, "Should have exactly one top-level error for invalid type"); - #[pg_test] - fn test_show_schema_cache() { - setup_test(); - assert!(crate::cache_schema("schema1", JsonB(json!({ "type": "string" }))).0["success"] == json!(true)); - assert!(crate::cache_schema("schema2", JsonB(json!({ "type": "number" }))).0["success"] == json!(true)); + // Get the first (and only) top-level error + let top_level_error = top_level_errors.get(0).expect("Should get the first top-level error"); - let mut cached_ids = crate::show_schema_cache(); - cached_ids.sort(); // Sort for deterministic comparison + // Check top-level error kind + 
assert!(top_level_error.get("kind").and_then(Value::as_str).map_or(false, |k| k.starts_with("Schema { url:")), + "Incorrect kind for top-level error. Expected 'Schema {{ url:'. Error: {:?}. All errors: {:?}", top_level_error, top_level_errors); - assert_eq!(cached_ids.len(), 2); - assert_eq!(cached_ids, vec!["schema1", "schema2"]); + // Get the 'causes' array from the top-level error + let causes_age = top_level_error.get("causes").and_then(Value::as_array).expect("Top-level error 'causes' should be an array"); + assert_eq!(causes_age.len(), 1, "Should have one cause for the age error"); - crate::clear_schema_cache(); - let empty_ids = crate::show_schema_cache(); - assert!(empty_ids.is_empty()); - } + // Get the actual age error from the 'causes' array + let age_error = causes_age.get(0).expect("Should have an error object in 'causes'"); + assert_eq!(age_error.get("instance_path").and_then(Value::as_str), Some("/age"), + "Incorrect instance_path for age error. Error: {:?}. All errors: {:?}", age_error, top_level_errors); - #[pg_test] - fn test_detailed_validation_errors() { - setup_test(); - let schema_id = "required_prop_schema"; - let schema = JsonB(json!({ - "title": "Test Required", - "type": "object", - "properties": { - "name": { "type": "string" }, - "age": { "type": "integer" } - }, - "required": ["name"] - })); + assert!(age_error.get("kind").and_then(Value::as_str).map_or(false, |k| k.starts_with("Minimum { got:")), + "Incorrect kind prefix for age error. Expected 'Minimum {{ got:'. Error: {:?}. All errors: {:?}", age_error, top_level_errors); - assert!(crate::cache_schema(schema_id, schema).0["success"] == json!(true)); + let expected_prefix = "at '/age': must be >=0"; + assert!(age_error.get("message") + .and_then(Value::as_str) + .map_or(false, |m| m.starts_with(expected_prefix)), + "Incorrect message prefix for age error. Expected prefix '{}'. Error: {:?}. 
All errors: {:?}", + expected_prefix, age_error, top_level_errors); - // Instance missing the required 'name' property - let invalid_instance = JsonB(json!({ "age": 30 })); - let result = crate::validate_schema(schema_id, invalid_instance); + let invalid_result_missing = validate_schema(schema_id, jsonb(invalid_instance_missing)); + assert!(!invalid_result_missing.0["success"].as_bool().unwrap(), "Validation should fail for missing required field"); - assert_eq!(result.0["valid"], false); - let errors = result.0["errors"].as_array().expect("Errors should be an array"); - assert_eq!(errors.len(), 1, "Should have exactly one error"); + // --- Assertions for invalid_result_missing --- - let error = &errors[0]; - eprintln!("Validation Error Details: {}", error); + // Get top-level errors + let top_level_errors_missing = invalid_result_missing.0["errors"].as_array().expect("Errors should be an array for missing field"); + assert_eq!(top_level_errors_missing.len(), 1, "Should have one top-level error for missing field"); - assert_eq!(error["instance_path"].as_str().unwrap(), "", "Instance path should be root"); - assert_eq!(error["schema_path"].as_str().unwrap(), "/required", "Schema path should point to required keyword"); - assert!(error["kind"].as_str().unwrap().contains("Required"), "Error kind should be Required"); - assert!(error["error"].as_str().unwrap().contains("is a required property"), "Error message mismatch"); - } + // Get the first (and only) top-level error + let top_error_missing = top_level_errors_missing.get(0).expect("Should get the first top-level missing field error"); + + // Check top-level error kind + assert!(top_error_missing.get("kind").and_then(Value::as_str).map_or(false, |k| k.starts_with("Schema { url:")), + "Incorrect kind for missing field top-level error. Error: {:?}. 
All errors: {:?}", top_error_missing, top_level_errors_missing); + + // Get the 'causes' array from the top-level error + let causes_missing = top_error_missing.get("causes").and_then(Value::as_array).expect("Causes should be an array for missing field error"); + assert_eq!(causes_missing.len(), 1, "Should have one cause for missing field"); + + // Get the actual missing field error from the 'causes' array + let missing_error = causes_missing.get(0).expect("Should have missing field error object in 'causes'"); + + // Assertions on the specific missing field error + assert_eq!(missing_error.get("instance_path").and_then(Value::as_str), Some(""), + "Incorrect instance_path for missing age error: {:?}", missing_error); + assert!(missing_error.get("kind").and_then(Value::as_str).map_or(false, |k| k.starts_with("Required { want: [\"age\"]")), + "Incorrect kind for missing age error. Expected prefix 'Required {{ want: [\"age\"] }}'. Error: {:?}", missing_error); + } + + #[pg_test] + fn test_schema_not_cached() { + setup_test(); + let instance = json!({ "foo": "bar" }); + let result = validate_schema("non_existent_schema", jsonb(instance)); + assert!(!result.0["success"].as_bool().unwrap()); + let errors = result.0["errors"].as_array().unwrap(); + assert_eq!(errors.len(), 1); + assert_eq!(errors[0]["kind"], json!("SchemaNotFound")); + assert!(errors[0]["message"].as_str().unwrap().contains("non_existent_schema")); + } + + #[pg_test] + fn test_invalid_schema_cache() { + setup_test(); + let schema_id = "invalid_schema"; + let invalid_schema_json = "{\"type\": \"string\" \"maxLength\": 5}"; + let invalid_schema_value: Result<Value, _> = serde_json::from_str(invalid_schema_json); + assert!(invalid_schema_value.is_err(), "Test setup assumes invalid JSON string"); + + let schema_representing_invalid = json!({ + "type": 123 + }); + + let result = cache_schema(schema_id, jsonb(schema_representing_invalid.clone())); + assert!(!result.0["success"].as_bool().unwrap()); + 
assert!(result.0["error"].as_str().unwrap().contains("Schema compilation failed")); + } + + #[pg_test] + fn test_detailed_validation_errors() { + setup_test(); + let schema_id = "detailed_schema"; + let schema = json!({ + "type": "object", + "properties": { + "address": { + "type": "object", + "properties": { + "street": { "type": "string" }, + "city": { "type": "string", "maxLength": 10 } + }, + "required": ["street", "city"] + } + }, + "required": ["address"] + }); + let invalid_instance = json!({ + "address": { + "city": "San Francisco Bay Area" + } + }); + + assert!(cache_schema(schema_id, jsonb(schema.clone())).0["success"].as_bool().unwrap()); + let result = validate_schema(schema_id, jsonb(invalid_instance)); + assert!(!result.0["success"].as_bool().unwrap()); + + let errors = result.0["errors"].as_array().expect("Errors should be an array"); + let top_error = errors.get(0).expect("Expected at least one top-level error object"); + let causes = top_error.get("causes").and_then(Value::as_array).expect("Expected causes array"); + + let has_required_street_error = causes.iter().any(|e| + e.get("instance_path").and_then(Value::as_str) == Some("/address") && // Check path inside cause + e.get("kind").and_then(Value::as_str).unwrap_or("").starts_with("Required { want:") && // Check kind prefix + e.get("kind").and_then(Value::as_str).unwrap_or("").contains("street") // Ensure 'street' is mentioned + ); + assert!(has_required_street_error, "Missing required 'street' error within causes. Actual errors: {:?}", errors); + + let has_maxlength_city_error = causes.iter().any(|e| // Check within causes + e.get("instance_path").and_then(Value::as_str) == Some("/address/city") && + e.get("kind").and_then(Value::as_str).unwrap_or("").starts_with("MaxLength { got:") // Check kind prefix + ); + assert!(has_maxlength_city_error, "Missing maxLength 'city' error within causes. 
Actual errors: {:?}", errors); + } + + #[pg_test] + fn test_oneof_validation_errors() { + setup_test(); + let schema_id = "oneof_schema"; + let schema = json!({ + "type": "object", + "properties": { + "value": { + "oneOf": [ + { "type": "string", "minLength": 5 }, + { "type": "number", "minimum": 10 } + ] + } + }, + "required": ["value"] + }); + assert!(cache_schema(schema_id, jsonb(schema.clone())).0["success"].as_bool().unwrap()); + + let invalid_instance = json!({ "value": "abc" }); + let result = validate_schema(schema_id, jsonb(invalid_instance)); + + assert!(!result.0["success"].as_bool().unwrap()); + + let errors_val = result.0["errors"].as_array().expect("Errors should be an array"); + let top_schema_error = errors_val.get(0).expect("Expected at least one top-level Schema error object"); + let schema_error_causes = top_schema_error.get("causes").and_then(Value::as_array).expect("Expected causes array for Schema error"); + + let oneof_error = schema_error_causes.iter().find(|e| { + e.get("kind").and_then(Value::as_str) == Some("OneOf(None)") && + e.get("instance_path").and_then(Value::as_str) == Some("/value") + }).expect("Could not find the OneOf(None) error for /value within Schema causes"); + + let oneof_causes = oneof_error.get("causes").and_then(Value::as_array) + .expect("Expected causes array for OneOf error"); + + let has_minlength_error = oneof_causes.iter().any(|e| // Check within OneOf causes + e.get("instance_path").and_then(Value::as_str) == Some("/value") && + e.get("kind").and_then(Value::as_str).unwrap_or("").starts_with("MinLength { got:") // Check kind prefix + ); + assert!(has_minlength_error, "Missing MinLength error within OneOf causes. 
Actual errors: {:?}", errors_val); + + let has_type_error = oneof_causes.iter().any(|e| // Check within OneOf causes + e.get("instance_path").and_then(Value::as_str) == Some("/value") && + e.get("kind").and_then(Value::as_str).unwrap_or("").starts_with("Type { got: String, want: Types") // More specific kind check + ); + assert!(has_type_error, "Missing Type error within OneOf causes. Actual errors: {:?}", errors_val); + } + + #[pg_test] + fn test_clear_schema_cache() { + setup_test(); + let schema_id = "schema_to_clear"; + let schema = json!({ "type": "string" }); + cache_schema(schema_id, jsonb(schema.clone())); + + let show_result1 = show_schema_cache(); + assert!(show_result1.0["cached_schema_ids"].as_array().unwrap().iter().any(|id| id.as_str() == Some(schema_id))); + + let clear_result = clear_schema_cache(); + assert!(clear_result.0["success"].as_bool().unwrap()); + + let show_result2 = show_schema_cache(); + assert!(show_result2.0["cached_schema_ids"].as_array().unwrap().is_empty()); + + let instance = json!("test"); + let validate_result = validate_schema(schema_id, jsonb(instance)); + assert!(!validate_result.0["success"].as_bool().unwrap()); + assert_eq!(validate_result.0["errors"].as_array().unwrap()[0]["kind"], json!("SchemaNotFound")); + } + + #[pg_test] + fn test_show_schema_cache() { + setup_test(); + let schema_id1 = "schema1"; + let schema_id2 = "schema2"; + let schema = json!({ "type": "boolean" }); + + cache_schema(schema_id1, jsonb(schema.clone())); + cache_schema(schema_id2, jsonb(schema.clone())); + + let result = show_schema_cache(); + let ids = result.0["cached_schema_ids"].as_array().unwrap(); + assert_eq!(ids.len(), 2); + assert!(ids.contains(&json!(schema_id1))); + assert!(ids.contains(&json!(schema_id2))); + } } #[cfg(test)] pub mod pg_test { - pub fn setup(_options: Vec<&str>) { - // Initialization if needed - } + pub fn setup(_options: Vec<&str>) { + // perform one-off initialization when the pg_test framework starts + } - pub fn 
postgresql_conf_options() -> Vec<&'static str> { - vec![] - } -} + pub fn postgresql_conf_options() -> Vec<&'static str> { + // return any postgresql.conf settings that are required for your tests + vec![] + } +} \ No newline at end of file