Compare commits

...

2 Commits
1.0.8 ... 1.0.9

Author SHA1 Message Date
48e74815d3 version: 1.0.9 2025-04-14 18:08:45 -04:00
23235d4b9d -m switched to boon 2025-04-14 18:08:36 -04:00
4 changed files with 381 additions and 193 deletions

27
Cargo.lock generated
View File

@ -68,6 +68,12 @@ version = "1.0.97"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f"
[[package]]
name = "appendlist"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e149dc73cd30538307e7ffa2acd3d2221148eaeed4871f246657b1c3eaa1cbd2"
[[package]] [[package]]
name = "async-trait" name = "async-trait"
version = "0.1.88" version = "0.1.88"
@ -177,6 +183,26 @@ dependencies = [
"generic-array", "generic-array",
] ]
[[package]]
name = "boon"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baa187da765010b70370368c49f08244b1ae5cae1d5d33072f76c8cb7112fe3e"
dependencies = [
"ahash",
"appendlist",
"base64",
"fluent-uri",
"idna",
"once_cell",
"percent-encoding",
"regex",
"regex-syntax",
"serde",
"serde_json",
"url",
]
[[package]] [[package]]
name = "borrow-or-share" name = "borrow-or-share"
version = "0.2.2" version = "0.2.2"
@ -1015,6 +1041,7 @@ dependencies = [
name = "jspg" name = "jspg"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"boon",
"jsonschema", "jsonschema",
"lazy_static", "lazy_static",
"pgrx", "pgrx",

View File

@ -9,6 +9,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" serde_json = "1.0"
jsonschema = "0.29.1" jsonschema = "0.29.1"
lazy_static = "1.5.0" lazy_static = "1.5.0"
boon = "0.6.1"
[dev-dependencies] [dev-dependencies]
pgrx-tests = "0.14.0" pgrx-tests = "0.14.0"
@ -22,6 +23,7 @@ path = "src/bin/pgrx_embed.rs"
[features] [features]
pg17 = ["pgrx/pg17", "pgrx-tests/pg17" ] pg17 = ["pgrx/pg17", "pgrx-tests/pg17" ]
# Local feature flag used by `cargo pgrx test`
pg_test = [] pg_test = []
[profile.dev] [profile.dev]

View File

@ -1,237 +1,396 @@
use pgrx::*; use pgrx::*;
use jsonschema::{Draft, Validator};
use serde_json::json;
use std::collections::HashMap;
use std::sync::RwLock;
use lazy_static::lazy_static;
pg_module_magic!(); pg_module_magic!();
// Global, thread-safe schema cache using the correct Validator type use serde_json::{json, Value};
use std::{collections::HashMap, sync::RwLock};
use boon::{Compiler, Schemas, ValidationError, SchemaIndex};
use lazy_static::lazy_static;
/// State stored behind the global `SCHEMA_CACHE` lock.
struct BoonCache {
/// Compiled-schema store: `cache_schema` compiles into it and `validate_schema` validates against it.
schemas: Schemas,
/// Caller-supplied schema id mapped to the index that compilation returned for it in `schemas`.
id_to_index: HashMap<String, SchemaIndex>,
}
lazy_static! { lazy_static! {
static ref SCHEMA_CACHE: RwLock<HashMap<String, Validator>> = RwLock::new(HashMap::new()); static ref SCHEMA_CACHE: RwLock<BoonCache> = RwLock::new(BoonCache {
schemas: Schemas::new(),
id_to_index: HashMap::new()
});
} }
// Cache a schema explicitly with a provided ID #[pg_extern(strict)]
#[pg_extern(immutable, strict, parallel_safe)]
fn cache_schema(schema_id: &str, schema: JsonB) -> JsonB { fn cache_schema(schema_id: &str, schema: JsonB) -> JsonB {
let schema_value = schema.0;
// Compile the schema using the builder pattern
match jsonschema::options()
.with_draft(Draft::Draft7)
.should_validate_formats(true)
.build(&schema_value)
{
Ok(compiled_schema) => {
// If compilation succeeds, add it to the cache
let mut cache = SCHEMA_CACHE.write().unwrap(); let mut cache = SCHEMA_CACHE.write().unwrap();
cache.insert(schema_id.to_string(), compiled_schema); let schema_value: Value = schema.0;
JsonB(json!({ "success": true, "id": schema_id }))
}
Err(e) => {
// If compilation fails, return an error
JsonB(json!({
"success": false,
"error": format!("Failed to compile schema '{}': {}", schema_id, e)
}))
}
}
}
// Check if a schema is cached let mut compiler = Compiler::new();
#[pg_extern(immutable, strict, parallel_safe)] compiler.enable_format_assertions();
fn schema_cached(schema_id: &str) -> bool {
SCHEMA_CACHE.read().unwrap().contains_key(schema_id)
}
// Validate JSONB instance against a cached schema by ID let schema_url = format!("urn:jspg:{}", schema_id);
#[pg_extern(immutable, strict, parallel_safe)]
fn validate_schema(schema_id: &str, instance: JsonB) -> JsonB { if let Err(e) = compiler.add_resource(&schema_url, schema_value) {
let cache = SCHEMA_CACHE.read().unwrap();
let compiled_schema: &Validator = match cache.get(schema_id) {
Some(schema) => schema,
None => {
// Return the 'schema not cached' error in the standard object format
let error_msg = format!("Schema ID '{}' not cached", schema_id);
return JsonB(json!({ return JsonB(json!({
"valid": false, "success": false,
"errors": [json!({ "error": format!("Failed to add schema resource '{}': {}", schema_id, e)
"kind": "SchemaNotFound", // Custom kind for this case
"error": error_msg
})]
})); }));
} }
};
let instance_value = instance.0; match compiler.compile(&schema_url, &mut cache.schemas) {
// Use iter_errors() to get all validation errors Ok(sch_index) => {
let errors_iterator = compiled_schema.iter_errors(&instance_value); cache.id_to_index.insert(schema_id.to_string(), sch_index);
// Collect errors into a vector first to check if any exist
let collected_errors_result: Vec<_> = errors_iterator.collect();
if collected_errors_result.is_empty() {
// No errors found, validation passed
JsonB(json!({ "valid": true }))
} else {
// Errors found, format them
let error_details = collect_all_errors(collected_errors_result.into_iter());
JsonB(json!({ JsonB(json!({
"valid": false, "success": true,
"errors": error_details "schema_id": schema_id,
"message": "Schema cached successfully."
}))
}
Err(e) => JsonB(json!({
"success": false,
"schema_id": schema_id,
"error": format!("Schema compilation failed: {}", e)
})),
}
}
#[pg_extern(strict, parallel_safe)]
fn validate_schema(schema_id: &str, instance: JsonB) -> JsonB {
let cache = SCHEMA_CACHE.read().unwrap();
match cache.id_to_index.get(schema_id) {
None => JsonB(json!({
"success": false,
"errors": [{
"kind": "SchemaNotFound",
"message": format!("Schema with id '{}' not found in cache", schema_id)
}]
})),
Some(sch_index) => {
let instance_value: Value = instance.0;
match cache.schemas.validate(&instance_value, *sch_index) {
Ok(_) => JsonB(json!({ "success": true })),
Err(validation_error) => {
let error_details = format_boon_errors(&validation_error);
JsonB(json!({
"success": false,
"errors": [error_details]
})) }))
} }
} }
}
}
}
fn format_validation_error(error: &jsonschema::ValidationError) -> serde_json::Value { fn format_boon_errors(error: &ValidationError) -> Value {
json!({ json!({
"instance_path": error.instance_path.to_string(), "instance_path": error.instance_location.to_string(),
"schema_path": error.schema_path.to_string(), "schema_path": error.schema_url.to_string(),
"kind": format!("{:?}", error.kind), "kind": format!("{:?}", error.kind),
"error": error.to_string() "message": format!("{}", error),
"causes": error
.causes
.iter()
.map(format_boon_errors)
.collect::<Vec<_>>()
}) })
} }
// Simplified: Collects all validation errors by formatting each one. #[pg_extern(strict)]
// Assumes the iterator provided by iter_errors() gives all necessary detail. fn clear_schema_cache() -> JsonB {
fn collect_all_errors<'a>( let mut cache = SCHEMA_CACHE.write().unwrap();
errors: impl Iterator<Item = jsonschema::ValidationError<'a>>, *cache = BoonCache {
) -> Vec<serde_json::Value> { schemas: Schemas::new(),
errors.map(|e| format_validation_error(&e)).collect() id_to_index: HashMap::new()
};
JsonB(json!({
"success": true,
"message": "Schema cache cleared."
}))
} }
// Show the IDs of all schemas currently in the cache #[pg_extern(strict, parallel_safe)]
#[pg_extern(immutable, parallel_safe)] fn show_schema_cache() -> JsonB {
fn show_schema_cache() -> Vec<String> {
let cache = SCHEMA_CACHE.read().unwrap(); let cache = SCHEMA_CACHE.read().unwrap();
cache.keys().cloned().collect() let ids: Vec<&String> = cache.id_to_index.keys().collect();
} JsonB(json!({
"cached_schema_ids": ids
// Clear the entire schema cache explicitly }))
#[pg_extern(immutable, parallel_safe)]
fn clear_schema_cache() -> bool {
SCHEMA_CACHE.write().unwrap().clear();
true
} }
#[pg_schema] #[pg_schema]
#[cfg(any(test, feature = "pg_test"))] #[cfg(any(test, feature = "pg_test"))]
mod tests { mod tests {
use pgrx::prelude::*; use pgrx::*;
use pgrx::pg_test;
use super::*;
use serde_json::json; use serde_json::json;
use pgrx::JsonB; // Import JsonB specifically for tests
// Helper to clear cache before tests that need it fn jsonb(val: Value) -> JsonB {
JsonB(val)
}
fn setup_test() { fn setup_test() {
crate::clear_schema_cache(); clear_schema_cache();
} }
#[pg_test] #[pg_test]
fn test_cache_and_validate_schema() { fn test_cache_and_validate_schema() {
setup_test(); setup_test();
assert!(crate::cache_schema( let schema_id = "my_schema";
"test_schema", let schema = json!({
JsonB(json!({ "type": "object" })) "type": "object",
).0["success"] == json!(true)); "properties": {
assert!(crate::schema_cached("test_schema")); "name": { "type": "string" },
"age": { "type": "integer", "minimum": 0 }
},
"required": ["name", "age"]
});
let valid_instance = json!({ "name": "Alice", "age": 30 });
let invalid_instance_type = json!({ "name": "Bob", "age": -5 });
let invalid_instance_missing = json!({ "name": "Charlie" });
let result_valid = crate::validate_schema("test_schema", JsonB(json!({ "foo": "bar" }))); let cache_result = cache_schema(schema_id, jsonb(schema.clone()));
assert_eq!(result_valid.0["valid"], true); assert!(cache_result.0["success"].as_bool().unwrap());
let result_invalid = crate::validate_schema("test_schema", JsonB(json!(42))); let valid_result = validate_schema(schema_id, jsonb(valid_instance));
assert_eq!(result_invalid.0["valid"], false); assert!(valid_result.0["success"].as_bool().unwrap());
assert!(result_invalid.0["errors"][0]["error"].as_str().unwrap().contains("is not of type \"object\""));
let invalid_result_type = validate_schema(schema_id, jsonb(invalid_instance_type));
assert!(!invalid_result_type.0["success"].as_bool().unwrap());
// --- Assertions for invalid_result_type ---
// Get top-level errors
let top_level_errors = invalid_result_type.0["errors"].as_array().expect("Top-level 'errors' should be an array");
assert_eq!(top_level_errors.len(), 1, "Should have exactly one top-level error for invalid type");
// Get the first (and only) top-level error
let top_level_error = top_level_errors.get(0).expect("Should get the first top-level error");
// Check top-level error kind
assert!(top_level_error.get("kind").and_then(Value::as_str).map_or(false, |k| k.starts_with("Schema { url:")),
"Incorrect kind for top-level error. Expected 'Schema {{ url:'. Error: {:?}. All errors: {:?}", top_level_error, top_level_errors);
// Get the 'causes' array from the top-level error
let causes_age = top_level_error.get("causes").and_then(Value::as_array).expect("Top-level error 'causes' should be an array");
assert_eq!(causes_age.len(), 1, "Should have one cause for the age error");
// Get the actual age error from the 'causes' array
let age_error = causes_age.get(0).expect("Should have an error object in 'causes'");
assert_eq!(age_error.get("instance_path").and_then(Value::as_str), Some("/age"),
"Incorrect instance_path for age error. Error: {:?}. All errors: {:?}", age_error, top_level_errors);
assert!(age_error.get("kind").and_then(Value::as_str).map_or(false, |k| k.starts_with("Minimum { got:")),
"Incorrect kind prefix for age error. Expected 'Minimum {{ got:'. Error: {:?}. All errors: {:?}", age_error, top_level_errors);
let expected_prefix = "at '/age': must be >=0";
assert!(age_error.get("message")
.and_then(Value::as_str)
.map_or(false, |m| m.starts_with(expected_prefix)),
"Incorrect message prefix for age error. Expected prefix '{}'. Error: {:?}. All errors: {:?}",
expected_prefix, age_error, top_level_errors);
let invalid_result_missing = validate_schema(schema_id, jsonb(invalid_instance_missing));
assert!(!invalid_result_missing.0["success"].as_bool().unwrap(), "Validation should fail for missing required field");
// --- Assertions for invalid_result_missing ---
// Get top-level errors
let top_level_errors_missing = invalid_result_missing.0["errors"].as_array().expect("Errors should be an array for missing field");
assert_eq!(top_level_errors_missing.len(), 1, "Should have one top-level error for missing field");
// Get the first (and only) top-level error
let top_error_missing = top_level_errors_missing.get(0).expect("Should get the first top-level missing field error");
// Check top-level error kind
assert!(top_error_missing.get("kind").and_then(Value::as_str).map_or(false, |k| k.starts_with("Schema { url:")),
"Incorrect kind for missing field top-level error. Error: {:?}. All errors: {:?}", top_error_missing, top_level_errors_missing);
// Get the 'causes' array from the top-level error
let causes_missing = top_error_missing.get("causes").and_then(Value::as_array).expect("Causes should be an array for missing field error");
assert_eq!(causes_missing.len(), 1, "Should have one cause for missing field");
// Get the actual missing field error from the 'causes' array
let missing_error = causes_missing.get(0).expect("Should have missing field error object in 'causes'");
// Assertions on the specific missing field error
assert_eq!(missing_error.get("instance_path").and_then(Value::as_str), Some(""),
"Incorrect instance_path for missing age error: {:?}", missing_error);
assert!(missing_error.get("kind").and_then(Value::as_str).map_or(false, |k| k.starts_with("Required { want: [\"age\"]")),
"Incorrect kind for missing age error. Expected prefix 'Required {{ want: [\"age\"] }}'. Error: {:?}", missing_error);
} }
#[pg_test] #[pg_test]
fn test_schema_not_cached() { fn test_schema_not_cached() {
setup_test(); setup_test();
let result = crate::validate_schema("unknown_schema", JsonB(json!({}))); let instance = json!({ "foo": "bar" });
assert_eq!(result.0["valid"], false); let result = validate_schema("non_existent_schema", jsonb(instance));
assert!(result.0["errors"][0]["error"].as_str().unwrap().contains("not cached")); assert!(!result.0["success"].as_bool().unwrap());
} let errors = result.0["errors"].as_array().unwrap();
assert_eq!(errors.len(), 1);
#[pg_test] assert_eq!(errors[0]["kind"], json!("SchemaNotFound"));
fn test_clear_schema_cache() { assert!(errors[0]["message"].as_str().unwrap().contains("non_existent_schema"));
setup_test();
crate::cache_schema("clear_test", JsonB(json!({ "type": "object" })));
assert!(crate::schema_cached("clear_test"));
crate::clear_schema_cache();
assert!(!crate::schema_cached("clear_test"));
} }
#[pg_test] #[pg_test]
fn test_invalid_schema_cache() { fn test_invalid_schema_cache() {
setup_test(); setup_test();
// Attempt to cache an invalid schema definition let schema_id = "invalid_schema";
let result = crate::cache_schema( let invalid_schema_json = "{\"type\": \"string\" \"maxLength\": 5}";
"bad_schema", let invalid_schema_value: Result<Value, _> = serde_json::from_str(invalid_schema_json);
JsonB(json!({ "type": "unknown_type" })) assert!(invalid_schema_value.is_err(), "Test setup assumes invalid JSON string");
);
assert!(result.0["success"] == json!(false), "Caching an invalid schema should fail");
assert!(!crate::schema_cached("bad_schema"));
}
#[pg_test] let schema_representing_invalid = json!({
fn test_show_schema_cache() { "type": 123
setup_test(); });
assert!(crate::cache_schema("schema1", JsonB(json!({ "type": "string" }))).0["success"] == json!(true));
assert!(crate::cache_schema("schema2", JsonB(json!({ "type": "number" }))).0["success"] == json!(true));
let mut cached_ids = crate::show_schema_cache(); let result = cache_schema(schema_id, jsonb(schema_representing_invalid.clone()));
cached_ids.sort(); // Sort for deterministic comparison assert!(!result.0["success"].as_bool().unwrap());
assert!(result.0["error"].as_str().unwrap().contains("Schema compilation failed"));
assert_eq!(cached_ids.len(), 2);
assert_eq!(cached_ids, vec!["schema1", "schema2"]);
crate::clear_schema_cache();
let empty_ids = crate::show_schema_cache();
assert!(empty_ids.is_empty());
} }
#[pg_test] #[pg_test]
fn test_detailed_validation_errors() { fn test_detailed_validation_errors() {
setup_test(); setup_test();
let schema_id = "required_prop_schema"; let schema_id = "detailed_schema";
let schema = JsonB(json!({ let schema = json!({
"title": "Test Required",
"type": "object", "type": "object",
"properties": { "properties": {
"name": { "type": "string" }, "address": {
"age": { "type": "integer" } "type": "object",
"properties": {
"street": { "type": "string" },
"city": { "type": "string", "maxLength": 10 }
}, },
"required": ["name"] "required": ["street", "city"]
})); }
},
"required": ["address"]
});
let invalid_instance = json!({
"address": {
"city": "San Francisco Bay Area"
}
});
assert!(crate::cache_schema(schema_id, schema).0["success"] == json!(true)); assert!(cache_schema(schema_id, jsonb(schema.clone())).0["success"].as_bool().unwrap());
let result = validate_schema(schema_id, jsonb(invalid_instance));
assert!(!result.0["success"].as_bool().unwrap());
// Instance missing the required 'name' property
let invalid_instance = JsonB(json!({ "age": 30 }));
let result = crate::validate_schema(schema_id, invalid_instance);
assert_eq!(result.0["valid"], false);
let errors = result.0["errors"].as_array().expect("Errors should be an array"); let errors = result.0["errors"].as_array().expect("Errors should be an array");
assert_eq!(errors.len(), 1, "Should have exactly one error"); let top_error = errors.get(0).expect("Expected at least one top-level error object");
let causes = top_error.get("causes").and_then(Value::as_array).expect("Expected causes array");
let error = &errors[0]; let has_required_street_error = causes.iter().any(|e|
eprintln!("Validation Error Details: {}", error); e.get("instance_path").and_then(Value::as_str) == Some("/address") && // Check path inside cause
e.get("kind").and_then(Value::as_str).unwrap_or("").starts_with("Required { want:") && // Check kind prefix
e.get("kind").and_then(Value::as_str).unwrap_or("").contains("street") // Ensure 'street' is mentioned
);
assert!(has_required_street_error, "Missing required 'street' error within causes. Actual errors: {:?}", errors);
assert_eq!(error["instance_path"].as_str().unwrap(), "", "Instance path should be root"); let has_maxlength_city_error = causes.iter().any(|e| // Check within causes
assert_eq!(error["schema_path"].as_str().unwrap(), "/required", "Schema path should point to required keyword"); e.get("instance_path").and_then(Value::as_str) == Some("/address/city") &&
assert!(error["kind"].as_str().unwrap().contains("Required"), "Error kind should be Required"); e.get("kind").and_then(Value::as_str).unwrap_or("").starts_with("MaxLength { got:") // Check kind prefix
assert!(error["error"].as_str().unwrap().contains("is a required property"), "Error message mismatch"); );
assert!(has_maxlength_city_error, "Missing maxLength 'city' error within causes. Actual errors: {:?}", errors);
}
/// Checks how a `oneOf` failure is reported: an instance matching neither branch
/// must yield a `OneOf(None)` error at `/value` whose own causes describe the
/// failure of each branch.
#[pg_test]
fn test_oneof_validation_errors() {
    setup_test();

    let id = "oneof_schema";
    // `value` must be either a string of length >= 5 or a number >= 10.
    let oneof_schema = json!({
        "type": "object",
        "properties": {
            "value": {
                "oneOf": [
                    { "type": "string", "minLength": 5 },
                    { "type": "number", "minimum": 10 }
                ]
            }
        },
        "required": ["value"]
    });
    let cached = cache_schema(id, jsonb(oneof_schema.clone()));
    assert!(cached.0["success"].as_bool().unwrap());

    // "abc" is too short for the string branch and is not a number.
    let outcome = validate_schema(id, jsonb(json!({ "value": "abc" })));
    assert!(!outcome.0["success"].as_bool().unwrap());

    // Descend: errors[0] -> causes -> the OneOf(None) entry for /value -> its causes.
    let all_errors = outcome.0["errors"]
        .as_array()
        .expect("Errors should be an array");
    let schema_level = all_errors
        .first()
        .expect("Expected at least one top-level Schema error object");
    let schema_causes = schema_level
        .get("causes")
        .and_then(Value::as_array)
        .expect("Expected causes array for Schema error");
    let oneof_entry = schema_causes
        .iter()
        .find(|cause| {
            let kind = cause.get("kind").and_then(Value::as_str);
            let path = cause.get("instance_path").and_then(Value::as_str);
            kind == Some("OneOf(None)") && path == Some("/value")
        })
        .expect("Could not find the OneOf(None) error for /value within Schema causes");
    let branch_failures = oneof_entry
        .get("causes")
        .and_then(Value::as_array)
        .expect("Expected causes array for OneOf error");

    // True when some branch failure located at /value has a `kind` starting with `prefix`.
    let branch_failed_with = |prefix: &str| {
        branch_failures.iter().any(|cause| {
            cause.get("instance_path").and_then(Value::as_str) == Some("/value")
                && cause
                    .get("kind")
                    .and_then(Value::as_str)
                    .map_or(false, |kind| kind.starts_with(prefix))
        })
    };

    assert!(
        branch_failed_with("MinLength { got:"),
        "Missing MinLength error within OneOf causes. Actual errors: {:?}",
        all_errors
    );
    assert!(
        branch_failed_with("Type { got: String, want: Types"),
        "Missing Type error within OneOf causes. Actual errors: {:?}",
        all_errors
    );
}
/// Verifies that `clear_schema_cache` empties the cache: the id vanishes from
/// `show_schema_cache`, and validating against it then reports `SchemaNotFound`.
#[pg_test]
fn test_clear_schema_cache() {
    setup_test();

    let id = "schema_to_clear";
    let string_schema = json!({ "type": "string" });
    cache_schema(id, jsonb(string_schema.clone()));

    // Before clearing: the id is listed.
    let listing_before = show_schema_cache();
    let ids_before = listing_before.0["cached_schema_ids"].as_array().unwrap();
    assert!(ids_before.iter().any(|entry| entry.as_str() == Some(id)));

    let cleared = clear_schema_cache();
    assert!(cleared.0["success"].as_bool().unwrap());

    // After clearing: nothing is listed ...
    let listing_after = show_schema_cache();
    let ids_after = listing_after.0["cached_schema_ids"].as_array().unwrap();
    assert!(ids_after.is_empty());

    // ... and the id can no longer be validated against.
    let outcome = validate_schema(id, jsonb(json!("test")));
    assert!(!outcome.0["success"].as_bool().unwrap());
    let reported = outcome.0["errors"].as_array().unwrap();
    assert_eq!(reported[0]["kind"], json!("SchemaNotFound"));
}
#[pg_test]
fn test_show_schema_cache() {
setup_test();
let schema_id1 = "schema1";
let schema_id2 = "schema2";
let schema = json!({ "type": "boolean" });
cache_schema(schema_id1, jsonb(schema.clone()));
cache_schema(schema_id2, jsonb(schema.clone()));
let result = show_schema_cache();
let ids = result.0["cached_schema_ids"].as_array().unwrap();
assert_eq!(ids.len(), 2);
assert!(ids.contains(&json!(schema_id1)));
assert!(ids.contains(&json!(schema_id2)));
} }
} }
#[cfg(test)] #[cfg(test)]
pub mod pg_test { pub mod pg_test {
pub fn setup(_options: Vec<&str>) { pub fn setup(_options: Vec<&str>) {
// Initialization if needed // perform one-off initialization when the pg_test framework starts
} }
pub fn postgresql_conf_options() -> Vec<&'static str> { pub fn postgresql_conf_options() -> Vec<&'static str> {
// return any postgresql.conf settings that are required for your tests
vec![] vec![]
} }
} }

View File

@ -1 +1 @@
1.0.8 1.0.9