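//! PostgreSQL extension (built with pgrx) that caches compiled JSON Schemas and
//! validates JSONB values against them using the boon validator.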
use pgrx::*;
|
|
|
|
pg_module_magic!();
|
|
|
|
use serde_json::{json, Value};
|
|
use std::{collections::HashMap, sync::RwLock};
|
|
use boon::{Compiler, Schemas, ValidationError, SchemaIndex, CompileError};
|
|
use lazy_static::lazy_static;
|
|
|
|
struct BoonCache {
|
|
schemas: Schemas,
|
|
id_to_index: HashMap<String, SchemaIndex>,
|
|
}
|
|
|
|
lazy_static! {
    /// Global schema cache, initially empty and guarded by a read/write lock.
    static ref SCHEMA_CACHE: RwLock<BoonCache> = RwLock::new(BoonCache {
        id_to_index: HashMap::new(),
        schemas: Schemas::new(),
    });
}
|
|
|
|
#[pg_extern(strict)]
|
|
fn cache_json_schema(schema_id: &str, schema: JsonB) -> JsonB {
|
|
let mut cache = SCHEMA_CACHE.write().unwrap();
|
|
let schema_value: Value = schema.0;
|
|
let schema_path = format!("urn:{}", schema_id);
|
|
|
|
let mut compiler = Compiler::new();
|
|
compiler.enable_format_assertions();
|
|
|
|
// Use schema_path when adding the resource
|
|
if let Err(e) = compiler.add_resource(&schema_path, schema_value.clone()) {
|
|
return JsonB(json!({
|
|
"errors": [{
|
|
"code": "SCHEMA_RESOURCE_ADD_FAILED",
|
|
"message": format!("Failed to add schema resource '{}'", schema_id),
|
|
"details": {
|
|
"path": schema_path,
|
|
"cause": format!("{}", e)
|
|
}
|
|
}]
|
|
}));
|
|
}
|
|
|
|
// Use schema_path when compiling
|
|
match compiler.compile(&schema_path, &mut cache.schemas) {
|
|
Ok(sch_index) => {
|
|
// Store the index using the original schema_id as the key
|
|
cache.id_to_index.insert(schema_id.to_string(), sch_index);
|
|
JsonB(json!({ "response": "success" }))
|
|
}
|
|
Err(e) => {
|
|
let errors = match &e {
|
|
CompileError::ValidationError { url: _url, src } => {
|
|
// Collect leaf errors from the meta-schema validation failure
|
|
let mut error_list = Vec::new();
|
|
collect_validation_errors(src, &mut error_list);
|
|
// Filter and format errors properly - no instance for schema compilation
|
|
format_drop_errors(error_list, &schema_value)
|
|
}
|
|
_ => {
|
|
// Other compilation errors
|
|
vec![json!({
|
|
"code": "SCHEMA_COMPILATION_FAILED",
|
|
"message": format!("Schema '{}' compilation failed", schema_id),
|
|
"details": {
|
|
"path": schema_path,
|
|
"cause": format!("{:?}", e)
|
|
}
|
|
})]
|
|
}
|
|
};
|
|
JsonB(json!({ "errors": errors }))
|
|
}
|
|
}
|
|
}
|
|
|
|
#[pg_extern(strict, parallel_safe)]
|
|
fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB {
|
|
let cache = SCHEMA_CACHE.read().unwrap();
|
|
|
|
// Lookup uses the original schema_id
|
|
match cache.id_to_index.get(schema_id) {
|
|
None => JsonB(json!({
|
|
"errors": [{
|
|
"code": "SCHEMA_NOT_FOUND",
|
|
"message": format!("Schema '{}' not found in cache", schema_id),
|
|
"details": {
|
|
"cause": "Schema must be cached before validation"
|
|
}
|
|
}]
|
|
})),
|
|
Some(sch_index) => {
|
|
let instance_value: Value = instance.0;
|
|
match cache.schemas.validate(&instance_value, *sch_index) {
|
|
Ok(_) => JsonB(json!({ "response": "success" })),
|
|
Err(validation_error) => {
|
|
let mut error_list = Vec::new();
|
|
collect_validation_errors(&validation_error, &mut error_list);
|
|
let errors = format_drop_errors(error_list, &instance_value);
|
|
|
|
JsonB(json!({ "errors": errors }))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Recursively collects validation errors
|
|
fn collect_validation_errors(error: &ValidationError, errors_list: &mut Vec<(String, String, String)>) {
|
|
// Check if this is a structural error that we should skip
|
|
let error_message = format!("{}", error.kind);
|
|
let is_structural = error_message == "validation failed" ||
|
|
error_message == "allOf failed" ||
|
|
error_message == "anyOf failed" ||
|
|
error_message == "not failed" ||
|
|
error_message.starts_with("oneOf failed");
|
|
|
|
if error.causes.is_empty() && !is_structural {
|
|
// This is a leaf error that's not structural
|
|
// Format just the error kind, not the whole validation error
|
|
let message = format!("{}", error.kind);
|
|
|
|
errors_list.push((
|
|
error.instance_location.to_string(),
|
|
error.schema_url.to_string(),
|
|
message
|
|
));
|
|
} else {
|
|
// Recurse into causes
|
|
for cause in &error.causes {
|
|
collect_validation_errors(cause, errors_list);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Formats errors according to DropError structure
|
|
fn format_drop_errors(raw_errors: Vec<(String, String, String)>, instance: &Value) -> Vec<Value> {
|
|
use std::collections::HashMap;
|
|
use std::collections::hash_map::Entry;
|
|
|
|
// We don't filter structural paths from instance paths anymore
|
|
// because instance paths shouldn't contain these segments anyway
|
|
// The issue was likely with schema paths, not instance paths
|
|
let plausible_errors = raw_errors;
|
|
|
|
// 2. Deduplicate by instance_path and format as DropError
|
|
let mut unique_errors: HashMap<String, Value> = HashMap::new();
|
|
for (instance_path, schema_path, message) in plausible_errors {
|
|
if let Entry::Vacant(entry) = unique_errors.entry(instance_path.clone()) {
|
|
// Convert message to error code and make it human readable
|
|
let (code, human_message) = enhance_error_message(&message);
|
|
|
|
// Extract the failing value from the instance
|
|
let failing_value = extract_value_at_path(instance, &instance_path);
|
|
|
|
entry.insert(json!({
|
|
"code": code,
|
|
"message": human_message,
|
|
"details": {
|
|
"path": schema_path,
|
|
"context": json!({
|
|
"instance_path": instance_path,
|
|
"failing_value": failing_value
|
|
}),
|
|
"cause": message // Original error message
|
|
}
|
|
}));
|
|
}
|
|
}
|
|
|
|
unique_errors.into_values().collect()
|
|
}
|
|
|
|
// Helper function to extract value at a JSON pointer path
|
|
fn extract_value_at_path(instance: &Value, path: &str) -> Value {
|
|
let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
|
|
let mut current = instance;
|
|
|
|
for part in parts {
|
|
match current {
|
|
Value::Object(map) => {
|
|
if let Some(value) = map.get(part) {
|
|
current = value;
|
|
} else {
|
|
return Value::Null;
|
|
}
|
|
}
|
|
Value::Array(arr) => {
|
|
if let Ok(index) = part.parse::<usize>() {
|
|
if let Some(value) = arr.get(index) {
|
|
current = value;
|
|
} else {
|
|
return Value::Null;
|
|
}
|
|
} else {
|
|
return Value::Null;
|
|
}
|
|
}
|
|
_ => return Value::Null,
|
|
}
|
|
}
|
|
|
|
current.clone()
|
|
}
|
|
|
|
// Helper to convert validation messages to error codes and human-readable messages
|
|
fn enhance_error_message(message: &str) -> (String, String) {
|
|
// Match exact boon error message patterns
|
|
let trimmed = message.trim();
|
|
|
|
if trimmed.contains("value must be one of") {
|
|
("ENUM_VIOLATED".to_string(),
|
|
"Value is not one of the allowed options".to_string())
|
|
} else if trimmed.contains("length must be >=") && trimmed.contains("but got") {
|
|
("MIN_LENGTH_VIOLATED".to_string(),
|
|
"Field length is below the minimum required".to_string())
|
|
} else if trimmed.contains("length must be <=") && trimmed.contains("but got") {
|
|
("MAX_LENGTH_VIOLATED".to_string(),
|
|
"Field length exceeds the maximum allowed".to_string())
|
|
} else if trimmed.contains("must be >=") && trimmed.contains("but got") {
|
|
("MINIMUM_VIOLATED".to_string(),
|
|
"Value is below the minimum allowed".to_string())
|
|
} else if trimmed.contains("must be <=") && trimmed.contains("but got") {
|
|
("MAXIMUM_VIOLATED".to_string(),
|
|
"Value exceeds the maximum allowed".to_string())
|
|
} else if trimmed.contains("must be >") && trimmed.contains("but got") {
|
|
("EXCLUSIVE_MINIMUM_VIOLATED".to_string(),
|
|
"Value must be greater than the minimum".to_string())
|
|
} else if trimmed.contains("must be <") && trimmed.contains("but got") {
|
|
("EXCLUSIVE_MAXIMUM_VIOLATED".to_string(),
|
|
"Value must be less than the maximum".to_string())
|
|
} else if trimmed.contains("does not match pattern") {
|
|
("PATTERN_VIOLATED".to_string(),
|
|
"Value does not match the required pattern".to_string())
|
|
} else if trimmed.contains("missing properties") {
|
|
("REQUIRED_FIELD_MISSING".to_string(),
|
|
"Required field is missing".to_string())
|
|
} else if trimmed.contains("want") && trimmed.contains("but got") {
|
|
("TYPE_MISMATCH".to_string(),
|
|
"Field type does not match the expected type".to_string())
|
|
} else if trimmed.starts_with("value must be") && !trimmed.contains("one of") {
|
|
("CONST_VIOLATED".to_string(),
|
|
"Value does not match the required constant".to_string())
|
|
} else if trimmed.contains("is not valid") && trimmed.contains(":") {
|
|
("FORMAT_INVALID".to_string(),
|
|
extract_format_message(trimmed))
|
|
} else if trimmed.contains("items at") && trimmed.contains("are equal") {
|
|
("UNIQUE_ITEMS_VIOLATED".to_string(),
|
|
"Array contains duplicate items".to_string())
|
|
} else if trimmed.contains("additionalProperties") && trimmed.contains("not allowed") {
|
|
("ADDITIONAL_PROPERTIES_NOT_ALLOWED".to_string(),
|
|
"Object contains properties that are not allowed".to_string())
|
|
} else if trimmed.contains("is not multipleOf") {
|
|
("MULTIPLE_OF_VIOLATED".to_string(),
|
|
"Value is not a multiple of the required factor".to_string())
|
|
} else if trimmed.contains("minimum") && trimmed.contains("properties required") {
|
|
("MIN_PROPERTIES_VIOLATED".to_string(),
|
|
"Object has fewer properties than required".to_string())
|
|
} else if trimmed.contains("maximum") && trimmed.contains("properties required") {
|
|
("MAX_PROPERTIES_VIOLATED".to_string(),
|
|
"Object has more properties than allowed".to_string())
|
|
} else if trimmed.contains("minimum") && trimmed.contains("items required") {
|
|
("MIN_ITEMS_VIOLATED".to_string(),
|
|
"Array has fewer items than required".to_string())
|
|
} else if trimmed.contains("maximum") && trimmed.contains("items required") {
|
|
("MAX_ITEMS_VIOLATED".to_string(),
|
|
"Array has more items than allowed".to_string())
|
|
} else if trimmed == "false schema" {
|
|
("FALSE_SCHEMA".to_string(),
|
|
"Schema validation always fails".to_string())
|
|
} else if trimmed == "not failed" {
|
|
("NOT_VIOLATED".to_string(),
|
|
"Value matched a schema it should not match".to_string())
|
|
} else if trimmed == "allOf failed" {
|
|
("ALL_OF_VIOLATED".to_string(),
|
|
"Value does not match all required schemas".to_string())
|
|
} else if trimmed == "anyOf failed" {
|
|
("ANY_OF_VIOLATED".to_string(),
|
|
"Value does not match any of the allowed schemas".to_string())
|
|
} else if trimmed.contains("oneOf failed") {
|
|
("ONE_OF_VIOLATED".to_string(),
|
|
"Value must match exactly one schema".to_string())
|
|
} else if trimmed == "validation failed" {
|
|
("VALIDATION_FAILED".to_string(),
|
|
"Validation failed".to_string())
|
|
} else {
|
|
// For any unmatched patterns, try to provide a generic human-readable message
|
|
// while preserving the original error in details.cause
|
|
("VALIDATION_FAILED".to_string(),
|
|
"Validation failed".to_string())
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/// Picks a human-readable description for a format-validation message.
///
/// The message is searched for a known format name in a fixed order
/// (date-time, email, uri, uuid); the first name found decides the result.
/// Messages naming none of them yield the generic "Invalid format".
fn extract_format_message(message: &str) -> String {
    // Order matters: the first entry whose name occurs in the message wins.
    const KNOWN_FORMATS: [(&str, &str); 4] = [
        ("date-time", "Invalid date-time format"),
        ("email", "Invalid email format"),
        ("uri", "Invalid URI format"),
        ("uuid", "Invalid UUID format"),
    ];

    KNOWN_FORMATS
        .iter()
        .find(|entry| message.contains(entry.0))
        .map_or("Invalid format", |entry| entry.1)
        .to_string()
}
|
|
|
|
#[pg_extern(strict, parallel_safe)]
|
|
fn json_schema_cached(schema_id: &str) -> bool {
|
|
let cache = SCHEMA_CACHE.read().unwrap();
|
|
cache.id_to_index.contains_key(schema_id)
|
|
}
|
|
|
|
#[pg_extern(strict)]
|
|
fn clear_json_schemas() -> JsonB {
|
|
let mut cache = SCHEMA_CACHE.write().unwrap();
|
|
*cache = BoonCache {
|
|
schemas: Schemas::new(),
|
|
id_to_index: HashMap::new(),
|
|
};
|
|
JsonB(json!({ "response": "success" }))
|
|
}
|
|
|
|
#[pg_extern(strict, parallel_safe)]
|
|
fn show_json_schemas() -> JsonB {
|
|
let cache = SCHEMA_CACHE.read().unwrap();
|
|
let ids: Vec<String> = cache.id_to_index.keys().cloned().collect();
|
|
JsonB(json!({ "response": ids }))
|
|
}
|
|
|
|
/// Support module that `cargo pgrx test` invocations require.
/// It has to be visible at the root of the extension crate.
#[cfg(test)]
pub mod pg_test {
    /// One-off initialization hook run when the pg_test framework starts.
    /// Nothing needs to be set up, so the options are ignored.
    pub fn setup(_options: Vec<&str>) {}

    /// Returns the `postgresql.conf` settings the tests require; none are needed.
    #[must_use]
    pub fn postgresql_conf_options() -> Vec<&'static str> {
        Vec::new()
    }
}
|
|
|
|
|
|
// Test module, compiled only for test builds or when the `pg_test` feature is
// enabled. Its contents are kept in a separate file.
#[cfg(any(test, feature = "pg_test"))]
#[pg_schema]
mod tests {
    // Splice the contents of tests.rs into this module at compile time.
    include!("tests.rs");
}
|