From b8c0e08068b62f49c7d44c340c00fc005c66581b Mon Sep 17 00:00:00 2001 From: Alex Groleau Date: Mon, 21 Apr 2025 17:11:24 -0400 Subject: [PATCH] more filtering --- src/lib.rs | 53 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0f56329..3a05d24 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -52,8 +52,8 @@ fn cache_json_schema(schema_id: &str, schema: JsonB) -> JsonB { // Collect leaf errors from the meta-schema validation failure let mut error_list = Vec::new(); collect_leaf_errors(src, &mut error_list); - // Return the flat list directly - json!(error_list) + // Filter and deduplicate errors, returning as a single JSON Value (Array) + json!(filter_boon_errors(error_list)) } _ => { // Keep existing handling for other compilation errors @@ -91,16 +91,13 @@ fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB { match cache.schemas.validate(&instance_value, *sch_index) { Ok(_) => JsonB(json!({ "success": true })), Err(validation_error) => { - // Collect all leaf errors first - let mut raw_error_list = Vec::new(); - collect_leaf_errors(&validation_error, &mut raw_error_list); - - // Filter the errors (e.g., deduplicate by instance_path) - let filtered_error_list = filter_boon_errors(raw_error_list); - + // Directly use the result of format_validation_error + // which now includes the top-level success indicator and flat error list + let mut error_list = Vec::new(); + collect_leaf_errors(&validation_error, &mut error_list); JsonB(json!({ "success": false, - "error": filtered_error_list // Return the filtered list + "error": filter_boon_errors(error_list) // Filter and deduplicate errors })) } } @@ -130,28 +127,48 @@ fn collect_leaf_errors(error: &ValidationError, errors_list: &mut Vec) { } } -// Filters collected errors, e.g., deduplicating by instance_path +// Filters collected errors, removing structural noise and then deduplicating by instance_path fn filter_boon_errors(raw_errors: Vec) -> Vec { use std::collections::HashMap; use std::collections::hash_map::Entry; - // Use a HashMap to keep only the first error for each instance_path - let mut unique_errors: HashMap = HashMap::new(); + // Define schema keywords that indicate structural paths, not instance paths + let structural_path_segments = [ + "/allOf/", "/anyOf/", "/oneOf/", + "/if/", "/then/", "/else/", + "/not/" + // Note: "/properties/" and "/items/" are generally valid, + // but might appear spuriously in boon's paths for complex types. + // We exclude only the explicitly logical/combinatorial ones for now. + ]; - for error_value in raw_errors { + // 1. Filter out errors with instance_paths containing structural segments + let plausible_errors: Vec = raw_errors.into_iter().filter(|error_value| { + if let Some(instance_path_value) = error_value.get("instance_path") { + if let Some(instance_path_str) = instance_path_value.as_str() { + // Keep if NONE of the structural segments are present + !structural_path_segments.iter().any(|&segment| instance_path_str.contains(segment)) + } else { + false // Invalid instance_path type, filter out + } + } else { + false // No instance_path field, filter out + } + }).collect(); + + // 2. Deduplicate the remaining plausible errors by instance_path + let mut unique_errors: HashMap = HashMap::new(); + for error_value in plausible_errors { if let Some(instance_path_value) = error_value.get("instance_path") { if let Some(instance_path_str) = instance_path_value.as_str() { - // Use Entry API to insert only if the key is not present if let Entry::Vacant(entry) = unique_errors.entry(instance_path_str.to_string()) { entry.insert(error_value); } } } - // If error doesn't have instance_path or it's not a string, we might ignore it or handle differently. - // For now, we implicitly ignore errors without a valid string instance_path for deduplication. } - // Collect the unique errors from the map values + // Collect the unique errors unique_errors.into_values().collect() }