more filtering

This commit is contained in:
2025-04-21 17:11:24 -04:00
parent c734983a59
commit b8c0e08068

View File

@ -52,8 +52,8 @@ fn cache_json_schema(schema_id: &str, schema: JsonB) -> JsonB {
// Collect leaf errors from the meta-schema validation failure
let mut error_list = Vec::new();
collect_leaf_errors(src, &mut error_list);
// Return the flat list directly
json!(error_list)
// Filter and deduplicate errors, returning as a single JSON Value (Array)
json!(filter_boon_errors(error_list))
}
_ => {
// Keep existing handling for other compilation errors
@ -91,16 +91,13 @@ fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB {
match cache.schemas.validate(&instance_value, *sch_index) {
Ok(_) => JsonB(json!({ "success": true })),
Err(validation_error) => {
// Collect all leaf errors first
let mut raw_error_list = Vec::new();
collect_leaf_errors(&validation_error, &mut raw_error_list);
// Filter the errors (e.g., deduplicate by instance_path)
let filtered_error_list = filter_boon_errors(raw_error_list);
// Directly use the result of format_validation_error
// which now includes the top-level success indicator and flat error list
let mut error_list = Vec::new();
collect_leaf_errors(&validation_error, &mut error_list);
JsonB(json!({
"success": false,
"error": filtered_error_list // Return the filtered list
"error": filter_boon_errors(error_list) // Filter and deduplicate errors
}))
}
}
@ -130,28 +127,48 @@ fn collect_leaf_errors(error: &ValidationError, errors_list: &mut Vec<Value>) {
}
}
// Filters collected errors, e.g., deduplicating by instance_path
// Filters collected errors, removing structural noise and then deduplicating by instance_path
fn filter_boon_errors(raw_errors: Vec<Value>) -> Vec<Value> {
use std::collections::HashMap;
use std::collections::hash_map::Entry;
// Use a HashMap to keep only the first error for each instance_path
let mut unique_errors: HashMap<String, Value> = HashMap::new();
// Define schema keywords that indicate structural paths, not instance paths
let structural_path_segments = [
"/allOf/", "/anyOf/", "/oneOf/",
"/if/", "/then/", "/else/",
"/not/"
// Note: "/properties/" and "/items/" are generally valid,
// but might appear spuriously in boon's paths for complex types.
// We exclude only the explicitly logical/combinatorial ones for now.
];
for error_value in raw_errors {
// 1. Filter out errors with instance_paths containing structural segments
let plausible_errors: Vec<Value> = raw_errors.into_iter().filter(|error_value| {
if let Some(instance_path_value) = error_value.get("instance_path") {
if let Some(instance_path_str) = instance_path_value.as_str() {
// Keep if NONE of the structural segments are present
!structural_path_segments.iter().any(|&segment| instance_path_str.contains(segment))
} else {
false // Invalid instance_path type, filter out
}
} else {
false // No instance_path field, filter out
}
}).collect();
// 2. Deduplicate the remaining plausible errors by instance_path
let mut unique_errors: HashMap<String, Value> = HashMap::new();
for error_value in plausible_errors {
if let Some(instance_path_value) = error_value.get("instance_path") {
if let Some(instance_path_str) = instance_path_value.as_str() {
// Use Entry API to insert only if the key is not present
if let Entry::Vacant(entry) = unique_errors.entry(instance_path_str.to_string()) {
entry.insert(error_value);
}
}
}
// If error doesn't have instance_path or it's not a string, we might ignore it or handle differently.
// For now, we implicitly ignore errors without a valid string instance_path for deduplication.
}
// Collect the unique errors from the map values
// Collect the unique errors
unique_errors.into_values().collect()
}