Fixed type-mismatch checking so it no longer fails fast and now works through nested data

This commit is contained in:
2025-09-12 22:59:27 -04:00
parent 3fdbf60396
commit 3d770b0831
3 changed files with 369 additions and 317 deletions

View File

@ -17,10 +17,15 @@ enum SchemaType {
PrivatePunc,
}
struct BoonCache {
struct Schema {
index: SchemaIndex,
t: SchemaType,
value: Value,
}
struct Cache {
schemas: Schemas,
id_to_index: HashMap<String, SchemaIndex>,
id_to_type: HashMap<String, SchemaType>,
map: HashMap<String, Schema>,
}
// Structure to hold error information without lifetimes
@ -33,177 +38,105 @@ struct Error {
}
lazy_static! {
static ref SCHEMA_CACHE: RwLock<BoonCache> = RwLock::new(BoonCache {
static ref SCHEMA_CACHE: RwLock<Cache> = RwLock::new(Cache {
schemas: Schemas::new(),
id_to_index: HashMap::new(),
id_to_type: HashMap::new(),
map: HashMap::new(),
});
}
#[pg_extern(strict)]
fn cache_json_schemas(enums: JsonB, types: JsonB, puncs: JsonB) -> JsonB {
let mut cache = SCHEMA_CACHE.write().unwrap();
let enums_value: Value = enums.0;
let types_value: Value = types.0;
let puncs_value: Value = puncs.0;
let mut cache = SCHEMA_CACHE.write().unwrap();
let enums_value: Value = enums.0;
let types_value: Value = types.0;
let puncs_value: Value = puncs.0;
// Clear existing cache
*cache = BoonCache {
schemas: Schemas::new(),
id_to_index: HashMap::new(),
id_to_type: HashMap::new(),
};
*cache = Cache {
schemas: Schemas::new(),
map: HashMap::new(),
};
// Create the boon compiler and enable format assertions
let mut compiler = Compiler::new();
compiler.enable_format_assertions();
let mut compiler = Compiler::new();
compiler.enable_format_assertions();
let mut errors = Vec::new();
let mut errors = Vec::new();
let mut schemas_to_compile = Vec::new();
// Track all schema IDs for compilation
let mut all_schema_ids = Vec::new();
// Phase 1: Add all enum schemas as resources (priority 1 - these are referenced by types and puncs)
// Enums are never strict - they're reusable building blocks
if let Some(enums_array) = enums_value.as_array() {
for enum_row in enums_array {
if let Some(enum_obj) = enum_row.as_object() {
if let (Some(enum_name), Some(schemas_raw)) = (
enum_obj.get("name").and_then(|v| v.as_str()),
enum_obj.get("schemas")
) {
// Parse the schemas JSONB field
if let Some(schemas_array) = schemas_raw.as_array() {
for schema_def in schemas_array {
if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) {
if let Err(e) = add_schema_resource(&mut compiler, schema_id, schema_def.clone(), SchemaType::Enum, &mut errors) {
errors.push(json!({
"code": "ENUM_SCHEMA_RESOURCE_FAILED",
"message": format!("Failed to add schema resource '{}' for enum '{}'", schema_id, enum_name),
"details": {
"enum_name": enum_name,
"schema_id": schema_id,
"cause": format!("{}", e)
// Phase 1: Enums
if let Some(enums_array) = enums_value.as_array() {
for enum_row in enums_array {
if let Some(schemas_raw) = enum_row.get("schemas") {
if let Some(schemas_array) = schemas_raw.as_array() {
for schema_def in schemas_array {
if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) {
schemas_to_compile.push((schema_id.to_string(), schema_def.clone(), SchemaType::Enum));
}
}
}));
} else {
all_schema_ids.push(schema_id.to_string());
cache.id_to_type.insert(schema_id.to_string(), SchemaType::Enum);
}
}
}
}
}
}
}
}
// Phase 2: Add all type schemas as resources (priority 2 - these are referenced by puncs)
// Types are always strict - they should not allow extra properties
if let Some(types_array) = types_value.as_array() {
for type_row in types_array {
if let Some(type_obj) = type_row.as_object() {
if let (Some(type_name), Some(schemas_raw)) = (
type_obj.get("name").and_then(|v| v.as_str()),
type_obj.get("schemas")
) {
// Parse the schemas JSONB field
if let Some(schemas_array) = schemas_raw.as_array() {
for schema_def in schemas_array {
if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) {
if let Err(e) = add_schema_resource(&mut compiler, schema_id, schema_def.clone(), SchemaType::Type, &mut errors) {
errors.push(json!({
"code": "TYPE_SCHEMA_RESOURCE_FAILED",
"message": format!("Failed to add schema resource '{}' for type '{}'", schema_id, type_name),
"details": {
"type_name": type_name,
"schema_id": schema_id,
"cause": format!("{}", e)
// Phase 2: Types
if let Some(types_array) = types_value.as_array() {
for type_row in types_array {
if let Some(schemas_raw) = type_row.get("schemas") {
if let Some(schemas_array) = schemas_raw.as_array() {
for schema_def in schemas_array {
if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) {
schemas_to_compile.push((schema_id.to_string(), schema_def.clone(), SchemaType::Type));
}
}
}));
} else {
all_schema_ids.push(schema_id.to_string());
cache.id_to_type.insert(schema_id.to_string(), SchemaType::Type);
}
}
}
}
}
}
}
}
// Phase 3: Add all punc schemas as resources (these may reference enum and type schemas)
// Each punc gets strict validation based on its public field
if let Some(puncs_array) = puncs_value.as_array() {
for punc_row in puncs_array {
if let Some(punc_obj) = punc_row.as_object() {
if let Some(punc_name) = punc_obj.get("name").and_then(|v| v.as_str()) {
// Determine schema type based on public status
let is_public = punc_obj.get("public")
.and_then(|v| v.as_bool())
.unwrap_or(false);
let punc_schema_type = if is_public { SchemaType::PublicPunc } else { SchemaType::PrivatePunc };
// Add punc schemas from the 'schemas' array
if let Some(schemas_raw) = punc_obj.get("schemas") {
if let Some(schemas_array) = schemas_raw.as_array() {
for schema_def in schemas_array {
if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) {
let request_schema_id = format!("{}.request", punc_name);
let response_schema_id = format!("{}.response", punc_name);
let schema_type_for_def = if schema_id == request_schema_id || schema_id == response_schema_id {
punc_schema_type
} else {
SchemaType::Type // For local/nested schemas
};
if let Err(e) = add_schema_resource(&mut compiler, schema_id, schema_def.clone(), schema_type_for_def, &mut errors) {
errors.push(json!({
"code": "PUNC_SCHEMA_RESOURCE_FAILED",
"message": format!("Failed to add schema resource '{}' for punc '{}'", schema_id, punc_name),
"details": {
"punc_name": punc_name,
"schema_id": schema_id,
"cause": format!("{}", e)
}
}));
} else {
all_schema_ids.push(schema_id.to_string());
cache.id_to_type.insert(schema_id.to_string(), schema_type_for_def);
}
// Phase 3: Puncs
if let Some(puncs_array) = puncs_value.as_array() {
for punc_row in puncs_array {
if let Some(punc_obj) = punc_row.as_object() {
if let Some(punc_name) = punc_obj.get("name").and_then(|v| v.as_str()) {
let is_public = punc_obj.get("public").and_then(|v| v.as_bool()).unwrap_or(false);
let punc_schema_type = if is_public { SchemaType::PublicPunc } else { SchemaType::PrivatePunc };
if let Some(schemas_raw) = punc_obj.get("schemas") {
if let Some(schemas_array) = schemas_raw.as_array() {
for schema_def in schemas_array {
if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) {
let request_schema_id = format!("{}.request", punc_name);
let response_schema_id = format!("{}.response", punc_name);
let schema_type_for_def = if schema_id == request_schema_id || schema_id == response_schema_id {
punc_schema_type
} else {
SchemaType::Type
};
schemas_to_compile.push((schema_id.to_string(), schema_def.clone(), schema_type_for_def));
}
}
}
}
}
}
}
}
}
}
}
}
// Phase 4: Compile all schemas now that all resources are added
if !errors.is_empty() {
// If we had errors adding resources, don't attempt compilation
return JsonB(json!({ "errors": errors }));
}
// Add all resources to compiler first
for (id, value, schema_type) in &schemas_to_compile {
add_schema_resource(&mut compiler, id, value.clone(), *schema_type, &mut errors);
}
if let Err(_) = compile_all_schemas(&mut compiler, &mut cache, &all_schema_ids, &mut errors) {
// Add a high-level wrapper error when schema compilation fails
errors.push(json!({
"code": "COMPILE_ALL_SCHEMAS_FAILED",
"message": "Failed to compile JSON schemas during cache operation",
"details": {
"cause": "Schema compilation failed - see detailed errors above"
}
}));
}
if !errors.is_empty() {
return JsonB(json!({ "errors": errors }));
}
if errors.is_empty() {
JsonB(json!({ "response": "success" }))
} else {
JsonB(json!({ "errors": errors }))
}
// Compile all schemas
compile_all_schemas(&mut compiler, &mut cache, &schemas_to_compile, &mut errors);
if errors.is_empty() {
JsonB(json!({ "response": "success" }))
} else {
JsonB(json!({ "errors": errors }))
}
}
// Helper function to add a schema resource (without compiling)
@ -213,74 +146,51 @@ fn add_schema_resource(
mut schema_value: Value,
schema_type: SchemaType,
errors: &mut Vec<Value>
) -> Result<(), String> {
// Apply strict validation based on schema type
) {
match schema_type {
SchemaType::Enum | SchemaType::PrivatePunc => {
// Enums and private puncs don't need strict validation
},
SchemaType::Type | SchemaType::PublicPunc => {
apply_strict_validation(&mut schema_value, schema_type);
}
SchemaType::Enum | SchemaType::PrivatePunc => {},
SchemaType::Type | SchemaType::PublicPunc => apply_strict_validation(&mut schema_value, schema_type),
}
// Use schema_id directly - simple IDs like "entity", "user", "punc.request"
if let Err(e) = compiler.add_resource(schema_id, schema_value.clone()) {
if let Err(e) = compiler.add_resource(schema_id, schema_value) {
errors.push(json!({
"code": "SCHEMA_RESOURCE_FAILED",
"message": format!("Failed to add schema resource '{}'", schema_id),
"details": {
"schema": schema_id,
"cause": format!("{}", e)
}
"details": { "schema": schema_id, "cause": format!("{}", e) }
}));
return Err(format!("Failed to add schema resource: {}", e));
}
Ok(())
}
// Helper function to compile all added resources
fn compile_all_schemas(
compiler: &mut Compiler,
cache: &mut BoonCache,
schema_ids: &[String],
errors: &mut Vec<Value>
) -> Result<(), String> {
for schema_id in schema_ids {
match compiler.compile(schema_id, &mut cache.schemas) {
Ok(sch_index) => {
// Store the index using the original schema_id as the key
cache.id_to_index.insert(schema_id.to_string(), sch_index);
}
Err(e) => {
match &e {
CompileError::ValidationError { url: _url, src } => {
// Collect leaf errors from the meta-schema validation failure
let mut error_list = Vec::new();
collect_errors(src, &mut error_list);
// Get schema value for error formatting - we'll need to reconstruct or store it
let schema_value = json!({}); // Placeholder - we don't have the original value here
let formatted_errors = format_errors(error_list, &schema_value, schema_id);
errors.extend(formatted_errors);
}
_ => {
// Other compilation errors
errors.push(json!({
"code": "SCHEMA_COMPILATION_FAILED",
"message": format!("Schema '{}' compilation failed", schema_id),
"details": {
"schema": schema_id,
"cause": format!("{:?}", e)
}
}));
}
};
return Err(format!("Schema compilation failed: {:?}", e));
}
compiler: &mut Compiler,
cache: &mut Cache,
schemas_to_compile: &[(String, Value, SchemaType)],
errors: &mut Vec<Value>,
) {
for (id, value, schema_type) in schemas_to_compile {
match compiler.compile(id, &mut cache.schemas) {
Ok(index) => {
cache.map.insert(id.clone(), Schema { index, t: *schema_type, value: value.clone() });
}
Err(e) => {
match &e {
CompileError::ValidationError { src, .. } => {
let mut error_list = Vec::new();
collect_errors(src, &mut error_list);
let formatted_errors = format_errors(error_list, value, id);
errors.extend(formatted_errors);
}
_ => {
errors.push(json!({
"code": "SCHEMA_COMPILATION_FAILED",
"message": format!("Schema '{}' compilation failed", id),
"details": { "schema": id, "cause": format!("{:?}", e) }
}));
}
};
}
}
}
}
Ok(())
}
// Helper function to apply strict validation to a schema
@ -328,90 +238,149 @@ fn apply_strict_validation_recursive(schema: &mut Value, inside_conditional: boo
}
}
#[pg_extern(strict, parallel_safe)]
fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB {
let cache = SCHEMA_CACHE.read().unwrap();
// Lookup uses the original schema_id - schemas should always be available after bulk caching
match cache.id_to_index.get(schema_id) {
None => JsonB(json!({
"errors": [{
"code": "SCHEMA_NOT_FOUND",
"message": format!("Schema '{}' not found in cache", schema_id),
"details": {
"schema": schema_id,
"cause": "Schema was not found in bulk cache - ensure cache_json_schemas was called"
}
}]
})),
Some(sch_index) => {
let instance_value: Value = instance.0;
match cache.schemas.validate(&instance_value, *sch_index) {
Ok(_) => {
// After standard validation, perform custom type check if it's a Type schema
if let Some(&schema_type) = cache.id_to_type.get(schema_id) {
if schema_type == SchemaType::Type {
return validate_type_against_schema_id(&instance_value, schema_id);
fn walk_and_validate_refs(
instance: &Value,
schema: &Value,
cache: &std::sync::RwLockReadGuard<Cache>,
path_parts: &mut Vec<String>,
type_validated: bool,
top_level_id: Option<&str>,
errors: &mut Vec<Value>,
) {
if let Some(ref_url) = schema.get("$ref").and_then(|v| v.as_str()) {
if let Some(s) = cache.map.get(ref_url) {
let mut new_type_validated = type_validated;
if !type_validated && s.t == SchemaType::Type {
let id_to_use = top_level_id.unwrap_or(ref_url);
let expected_type = id_to_use.split('.').next().unwrap_or(id_to_use);
if let Some(actual_type) = instance.get("type").and_then(|v| v.as_str()) {
if actual_type == expected_type {
new_type_validated = true;
} else {
path_parts.push("type".to_string());
let path = format!("/{}", path_parts.join("/"));
path_parts.pop();
errors.push(json!({
"code": "TYPE_MISMATCH",
"message": format!("Instance type '{}' does not match expected type '{}' derived from schema $ref", actual_type, expected_type),
"details": { "path": path, "context": instance, "cause": { "expected": expected_type, "actual": actual_type }, "schema": ref_url }
}));
}
} else {
if top_level_id.is_some() {
let path = if path_parts.is_empty() { "".to_string() } else { format!("/{}", path_parts.join("/")) };
errors.push(json!({
"code": "TYPE_MISMATCH",
"message": "Instance is missing 'type' property required for schema validation",
"details": { "path": path, "context": instance, "cause": { "expected": expected_type }, "schema": ref_url }
}));
}
}
}
}
// For non-Type schemas, or if type not found (shouldn't happen), success.
JsonB(json!({ "response": "success" }))
walk_and_validate_refs(instance, &s.value, cache, path_parts, new_type_validated, None, errors);
}
Err(validation_error) => {
let mut error_list = Vec::new();
collect_errors(&validation_error, &mut error_list);
let errors = format_errors(error_list, &instance_value, schema_id);
// Filter out FALSE_SCHEMA errors if there are other validation errors
let filtered_errors = filter_false_schema_errors(errors);
if filtered_errors.is_empty() {
JsonB(json!({ "response": "success" }))
} else {
JsonB(json!({ "errors": filtered_errors }))
}
}
}
}
}
if let Some(properties) = schema.get("properties").and_then(|v| v.as_object()) {
for (prop_name, prop_schema) in properties {
if let Some(prop_value) = instance.get(prop_name) {
path_parts.push(prop_name.clone());
walk_and_validate_refs(prop_value, prop_schema, cache, path_parts, type_validated, None, errors);
path_parts.pop();
}
}
}
if let Some(items_schema) = schema.get("items") {
if let Some(instance_array) = instance.as_array() {
for (i, item) in instance_array.iter().enumerate() {
path_parts.push(i.to_string());
walk_and_validate_refs(item, items_schema, cache, path_parts, false, None, errors);
path_parts.pop();
}
}
}
if let Some(all_of_array) = schema.get("allOf").and_then(|v| v.as_array()) {
for sub_schema in all_of_array {
walk_and_validate_refs(instance, sub_schema, cache, path_parts, type_validated, None, errors);
}
}
if let Some(any_of_array) = schema.get("anyOf").and_then(|v| v.as_array()) {
for sub_schema in any_of_array {
walk_and_validate_refs(instance, sub_schema, cache, path_parts, type_validated, None, errors);
}
}
if let Some(one_of_array) = schema.get("oneOf").and_then(|v| v.as_array()) {
for sub_schema in one_of_array {
walk_and_validate_refs(instance, sub_schema, cache, path_parts, type_validated, None, errors);
}
}
if let Some(if_schema) = schema.get("if") {
walk_and_validate_refs(instance, if_schema, cache, path_parts, type_validated, None, errors);
}
if let Some(then_schema) = schema.get("then") {
walk_and_validate_refs(instance, then_schema, cache, path_parts, type_validated, None, errors);
}
if let Some(else_schema) = schema.get("else") {
walk_and_validate_refs(instance, else_schema, cache, path_parts, type_validated, None, errors);
}
if let Some(not_schema) = schema.get("not") {
walk_and_validate_refs(instance, not_schema, cache, path_parts, type_validated, None, errors);
}
}
fn validate_type_against_schema_id(instance: &Value, schema_id: &str) -> JsonB {
// Get the main type (primary or before the first dot) and compare to type in instance
let expected_type = schema_id.split('.').next().unwrap_or(schema_id);
if let Some(actual_type) = instance.get("type").and_then(|v| v.as_str()) {
if actual_type == expected_type {
return JsonB(json!({ "response": "success" }));
/// Validate a JSONB `instance` against the schema cached under `schema_id`.
///
/// Returns `{"response": "success"}` when validation passes, or
/// `{"errors": [...]}` where each error object carries `code`, `message`,
/// and `details` fields.
///
/// NOTE(review): requires `cache_json_schemas` to have populated the cache
/// first; otherwise a single SCHEMA_NOT_FOUND error is returned.
#[pg_extern(strict, parallel_safe)]
fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB {
// Read lock only — validation never mutates the shared cache.
let cache = SCHEMA_CACHE.read().unwrap();
match cache.map.get(schema_id) {
// Schema id was never cached (or the cache was cleared).
None => JsonB(json!({
"errors": [{
"code": "SCHEMA_NOT_FOUND",
"message": format!("Schema '{}' not found in cache", schema_id),
"details": {
"schema": schema_id,
"cause": "Schema was not found in bulk cache - ensure cache_json_schemas was called"
}
}]
})),
Some(schema) => {
let instance_value: Value = instance.0;
// Standard JSON Schema validation against the compiled index first.
match cache.schemas.validate(&instance_value, schema.index) {
Ok(_) => {
// Standard validation passed; run the custom $ref/type walk
// for Type and punc schemas (plain Enum schemas are exempt).
let mut custom_errors = Vec::new();
if schema.t == SchemaType::Type || schema.t == SchemaType::PublicPunc || schema.t == SchemaType::PrivatePunc {
let mut path_parts = vec![];
// Only Type schemas derive the expected `type` value from
// the schema id itself; puncs pass None for the top-level id.
let top_level_id = if schema.t == SchemaType::Type { Some(schema_id) } else { None };
walk_and_validate_refs(&instance_value, &schema.value, &cache, &mut path_parts, false, top_level_id, &mut custom_errors);
}
if custom_errors.is_empty() {
JsonB(json!({ "response": "success" }))
} else {
JsonB(json!({ "errors": custom_errors }))
}
}
Err(validation_error) => {
// Flatten the nested boon error tree into leaf errors, then
// format them for the caller.
let mut error_list = Vec::new();
collect_errors(&validation_error, &mut error_list);
let errors = format_errors(error_list, &instance_value, schema_id);
// Drop FALSE_SCHEMA noise; presumably kept only when no other
// errors remain — TODO confirm against filter_false_schema_errors.
let filtered_errors = filter_false_schema_errors(errors);
if filtered_errors.is_empty() {
JsonB(json!({ "response": "success" }))
} else {
JsonB(json!({ "errors": filtered_errors }))
}
}
}
}
}
}
// If we reach here, validation failed. Now we build the specific error.
let (message, cause, context) =
if let Some(actual_type) = instance.get("type").and_then(|v| v.as_str()) {
// This handles the case where the type is a string but doesn't match.
(
format!("Instance type '{}' does not match expected type '{}' derived from schema ID", actual_type, expected_type),
json!({ "expected": expected_type, "actual": actual_type }),
json!(actual_type)
)
} else {
// This handles the case where 'type' is missing or not a string.
(
"Instance 'type' property is missing or not a string".to_string(),
json!("The 'type' property must be a string and is required for this validation."),
instance.get("type").unwrap_or(&Value::Null).clone()
)
};
JsonB(json!({
"errors": [{
"code": "TYPE_MISMATCH",
"message": message,
"details": {
"path": "/type",
"context": context,
"cause": cause,
"schema": schema_id
}
}]
}))
}
// Recursively collects validation errors
@ -1008,16 +977,15 @@ fn extract_value_at_path(instance: &Value, path: &str) -> Value {
#[pg_extern(strict, parallel_safe)]
fn json_schema_cached(schema_id: &str) -> bool {
let cache = SCHEMA_CACHE.read().unwrap();
cache.id_to_index.contains_key(schema_id)
cache.map.contains_key(schema_id)
}
#[pg_extern(strict)]
fn clear_json_schemas() -> JsonB {
let mut cache = SCHEMA_CACHE.write().unwrap();
*cache = BoonCache {
*cache = Cache {
schemas: Schemas::new(),
id_to_index: HashMap::new(),
id_to_type: HashMap::new(),
map: HashMap::new(),
};
JsonB(json!({ "response": "success" }))
}
@ -1025,7 +993,7 @@ fn clear_json_schemas() -> JsonB {
#[pg_extern(strict, parallel_safe)]
fn show_json_schemas() -> JsonB {
let cache = SCHEMA_CACHE.read().unwrap();
let ids: Vec<String> = cache.id_to_index.keys().cloned().collect();
let ids: Vec<String> = cache.map.keys().cloned().collect();
JsonB(json!({ "response": ids }))
}