more improvements to ref tracking in json schemas and tests

This commit is contained in:
2025-08-27 03:30:15 -04:00
parent 5e55786e3e
commit e40de2eb12
5 changed files with 1294 additions and 1351 deletions

View File

@ -9,6 +9,14 @@ use std::borrow::Cow;
use std::collections::hash_map::Entry;
use std::{collections::HashMap, sync::RwLock};
/// Category of a schema being registered with the compiler.
///
/// The category decides whether strict validation (injecting
/// `unevaluatedProperties: false`) is applied when the schema is added as a
/// resource.
#[derive(Clone, Copy, Debug)]
enum SchemaType {
/// Schema taken from the `enums` input; added without strict validation.
Enum,
/// Schema taken from the `types` input (also used for punc-local schemas).
/// Strict validation is applied, but the top level of the schema is skipped
/// so the type itself stays extensible.
Type,
/// Request/response schema of a punc whose `public` flag is `true`;
/// strict validation is applied, including at the top level.
PublicPunc,
/// Request/response schema of a punc whose `public` flag is `false` or
/// missing; added without strict validation.
PrivatePunc,
}
struct BoonCache {
schemas: Schemas,
id_to_index: HashMap<String, SchemaIndex>,
@ -31,8 +39,9 @@ lazy_static! {
}
#[pg_extern(strict)]
fn cache_json_schemas(types: JsonB, puncs: JsonB) -> JsonB {
fn cache_json_schemas(enums: JsonB, types: JsonB, puncs: JsonB) -> JsonB {
let mut cache = SCHEMA_CACHE.write().unwrap();
let enums_value: Value = enums.0;
let types_value: Value = types.0;
let puncs_value: Value = puncs.0;
@ -51,8 +60,42 @@ fn cache_json_schemas(types: JsonB, puncs: JsonB) -> JsonB {
// Track all schema IDs for compilation
let mut all_schema_ids = Vec::new();
// Phase 1: Add all type schemas as resources (these are referenced by puncs)
// Types are never strict - they're reusable building blocks
// Phase 1: Add all enum schemas as resources (priority 1 - these are referenced by types and puncs)
// Enums are never strict - they're reusable building blocks
if let Some(enums_array) = enums_value.as_array() {
for enum_row in enums_array {
if let Some(enum_obj) = enum_row.as_object() {
if let (Some(enum_name), Some(schemas_raw)) = (
enum_obj.get("name").and_then(|v| v.as_str()),
enum_obj.get("schemas")
) {
// Parse the schemas JSONB field
if let Some(schemas_array) = schemas_raw.as_array() {
for schema_def in schemas_array {
if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) {
if let Err(e) = add_schema_resource(&mut compiler, schema_id, schema_def.clone(), SchemaType::Enum, &mut errors) {
errors.push(json!({
"code": "ENUM_SCHEMA_RESOURCE_FAILED",
"message": format!("Failed to add schema resource '{}' for enum '{}'", schema_id, enum_name),
"details": {
"enum_name": enum_name,
"schema_id": schema_id,
"cause": format!("{}", e)
}
}));
} else {
all_schema_ids.push(schema_id.to_string());
}
}
}
}
}
}
}
}
// Phase 2: Add all type schemas as resources (priority 2 - these are referenced by puncs)
// Types are strict below the top level - nested objects reject extra properties, while the top level stays extensible
if let Some(types_array) = types_value.as_array() {
for type_row in types_array {
if let Some(type_obj) = type_row.as_object() {
@ -64,7 +107,7 @@ fn cache_json_schemas(types: JsonB, puncs: JsonB) -> JsonB {
if let Some(schemas_array) = schemas_raw.as_array() {
for schema_def in schemas_array {
if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) {
if let Err(e) = add_schema_resource(&mut compiler, schema_id, schema_def.clone(), false, &mut errors) {
if let Err(e) = add_schema_resource(&mut compiler, schema_id, schema_def.clone(), SchemaType::Type, &mut errors) {
errors.push(json!({
"code": "TYPE_SCHEMA_RESOURCE_FAILED",
"message": format!("Failed to add schema resource '{}' for type '{}'", schema_id, type_name),
@ -85,23 +128,24 @@ fn cache_json_schemas(types: JsonB, puncs: JsonB) -> JsonB {
}
}
// Phase 2: Add all punc schemas as resources (these may reference type schemas)
// Phase 3: Add all punc schemas as resources (these may reference enum and type schemas)
// Each punc gets strict validation based on its public field
if let Some(puncs_array) = puncs_value.as_array() {
for punc_row in puncs_array {
if let Some(punc_obj) = punc_row.as_object() {
if let Some(punc_name) = punc_obj.get("name").and_then(|v| v.as_str()) {
// Get the strict setting for this specific punc (public = strict)
let punc_strict = punc_obj.get("public")
// Determine schema type based on public status
let is_public = punc_obj.get("public")
.and_then(|v| v.as_bool())
.unwrap_or(false);
let punc_schema_type = if is_public { SchemaType::PublicPunc } else { SchemaType::PrivatePunc };
// Add punc local schemas as resources (from schemas field) - use $id directly (universal)
if let Some(schemas_raw) = punc_obj.get("schemas") {
if let Some(schemas_array) = schemas_raw.as_array() {
for schema_def in schemas_array {
if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) {
if let Err(e) = add_schema_resource(&mut compiler, schema_id, schema_def.clone(), punc_strict, &mut errors) {
if let Err(e) = add_schema_resource(&mut compiler, schema_id, schema_def.clone(), SchemaType::Type, &mut errors) {
errors.push(json!({
"code": "PUNC_LOCAL_SCHEMA_RESOURCE_FAILED",
"message": format!("Failed to add local schema resource '{}' for punc '{}'", schema_id, punc_name),
@ -123,7 +167,7 @@ fn cache_json_schemas(types: JsonB, puncs: JsonB) -> JsonB {
if let Some(request_schema) = punc_obj.get("request") {
if !request_schema.is_null() {
let request_schema_id = format!("{}.request", punc_name);
if let Err(e) = add_schema_resource(&mut compiler, &request_schema_id, request_schema.clone(), punc_strict, &mut errors) {
if let Err(e) = add_schema_resource(&mut compiler, &request_schema_id, request_schema.clone(), punc_schema_type, &mut errors) {
errors.push(json!({
"code": "PUNC_REQUEST_SCHEMA_RESOURCE_FAILED",
"message": format!("Failed to add request schema resource for punc '{}'", punc_name),
@ -143,7 +187,7 @@ fn cache_json_schemas(types: JsonB, puncs: JsonB) -> JsonB {
if let Some(response_schema) = punc_obj.get("response") {
if !response_schema.is_null() {
let response_schema_id = format!("{}.response", punc_name);
if let Err(e) = add_schema_resource(&mut compiler, &response_schema_id, response_schema.clone(), punc_strict, &mut errors) {
if let Err(e) = add_schema_resource(&mut compiler, &response_schema_id, response_schema.clone(), punc_schema_type, &mut errors) {
errors.push(json!({
"code": "PUNC_RESPONSE_SCHEMA_RESOURCE_FAILED",
"message": format!("Failed to add response schema resource for punc '{}'", punc_name),
@ -163,14 +207,21 @@ fn cache_json_schemas(types: JsonB, puncs: JsonB) -> JsonB {
}
}
// Phase 3: Compile all schemas now that all resources are added
// Phase 4: Compile all schemas now that all resources are added
if !errors.is_empty() {
// If we had errors adding resources, don't attempt compilation
return JsonB(json!({ "errors": errors }));
}
if let Err(_) = compile_all_schemas(&mut compiler, &mut cache, &all_schema_ids, &mut errors) {
// compile_all_schemas already adds errors to the errors vector
// Add a high-level wrapper error when schema compilation fails
errors.push(json!({
"code": "COMPILE_ALL_SCHEMAS_FAILED",
"message": "Failed to compile JSON schemas during cache operation",
"details": {
"cause": "Schema compilation failed - see detailed errors above"
}
}));
}
if errors.is_empty() {
@ -185,12 +236,17 @@ fn add_schema_resource(
compiler: &mut Compiler,
schema_id: &str,
mut schema_value: Value,
strict: bool,
schema_type: SchemaType,
errors: &mut Vec<Value>
) -> Result<(), String> {
// Apply strict validation to all objects in the schema if requested
if strict {
apply_strict_validation(&mut schema_value);
// Apply strict validation based on schema type
match schema_type {
SchemaType::Enum | SchemaType::PrivatePunc => {
// Enums and private puncs don't need strict validation
},
SchemaType::Type | SchemaType::PublicPunc => {
apply_strict_validation(&mut schema_value, schema_type);
}
}
// Use schema_id directly - simple IDs like "entity", "user", "punc.request"
@ -256,22 +312,27 @@ fn compile_all_schemas(
//
// This recursively adds unevaluatedProperties: false to object-type schemas,
// but SKIPS schemas inside if/then/else to avoid breaking conditional validation.
fn apply_strict_validation(schema: &mut Value) {
apply_strict_validation_recursive(schema, false);
// For type schemas, it skips the top level to allow inheritance.
fn apply_strict_validation(schema: &mut Value, schema_type: SchemaType) {
apply_strict_validation_recursive(schema, false, schema_type, true);
}
fn apply_strict_validation_recursive(schema: &mut Value, inside_conditional: bool) {
fn apply_strict_validation_recursive(schema: &mut Value, inside_conditional: bool, schema_type: SchemaType, is_top_level: bool) {
match schema {
Value::Object(map) => {
// Skip adding strict validation if we're inside a conditional
if !inside_conditional {
// Add strict validation to object schemas only at top level
if let Some(Value::String(t)) = map.get("type") {
if t == "object" && !map.contains_key("unevaluatedProperties") && !map.contains_key("additionalProperties") {
// At top level, use unevaluatedProperties: false
// This considers all evaluated properties from all schemas
map.insert("unevaluatedProperties".to_string(), Value::Bool(false));
}
// OR if we're at the top level of a type schema (types should be extensible)
let skip_strict = inside_conditional || (matches!(schema_type, SchemaType::Type) && is_top_level);
if !skip_strict {
// Apply unevaluatedProperties: false to schemas that have $ref OR type: "object"
let has_ref = map.contains_key("$ref");
let has_object_type = map.get("type").and_then(|v| v.as_str()) == Some("object");
if (has_ref || has_object_type) && !map.contains_key("unevaluatedProperties") && !map.contains_key("additionalProperties") {
// Use unevaluatedProperties: false to prevent extra properties
// This considers all evaluated properties from all schemas including refs
map.insert("unevaluatedProperties".to_string(), Value::Bool(false));
}
}
@ -279,13 +340,13 @@ fn apply_strict_validation_recursive(schema: &mut Value, inside_conditional: boo
for (key, value) in map.iter_mut() {
// Mark when we're inside conditional branches
let in_conditional = inside_conditional || matches!(key.as_str(), "if" | "then" | "else");
apply_strict_validation_recursive(value, in_conditional);
apply_strict_validation_recursive(value, in_conditional, schema_type, false)
}
}
Value::Array(arr) => {
// Recurse into array items
for item in arr.iter_mut() {
apply_strict_validation_recursive(item, inside_conditional);
apply_strict_validation_recursive(item, inside_conditional, schema_type, false);
}
}
_ => {}
@ -957,6 +1018,16 @@ pub mod pg_test {
}
}
// Test-only module: compiled only for test builds or when the `pg_test`
// feature is enabled. Its body is the textual contents of `helpers.rs`,
// which `include!` resolves relative to this source file.
#[cfg(any(test, feature = "pg_test"))]
mod helpers {
include!("helpers.rs");
}
// Test-only module: compiled only for test builds or when the `pg_test`
// feature is enabled. Its body is the textual contents of `schemas.rs`,
// which `include!` resolves relative to this source file.
#[cfg(any(test, feature = "pg_test"))]
mod schemas {
include!("schemas.rs");
}
#[cfg(any(test, feature = "pg_test"))]
#[pg_schema]
mod tests {