From 4b6ea6536cc8f492f32298fef36c424bb0782b89 Mon Sep 17 00:00:00 2001 From: Alex Groleau Date: Fri, 10 Oct 2025 17:35:57 -0400 Subject: [PATCH] added type family support --- GEMINI.md | 24 ++++++++++++++++- out.txt | 44 ------------------------------- src/lib.rs | 38 ++++++++++++++++++++++++--- src/schemas.rs | 71 +++++++++++++++++++++++++++++++++++++++++++++++++- src/tests.rs | 52 +++++++++++++++++++++++++++++++++++- 5 files changed, 178 insertions(+), 51 deletions(-) delete mode 100644 out.txt diff --git a/GEMINI.md b/GEMINI.md index 5fcf101..5f06e49 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -10,7 +10,29 @@ This document outlines the purpose of the `jspg` project, its architecture, and The extension is designed for high-performance scenarios where schemas are defined once and used many times for validation. It achieves this through an in-memory cache. -1. **Caching:** A user first calls the `cache_json_schemas(enums, types, puncs)` SQL function. This function takes arrays of JSON objects representing different kinds of schemas within a larger application framework. It uses the vendored `boon` crate to compile all these schemas into an efficient internal format and stores them in a static, in-memory `SCHEMA_CACHE`. This cache is managed by a `RwLock` to allow concurrent reads during validation. +1. **Caching and Pre-processing:** A user first calls the `cache_json_schemas(enums, types, puncs)` SQL function. This function takes arrays of JSON objects representing different kinds of schemas: + - `enums`: Standalone enum schemas (e.g., for a `task_priority` list). + - `types`: Schemas for core application data models (e.g., `person`, `organization`). These may contain a `hierarchy` array for inheritance information. + - `puncs`: Schemas for API/function-specific requests and responses. + + Before compiling, `jspg` performs a crucial **pre-processing step** for type hierarchies. It inspects each definition in the `types` array. 
If a type includes a `hierarchy` array (e.g., a `person` type with `["entity", "organization", "user", "person"]`), `jspg` uses this to build a map of "type families." + + From this map, it generates new, virtual schemas on the fly. For example, for the `organization` type, it will generate a schema with `$id: "organization.family"` that contains an `enum` of all its descendant types, such as `["organization", "user", "person"]`. + + This allows developers to write more flexible schemas. Instead of strictly requiring a `const` type, you can validate against an entire inheritance chain: + + ```json + // In an "organization" schema definition + "properties": { + "type": { + // Allows the 'type' field to be "organization", "user", or "person" + "$ref": "organization.family", + "override": true + } + } + ``` + + Finally, all user-defined schemas and the newly generated `.family` schemas are passed to the vendored `boon` crate, compiled into an efficient internal format, and stored in a static, in-memory `SCHEMA_CACHE`. This cache is managed by a `RwLock` to allow for high-performance, concurrent reads during validation. 2. **Validation:** The `validate_json_schema(schema_id, instance)` SQL function is then used to validate a JSONB `instance` against a specific, pre-cached schema identified by its `$id`. This function looks up the compiled schema in the cache and runs the validation, returning a success response or a detailed error report. 
diff --git a/out.txt b/out.txt deleted file mode 100644 index a571398..0000000 --- a/out.txt +++ /dev/null @@ -1,44 +0,0 @@ - -running 23 tests - Building extension with features pg_test pg17 - Running command "/opt/homebrew/bin/cargo" "build" "--lib" "--features" "pg_test pg17" "--message-format=json-render-diagnostics" - Installing extension - Copying control file to /opt/homebrew/share/postgresql@17/extension/jspg.control - Copying shared library to /opt/homebrew/lib/postgresql@17/jspg.dylib - Finished installing jspg -test tests::pg_test_cache_invalid ... ok -test tests::pg_test_validate_nested_req_deps ... ok -test tests::pg_test_validate_format_empty_string_with_ref ... ok -test tests::pg_test_validate_format_normal ... ok -test tests::pg_test_validate_format_empty_string ... ok -test tests::pg_test_validate_dependencies ... ok -test tests::pg_test_validate_dependencies_merging ... ok -test tests::pg_test_validate_additional_properties ... ok -test tests::pg_test_validate_enum_schema ... ok -test tests::pg_test_validate_errors ... ok -test tests::pg_test_validate_not_cached ... ok -test tests::pg_test_validate_oneof ... ok -test tests::pg_test_validate_punc_with_refs ... ok -test tests::pg_test_validate_property_merging ... ok -test tests::pg_test_validate_punc_local_refs ... ok -test tests::pg_test_validate_required_merging ... ok -test tests::pg_test_validate_required ... ok -test tests::pg_test_validate_simple ... ok -test tests::pg_test_validate_root_types ... ok -test tests::pg_test_validate_strict ... ok -test tests::pg_test_validate_title_override ... ok -test tests::pg_test_validate_unevaluated_properties ... ok -test tests::pg_test_validate_type_matching ... ok - -test result: ok. 23 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 7.66s - - -running 0 tests - -test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s - - -running 0 tests - -test result: ok. 
0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s - diff --git a/src/lib.rs b/src/lib.rs index 9327558..a6e8ff3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,12 +7,13 @@ use lazy_static::lazy_static; use serde_json::{json, Value, Number}; use std::borrow::Cow; use std::collections::hash_map::Entry; -use std::{collections::HashMap, sync::RwLock}; +use std::{collections::{HashMap, HashSet}, sync::RwLock}; #[derive(Clone, Copy, Debug, PartialEq)] enum SchemaType { Enum, Type, + Family, // Added for generated hierarchy schemas PublicPunc, PrivatePunc, } @@ -20,7 +21,6 @@ enum SchemaType { struct Schema { index: SchemaIndex, t: SchemaType, - value: Value, } struct Cache { @@ -77,9 +77,11 @@ fn cache_json_schemas(enums: JsonB, types: JsonB, puncs: JsonB) -> JsonB { } } - // Phase 2: Types + // Phase 2: Types & Hierarchy Pre-processing + let mut hierarchy_map: HashMap> = HashMap::new(); if let Some(types_array) = types_value.as_array() { for type_row in types_array { + // Process main schemas for the type if let Some(schemas_raw) = type_row.get("schemas") { if let Some(schemas_array) = schemas_raw.as_array() { for schema_def in schemas_array { @@ -89,9 +91,37 @@ fn cache_json_schemas(enums: JsonB, types: JsonB, puncs: JsonB) -> JsonB { } } } + + // Process hierarchy to build .family enums + if let Some(type_name) = type_row.get("name").and_then(|v| v.as_str()) { + if let Some(hierarchy_raw) = type_row.get("hierarchy") { + if let Some(hierarchy_array) = hierarchy_raw.as_array() { + for ancestor_val in hierarchy_array { + if let Some(ancestor_name) = ancestor_val.as_str() { + hierarchy_map + .entry(ancestor_name.to_string()) + .or_default() + .insert(type_name.to_string()); + } + } + } + } + } } } + // Generate and add the .family schemas + for (base_type, descendant_types) in hierarchy_map { + let family_schema_id = format!("{}.family", base_type); + let enum_values: Vec = descendant_types.into_iter().collect(); + let family_schema = json!({ + 
"$id": family_schema_id, + "type": "string", + "enum": enum_values + }); + schemas_to_compile.push((family_schema_id, family_schema, SchemaType::Family)); + } + // Phase 3: Puncs if let Some(puncs_array) = puncs_value.as_array() { for punc_row in puncs_array { @@ -166,7 +196,7 @@ fn compile_all_schemas( for (id, value, schema_type) in schemas_to_compile { match compiler.compile(id, &mut cache.schemas) { Ok(index) => { - cache.map.insert(id.clone(), Schema { index, t: *schema_type, value: value.clone() }); + cache.map.insert(id.clone(), Schema { index, t: *schema_type }); } Err(e) => { match &e { diff --git a/src/schemas.rs b/src/schemas.rs index 36018e0..1e20562 100644 --- a/src/schemas.rs +++ b/src/schemas.rs @@ -1056,4 +1056,73 @@ pub fn nullable_union_schemas() -> JsonB { }]); cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs)) -} \ No newline at end of file +} + +pub fn hierarchy_schemas() -> JsonB { + let enums = json!([]); + let types = json!([ + { + "name": "entity", + "hierarchy": ["entity"], + "schemas": [{ + "$id": "entity", + "type": "object", + "properties": { + "id": { "type": "string" }, + "type": { "$ref": "entity.family", "override": true } + }, + "required": ["id", "type"] + }] + }, + { + "name": "organization", + "hierarchy": ["entity", "organization"], + "schemas": [{ + "$id": "organization", + "$ref": "entity", + "properties": { + "type": { "$ref": "organization.family", "override": true }, + "name": { "type": "string" } + }, + "required": ["name"] + }] + }, + { + "name": "user", + "hierarchy": ["entity", "organization", "user"], + "schemas": [{ + "$id": "user", + "$ref": "organization", + "properties": { + "type": { "$ref": "user.family", "override": true }, + "password": { "type": "string" } + }, + "required": ["password"] + }] + }, + { + "name": "person", + "hierarchy": ["entity", "organization", "user", "person"], + "schemas": [{ + "$id": "person", + "$ref": "user", + "properties": { + "type": { "$ref": "person.family", "override": 
true }, + "first_name": { "type": "string" } + }, + "required": ["first_name"] + }] + } + ]); + + let puncs = json!([{ + "name": "test_org_punc", + "public": false, + "schemas": [{ + "$id": "test_org_punc.request", + "$ref": "organization" + }] + }]); + + cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs)) +} diff --git a/src/tests.rs b/src/tests.rs index ea5ef10..746a4b2 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -1036,4 +1036,54 @@ fn test_validate_nullable_union() { let result_invalid = validate_json_schema("nullable_union_test.request", jsonb(invalid_string)); assert_failure(&result_invalid); assert_has_error(&result_invalid, "TYPE_MISMATCH", "/nullable_prop"); -} \ No newline at end of file +} + +#[pg_test] +fn test_validate_type_hierarchy() { + clear_json_schemas(); + let cache_result = hierarchy_schemas(); + assert_success(&cache_result); + + // 1. Test success case: validating a derived type (person) against a base schema (organization) + let person_instance = json!({ + "id": "person-id", + "type": "person", + "name": "person-name", + "password": "person-password", + "first_name": "person-first-name" + }); + let result_success = validate_json_schema("organization", jsonb(person_instance.clone())); + assert_success(&result_success); + + // 2. Test success case: validating a base type (organization) against its own schema + let org_instance = json!({ + "id": "org-id", + "type": "organization", + "name": "org-name" + }); + let result_org_success = validate_json_schema("organization", jsonb(org_instance)); + assert_success(&result_org_success); + + // 3. Test failure case: validating an ancestor type (entity) against a derived schema (organization) + let entity_instance = json!({ + "id": "entity-id", + "type": "entity" + }); + let result_fail_ancestor = validate_json_schema("organization", jsonb(entity_instance)); + assert_failure(&result_fail_ancestor); + assert_has_error(&result_fail_ancestor, "ENUM_VIOLATED", "/type"); + + // 4. 
Test failure case: validating a completely unrelated type + let unrelated_instance = json!({ + "id": "job-id", + "type": "job", + "name": "job-name" + }); + let result_fail_unrelated = validate_json_schema("organization", jsonb(unrelated_instance)); + assert_failure(&result_fail_unrelated); + assert_has_error(&result_fail_unrelated, "ENUM_VIOLATED", "/type"); + + // 5. Test that the punc using the schema also works + let punc_success = validate_json_schema("test_org_punc.request", jsonb(person_instance.clone())); + assert_success(&punc_success); +}