added type family support

This commit is contained in:
2025-10-10 17:35:57 -04:00
parent d8a924c662
commit 4b6ea6536c
5 changed files with 178 additions and 51 deletions

View File

@ -10,7 +10,29 @@ This document outlines the purpose of the `jspg` project, its architecture, and
The extension is designed for high-performance scenarios where schemas are defined once and used many times for validation. It achieves this through an in-memory cache.
1. **Caching:** A user first calls the `cache_json_schemas(enums, types, puncs)` SQL function. This function takes arrays of JSON objects representing different kinds of schemas within a larger application framework. It uses the vendored `boon` crate to compile all these schemas into an efficient internal format and stores them in a static, in-memory `SCHEMA_CACHE`. This cache is managed by a `RwLock` to allow concurrent reads during validation.
1. **Caching and Pre-processing:** A user first calls the `cache_json_schemas(enums, types, puncs)` SQL function. This function takes arrays of JSON objects representing different kinds of schemas:
- `enums`: Standalone enum schemas (e.g., for a `task_priority` list).
- `types`: Schemas for core application data models (e.g., `person`, `organization`). These may contain a `hierarchy` array for inheritance information.
- `puncs`: Schemas for API/function-specific requests and responses.
Before compiling, `jspg` performs a crucial **pre-processing step** for type hierarchies. It inspects each definition in the `types` array. If a type includes a `hierarchy` array (e.g., a `person` type with `["entity", "organization", "user", "person"]`), `jspg` uses this to build a map of "type families."
From this map, it generates new, virtual schemas on the fly. For example, for the `organization` type, it will generate a schema with `$id: "organization.family"` that contains an `enum` of the type itself and all of its descendant types, such as `["organization", "user", "person"]`.
This allows developers to write more flexible schemas. Instead of strictly requiring a `const` type, you can validate against an entire inheritance chain:
```json
// In an "organization" schema definition
"properties": {
"type": {
// Allows the 'type' field to be "organization", "user", or "person"
"$ref": "organization.family",
"override": true
}
}
```
Finally, all user-defined schemas and the newly generated `.family` schemas are passed to the vendored `boon` crate, compiled into an efficient internal format, and stored in a static, in-memory `SCHEMA_CACHE`. This cache is managed by a `RwLock` to allow for high-performance, concurrent reads during validation.
2. **Validation:** The `validate_json_schema(schema_id, instance)` SQL function is then used to validate a JSONB `instance` against a specific, pre-cached schema identified by its `$id`. This function looks up the compiled schema in the cache and runs the validation, returning a success response or a detailed error report.

44
out.txt
View File

@ -1,44 +0,0 @@
running 23 tests
 Building extension with features pg_test pg17
 Running command "/opt/homebrew/bin/cargo" "build" "--lib" "--features" "pg_test pg17" "--message-format=json-render-diagnostics"
 Installing extension
 Copying control file to /opt/homebrew/share/postgresql@17/extension/jspg.control
 Copying shared library to /opt/homebrew/lib/postgresql@17/jspg.dylib
 Finished installing jspg
test tests::pg_test_cache_invalid ... ok
test tests::pg_test_validate_nested_req_deps ... ok
test tests::pg_test_validate_format_empty_string_with_ref ... ok
test tests::pg_test_validate_format_normal ... ok
test tests::pg_test_validate_format_empty_string ... ok
test tests::pg_test_validate_dependencies ... ok
test tests::pg_test_validate_dependencies_merging ... ok
test tests::pg_test_validate_additional_properties ... ok
test tests::pg_test_validate_enum_schema ... ok
test tests::pg_test_validate_errors ... ok
test tests::pg_test_validate_not_cached ... ok
test tests::pg_test_validate_oneof ... ok
test tests::pg_test_validate_punc_with_refs ... ok
test tests::pg_test_validate_property_merging ... ok
test tests::pg_test_validate_punc_local_refs ... ok
test tests::pg_test_validate_required_merging ... ok
test tests::pg_test_validate_required ... ok
test tests::pg_test_validate_simple ... ok
test tests::pg_test_validate_root_types ... ok
test tests::pg_test_validate_strict ... ok
test tests::pg_test_validate_title_override ... ok
test tests::pg_test_validate_unevaluated_properties ... ok
test tests::pg_test_validate_type_matching ... ok
test result: ok. 23 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 7.66s
running 0 tests
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s
running 0 tests
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s

View File

@ -7,12 +7,13 @@ use lazy_static::lazy_static;
use serde_json::{json, Value, Number};
use std::borrow::Cow;
use std::collections::hash_map::Entry;
use std::{collections::HashMap, sync::RwLock};
use std::{collections::{HashMap, HashSet}, sync::RwLock};
/// Kind tag recorded next to every compiled schema in the cache.
///
/// The tag is carried in the `(id, value, schema_type)` tuples queued for
/// compilation and then copied into the cached `Schema` entry.
#[derive(Clone, Copy, Debug, PartialEq)]
enum SchemaType {
/// Standalone enum schema (presumably taken from the `enums` input; the
/// code that assigns this variant is not visible here — confirm).
Enum,
/// Schema listed under a type row's `schemas` array (presumably; confirm
/// against the type-processing phase).
Type,
/// Generated `<type>.family` schema: a string `enum` built from the
/// `hierarchy` arrays of the type rows.
Family,
/// Punc schema — NOTE(review): presumably chosen when the punc row has
/// `"public": true`; verify against the punc-processing phase.
PublicPunc,
/// Punc schema — NOTE(review): presumably chosen when the punc row is not
/// public; verify against the punc-processing phase.
PrivatePunc,
}
@ -20,7 +21,6 @@ enum SchemaType {
/// One entry of the schema cache map, keyed by schema id.
struct Schema {
// Index handed back by the compiler when the schema was compiled successfully.
index: SchemaIndex,
// Kind of schema this entry represents.
t: SchemaType,
// Clone of the JSON definition the schema was queued with.
// NOTE(review): the construction site is visible in two forms, one of which
// omits this field — confirm whether `value` is still part of the struct.
value: Value,
}
struct Cache {
@ -77,9 +77,11 @@ fn cache_json_schemas(enums: JsonB, types: JsonB, puncs: JsonB) -> JsonB {
}
}
// Phase 2: Types
// Phase 2: Types & Hierarchy Pre-processing
let mut hierarchy_map: HashMap<String, HashSet<String>> = HashMap::new();
if let Some(types_array) = types_value.as_array() {
for type_row in types_array {
// Process main schemas for the type
if let Some(schemas_raw) = type_row.get("schemas") {
if let Some(schemas_array) = schemas_raw.as_array() {
for schema_def in schemas_array {
@ -89,9 +91,37 @@ fn cache_json_schemas(enums: JsonB, types: JsonB, puncs: JsonB) -> JsonB {
}
}
}
// Process hierarchy to build .family enums
if let Some(type_name) = type_row.get("name").and_then(|v| v.as_str()) {
if let Some(hierarchy_raw) = type_row.get("hierarchy") {
if let Some(hierarchy_array) = hierarchy_raw.as_array() {
for ancestor_val in hierarchy_array {
if let Some(ancestor_name) = ancestor_val.as_str() {
hierarchy_map
.entry(ancestor_name.to_string())
.or_default()
.insert(type_name.to_string());
}
}
}
}
}
}
}
// Generate and add the .family schemas
for (base_type, descendant_types) in hierarchy_map {
let family_schema_id = format!("{}.family", base_type);
let enum_values: Vec<String> = descendant_types.into_iter().collect();
let family_schema = json!({
"$id": family_schema_id,
"type": "string",
"enum": enum_values
});
schemas_to_compile.push((family_schema_id, family_schema, SchemaType::Family));
}
// Phase 3: Puncs
if let Some(puncs_array) = puncs_value.as_array() {
for punc_row in puncs_array {
@ -166,7 +196,7 @@ fn compile_all_schemas(
for (id, value, schema_type) in schemas_to_compile {
match compiler.compile(id, &mut cache.schemas) {
Ok(index) => {
cache.map.insert(id.clone(), Schema { index, t: *schema_type, value: value.clone() });
cache.map.insert(id.clone(), Schema { index, t: *schema_type });
}
Err(e) => {
match &e {

View File

@ -1057,3 +1057,72 @@ pub fn nullable_union_schemas() -> JsonB {
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
/// Caches a four-level type hierarchy fixture: `entity`, `organization`, `user`, `person`.
///
/// Each type row declares its `hierarchy` array and constrains its `type`
/// property through a `<name>.family` reference. One punc row
/// (`test_org_punc`, `"public": false`) points its request schema at
/// `organization`. No enum rows are supplied.
///
/// Returns the result of `cache_json_schemas` for these inputs.
pub fn hierarchy_schemas() -> JsonB {
    // Root of the chain: the only row that declares `id` and requires `type`.
    let entity_row = json!({
        "name": "entity",
        "hierarchy": ["entity"],
        "schemas": [{
            "$id": "entity",
            "type": "object",
            "properties": {
                "id": { "type": "string" },
                "type": { "$ref": "entity.family", "override": true }
            },
            "required": ["id", "type"]
        }]
    });

    // Extends `entity` with a required `name`.
    let organization_row = json!({
        "name": "organization",
        "hierarchy": ["entity", "organization"],
        "schemas": [{
            "$id": "organization",
            "$ref": "entity",
            "properties": {
                "type": { "$ref": "organization.family", "override": true },
                "name": { "type": "string" }
            },
            "required": ["name"]
        }]
    });

    // Extends `organization` with a required `password`.
    let user_row = json!({
        "name": "user",
        "hierarchy": ["entity", "organization", "user"],
        "schemas": [{
            "$id": "user",
            "$ref": "organization",
            "properties": {
                "type": { "$ref": "user.family", "override": true },
                "password": { "type": "string" }
            },
            "required": ["password"]
        }]
    });

    // Leaf of the chain: extends `user` with a required `first_name`.
    let person_row = json!({
        "name": "person",
        "hierarchy": ["entity", "organization", "user", "person"],
        "schemas": [{
            "$id": "person",
            "$ref": "user",
            "properties": {
                "type": { "$ref": "person.family", "override": true },
                "first_name": { "type": "string" }
            },
            "required": ["first_name"]
        }]
    });

    let enum_rows = json!([]);
    let type_rows = json!([entity_row, organization_row, user_row, person_row]);
    let punc_rows = json!([{
        "name": "test_org_punc",
        "public": false,
        "schemas": [{
            "$id": "test_org_punc.request",
            "$ref": "organization"
        }]
    }]);

    cache_json_schemas(jsonb(enum_rows), jsonb(type_rows), jsonb(punc_rows))
}

View File

@ -1037,3 +1037,53 @@ fn test_validate_nullable_union() {
assert_failure(&result_invalid);
assert_has_error(&result_invalid, "TYPE_MISMATCH", "/nullable_prop");
}
/// Checks the `.family` type constraint of the `organization` schema.
///
/// Instances typed as `organization` or `person` must validate; instances
/// typed as `entity` or as an unknown type must fail with `ENUM_VIOLATED` at
/// `/type`. A punc request schema referencing `organization` must accept the
/// same person instance.
#[pg_test]
fn test_validate_type_hierarchy() {
    clear_json_schemas();
    assert_success(&hierarchy_schemas());

    // Shared by the first and the last check.
    let person = json!({
        "id": "person-id",
        "type": "person",
        "name": "person-name",
        "password": "person-password",
        "first_name": "person-first-name"
    });

    // 1. A derived type (person) is accepted by the base schema (organization).
    let derived_outcome = validate_json_schema("organization", jsonb(person.clone()));
    assert_success(&derived_outcome);

    // 2. The base type (organization) is accepted by its own schema.
    let organization = json!({
        "id": "org-id",
        "type": "organization",
        "name": "org-name"
    });
    let own_outcome = validate_json_schema("organization", jsonb(organization));
    assert_success(&own_outcome);

    // 3. An ancestor type (entity) is rejected by the derived schema (organization).
    let entity = json!({
        "id": "entity-id",
        "type": "entity"
    });
    let ancestor_outcome = validate_json_schema("organization", jsonb(entity));
    assert_failure(&ancestor_outcome);
    assert_has_error(&ancestor_outcome, "ENUM_VIOLATED", "/type");

    // 4. A type outside the hierarchy is rejected as well.
    let job = json!({
        "id": "job-id",
        "type": "job",
        "name": "job-name"
    });
    let unrelated_outcome = validate_json_schema("organization", jsonb(job));
    assert_failure(&unrelated_outcome);
    assert_has_error(&unrelated_outcome, "ENUM_VIOLATED", "/type");

    // 5. The punc whose request schema references `organization` accepts the person too.
    let punc_outcome = validate_json_schema("test_org_punc.request", jsonb(person));
    assert_success(&punc_outcome);
}