added keyword to jspg

This commit is contained in:
2026-02-26 15:46:01 -05:00
parent 7b55277116
commit bc5489b1ea
11 changed files with 1404 additions and 1532 deletions

View File

@ -9,7 +9,7 @@ It is designed to serve as the validation engine for the "Punc" architecture, wh
1. **Draft 2020-12 Compliance**: Attempt to adhere to the official JSON Schema Draft 2020-12 specification. 1. **Draft 2020-12 Compliance**: Attempt to adhere to the official JSON Schema Draft 2020-12 specification.
2. **Ultra-Fast Validation**: Compile schemas into an optimized in-memory representation for near-instant validation during high-throughput workloads. 2. **Ultra-Fast Validation**: Compile schemas into an optimized in-memory representation for near-instant validation during high-throughput workloads.
3. **Connection-Bound Caching**: Leverage the PostgreSQL session lifecycle to maintain a per-connection schema cache, eliminating the need for repetitive parsing. 3. **Connection-Bound Caching**: Leverage the PostgreSQL session lifecycle to maintain a per-connection schema cache, eliminating the need for repetitive parsing.
4. **Structural Inheritance**: Support object-oriented schema design via Implicit Keyword Shadowing and virtual `.family` schemas. 4. **Structural Inheritance**: Support object-oriented schema design via Implicit Keyword Shadowing and virtual `$family` references.
5. **Punc Integration**: Validation is aware of the "Punc" context (request/response) and can validate `cue` objects efficiently. 5. **Punc Integration**: Validation is aware of the "Punc" context (request/response) and can validate `cue` objects efficiently.
## 🔌 API Reference ## 🔌 API Reference
@ -27,7 +27,7 @@ Loads and compiles the entire schema registry into the session's memory, atomica
* **Behavior**: * **Behavior**:
* Parses all inputs into an internal schema graph. * Parses all inputs into an internal schema graph.
* Resolves all internal references (`$ref`). * Resolves all internal references (`$ref`).
* Generates virtual `.family` schemas for type hierarchies. * Generates virtual union schemas for type hierarchies referenced via `$family`.
* Compiles schemas into validators. * Compiles schemas into validators.
* **Returns**: `{"response": "success"}` or an error object. * **Returns**: `{"response": "success"}` or an error object.
@ -78,10 +78,10 @@ Standard JSON Schema composition (`allOf`) is additive (Intersection), meaning c
* **Composition (`allOf`)**: When using `allOf`, standard intersection rules apply. No shadowing occurs; all constraints from all branches must pass. This is used for mixins or interfaces. * **Composition (`allOf`)**: When using `allOf`, standard intersection rules apply. No shadowing occurs; all constraints from all branches must pass. This is used for mixins or interfaces.
### 2. Virtual Family Schemas (`.family`) ### 2. Virtual Family References (`$family`)
To support polymorphic fields (e.g., a field that accepts any "User" type), JSPG generates virtual schemas representing type hierarchies. To support polymorphic fields (e.g., a field that accepts any "User" type), JSPG generates virtual schemas representing type hierarchies.
* **Mechanism**: When caching types, if a type defines a `hierarchy` (e.g., `["entity", "organization", "person"]`), JSPG generates a schema like `organization.family` which is a `oneOf` containing refs to all valid descendants. * **Mechanism**: When caching types, if a type defines a `hierarchy` (e.g., `["entity", "organization", "person"]`), JSPG generates a virtual `oneOf` family containing refs to all valid descendants. A family can only be referenced with `{"$family": "organization"}`. Because `$family` is a macro-pointer that swaps in the virtual union, it **must** be used exclusively in its schema object; you cannot define other properties alongside it.
### 3. Strict by Default & Extensibility ### 3. Strict by Default & Extensibility
JSPG enforces a "Secure by Default" philosophy. All schemas are treated as if `unevaluatedProperties: false` (and `unevaluatedItems: false`) is set, unless explicitly overridden. JSPG enforces a "Secure by Default" philosophy. All schemas are treated as if `unevaluatedProperties: false` (and `unevaluatedItems: false`) is set, unless explicitly overridden.

118
src/context.rs Normal file
View File

@ -0,0 +1,118 @@
use crate::error::ValidationError;
use crate::instance::ValidationInstance;
use crate::result::ValidationResult;
use crate::schema::Schema;
use crate::validator::Validator;
use std::collections::HashSet;
/// State carried while validating one instance node against one schema node.
///
/// `'a` is the lifetime of the borrowed validator and schemas; `I` is the
/// instance handle type.
pub struct ValidationContext<'a, I: ValidationInstance<'a>> {
    /// Validator this context belongs to; shared by every derived context.
    pub validator: &'a Validator,
    /// Root schema; copied unchanged into every derived context.
    pub root: &'a Schema,
    /// Schema node this context validates against.
    pub schema: &'a Schema,
    /// Handle to the instance value being validated.
    pub instance: I,
    /// Location string for this context; empty when built with `new`.
    // NOTE(review): presumably a JSON-Pointer-style instance path — confirm against the rules module.
    pub path: String,
    /// Nesting level: 0 from `new`, parent depth + 1 from `derive`.
    pub depth: usize,
    /// Stack of base identifiers; `validate` resolves the schema's id against
    /// the last entry and pushes the result.
    pub scope: Vec<String>,
    // NOTE(review): presumably the names shadowed by an inheriting schema — semantics are not
    // visible in this file; `derive_for_schema` resets it to an empty set.
    pub overrides: HashSet<String>,
    /// Effective extensibility: the schema's own `extensible` when set,
    /// otherwise the value passed in by the caller.
    pub extensible: bool,
    // NOTE(review): meaning not visible in this file; stored and forwarded as given.
    pub reporter: bool,
}
impl<'a, I: ValidationInstance<'a>> ValidationContext<'a, I> {
    /// Builds a top-level context: empty `path`, `depth` 0.
    ///
    /// `extensible` is only the inherited default; if `schema.extensible` is
    /// set, the schema's own value wins.
    pub fn new(
        validator: &'a Validator,
        root: &'a Schema,
        schema: &'a Schema,
        instance: I,
        scope: Vec<String>,
        overrides: HashSet<String>,
        extensible: bool,
        reporter: bool,
    ) -> Self {
        Self {
            validator,
            root,
            schema,
            instance,
            path: String::new(),
            depth: 0,
            scope,
            overrides,
            extensible: schema.extensible.unwrap_or(extensible),
            reporter,
        }
    }

    /// Builds a child context one level deeper than `self`.
    ///
    /// `validator` and `root` are inherited from `self`; everything else is
    /// taken from the arguments. As in `new`, `schema.extensible` overrides
    /// the `extensible` argument when it is set.
    pub fn derive(
        &self,
        schema: &'a Schema,
        instance: I,
        path: &str,
        scope: Vec<String>,
        overrides: HashSet<String>,
        extensible: bool,
        reporter: bool,
    ) -> Self {
        Self {
            validator: self.validator,
            root: self.root,
            schema,
            instance,
            path: path.to_string(),
            depth: self.depth + 1,
            scope,
            overrides,
            extensible: schema.extensible.unwrap_or(extensible),
            reporter,
        }
    }

    /// Derives a child context for a different schema applied to the *same*
    /// instance, path and scope. The child starts with an empty `overrides`
    /// set and inherits this context's effective `extensible` as its default.
    pub fn derive_for_schema(&self, schema: &'a Schema, reporter: bool) -> Self {
        self.derive(
            schema,
            self.instance,
            &self.path,
            self.scope.clone(),
            HashSet::new(),
            self.extensible,
            reporter,
        )
    }

    /// Validates the instance against the schema of this context.
    ///
    /// When the schema declares an id, a new base is pushed onto the scope
    /// before validation: the id is joined onto the current base (the last
    /// scope entry) if that base is non-empty and parses as a URL, otherwise
    /// the id is used verbatim. Validation then runs on a shadow context that
    /// differs from `self` only in its scope.
    ///
    /// Schemas without an id are validated directly on `self`, without
    /// copying the scope.
    pub fn validate(&self) -> Result<ValidationResult, ValidationError> {
        let id = match &self.schema.obj.id {
            Some(id) => id,
            // Common case: nothing to push, so no scope clone is needed.
            None => return self.validate_scoped(),
        };

        // Any failure along the way (no base, empty base, unparsable base,
        // failed join) falls back to the raw id.
        let new_base = self
            .scope
            .last()
            .filter(|base| !base.is_empty())
            .and_then(|base| url::Url::parse(base).ok())
            .and_then(|base_url| base_url.join(id).ok())
            .map(|joined| joined.to_string())
            .unwrap_or_else(|| id.clone());

        let mut effective_scope = Vec::with_capacity(self.scope.len() + 1);
        effective_scope.extend_from_slice(&self.scope);
        effective_scope.push(new_base);

        let shadow = Self {
            validator: self.validator,
            root: self.root,
            schema: self.schema,
            instance: self.instance,
            path: self.path.clone(),
            depth: self.depth,
            scope: effective_scope,
            overrides: self.overrides.clone(),
            extensible: self.extensible,
            reporter: self.reporter,
        };
        shadow.validate_scoped()
    }
}

6
src/error.rs Normal file
View File

@ -0,0 +1,6 @@
/// A single validation failure; serializable via `serde`.
#[derive(Debug, Clone, serde::Serialize)]
pub struct ValidationError {
    /// Machine-readable error code.
    pub code: String,
    /// Human-readable description of the failure.
    pub message: String,
    /// Location the error is attributed to.
    // NOTE(review): presumably the instance path, with "" meaning the root — confirm.
    pub path: String,
}

98
src/instance.rs Normal file
View File

@ -0,0 +1,98 @@
use serde_json::Value;
use std::collections::HashSet;
use std::ptr::NonNull;
/// A copyable handle to a JSON value under validation.
///
/// `'a` is the lifetime of the reference handed out by `as_value`.
pub trait ValidationInstance<'a>: Copy + Clone {
    /// Borrows the underlying JSON value.
    fn as_value(&self) -> &'a Value;
    /// Returns a handle to the child stored under `key`, if there is one.
    fn child_at_key(&self, key: &str) -> Option<Self>;
    /// Returns a handle to the child at position `idx`, if there is one.
    fn child_at_index(&self, idx: usize) -> Option<Self>;
    /// Hook for pruning an object in place. The default implementation does nothing.
    fn prune_object(&self, _keys: &HashSet<String>) {}
    /// Hook for pruning an array in place. The default implementation does nothing.
    fn prune_array(&self, _indices: &HashSet<usize>) {}
}
/// Instance handle that wraps a shared borrow of a JSON value.
#[derive(Clone, Copy)]
pub struct ReadOnlyInstance<'a>(pub &'a Value);
impl<'a> ValidationInstance<'a> for ReadOnlyInstance<'a> {
fn as_value(&self) -> &'a Value {
self.0
}
fn child_at_key(&self, key: &str) -> Option<Self> {
self.0.get(key).map(ReadOnlyInstance)
}
fn child_at_index(&self, idx: usize) -> Option<Self> {
self.0.get(idx).map(ReadOnlyInstance)
}
}
/// Instance handle that stores a raw, non-null pointer to a JSON value so the
/// handle can be `Copy` while still allowing in-place mutation.
#[derive(Clone, Copy)]
pub struct MutableInstance {
    // Pointer to the wrapped value; set from a `&mut Value` in `new`.
    ptr: NonNull<Value>,
}
impl MutableInstance {
pub fn new(val: &mut Value) -> Self {
Self {
ptr: NonNull::from(val),
}
}
}
/// Pointer-backed instance: supports navigation and in-place pruning.
///
/// NOTE(review): `'a` is chosen freely by the caller and is not tied to the
/// borrow that was passed to `MutableInstance::new`. Every method below
/// therefore relies on the caller keeping the pointee alive, and not
/// otherwise borrowed in a conflicting way, for as long as the handle and the
/// references it hands out are used — confirm this holds at the call sites.
impl<'a> ValidationInstance<'a> for MutableInstance {
    /// Reborrows the pointee as a shared reference with lifetime `'a`.
    fn as_value(&self) -> &'a Value {
        // SAFETY (assumed, TODO confirm): `ptr` was created from a `&mut Value`
        // in `new`; the pointee must still be alive and not mutably borrowed.
        unsafe { self.ptr.as_ref() }
    }
    /// Returns a handle to the value stored under `key` when the pointee is an
    /// object that contains it; `None` otherwise.
    fn child_at_key(&self, key: &str) -> Option<Self> {
        // SAFETY (assumed, TODO confirm): same pointee validity requirement as
        // `as_value`, plus no other live reference to the parent while the
        // `&mut` below exists.
        unsafe {
            // Probe through a shared reference first, so a `&mut` to the
            // parent is only created once the child is known to exist.
            if let Some(obj) = self.ptr.as_ref().as_object() {
                if obj.contains_key(key) {
                    let parent_mut = &mut *self.ptr.as_ptr();
                    if let Some(child_val) = parent_mut.get_mut(key) {
                        return Some(MutableInstance::new(child_val));
                    }
                }
            }
            None
        }
    }
    /// Returns a handle to the element at `idx` when the pointee is an array
    /// with more than `idx` elements; `None` otherwise.
    fn child_at_index(&self, idx: usize) -> Option<Self> {
        // SAFETY (assumed, TODO confirm): as for `child_at_key`.
        unsafe {
            // Bounds-check through a shared reference before taking `&mut`.
            if let Some(arr) = self.ptr.as_ref().as_array() {
                if idx < arr.len() {
                    let parent_mut = &mut *self.ptr.as_ptr();
                    if let Some(child_val) = parent_mut.get_mut(idx) {
                        return Some(MutableInstance::new(child_val));
                    }
                }
            }
            None
        }
    }
    /// Removes, in place, every object entry whose key is not in `keys`.
    /// Does nothing when the pointee is not an object.
    fn prune_object(&self, keys: &HashSet<String>) {
        // SAFETY (assumed, TODO confirm): requires exclusive access to the
        // pointee for the duration of the call.
        unsafe {
            let val_mut = &mut *self.ptr.as_ptr();
            if let Some(obj) = val_mut.as_object_mut() {
                obj.retain(|k, _| keys.contains(k));
            }
        }
    }
    /// Removes, in place, every array element whose original position is not
    /// in `indices`. Does nothing when the pointee is not an array.
    fn prune_array(&self, indices: &HashSet<usize>) {
        // SAFETY (assumed, TODO confirm): requires exclusive access to the
        // pointee for the duration of the call.
        unsafe {
            let val_mut = &mut *self.ptr.as_ptr();
            if let Some(arr) = val_mut.as_array_mut() {
                // `retain` passes no index to the closure, so count the
                // elements as they are visited to track each original position.
                let mut i = 0;
                arr.retain(|_| {
                    let keep = indices.contains(&i);
                    i += 1;
                    keep
                });
            }
        }
    }
}

View File

@ -11,8 +11,13 @@ mod schema;
pub mod util; pub mod util;
mod validator; mod validator;
use crate::schema::Schema; pub mod context;
use serde_json::{Value, json}; pub mod error;
pub mod instance;
pub mod result;
pub(crate) mod rules;
use serde_json::json;
use std::sync::{Arc, RwLock}; use std::sync::{Arc, RwLock};
lazy_static::lazy_static! { lazy_static::lazy_static! {
@ -26,79 +31,12 @@ lazy_static::lazy_static! {
#[pg_extern(strict)] #[pg_extern(strict)]
pub fn cache_json_schemas(enums: JsonB, types: JsonB, puncs: JsonB) -> JsonB { pub fn cache_json_schemas(enums: JsonB, types: JsonB, puncs: JsonB) -> JsonB {
// 1. Build a new Registry LOCALLY (on stack) // 1 & 2. Build Registry, Families, and Wrap in Validator all in one shot
let mut registry = registry::Registry::new(); let new_validator = crate::validator::Validator::from_punc_definition(
Some(&enums.0),
// Generate Family Schemas from Types Some(&types.0),
{ Some(&puncs.0),
let mut family_map: std::collections::HashMap<String, std::collections::HashSet<String>> = );
std::collections::HashMap::new();
if let Value::Array(arr) = &types.0 {
for item in arr {
if let Some(name) = item.get("name").and_then(|v| v.as_str()) {
if let Some(hierarchy) = item.get("hierarchy").and_then(|v| v.as_array()) {
for ancestor in hierarchy {
if let Some(anc_str) = ancestor.as_str() {
family_map
.entry(anc_str.to_string())
.or_default()
.insert(name.to_string());
}
}
}
}
}
}
for (family_name, members) in family_map {
let id = format!("{}.family", family_name);
// Object Union (for polymorphic object validation)
// This allows the schema to match ANY of the types in the family hierarchy
let object_refs: Vec<Value> = members.iter().map(|s| json!({ "$ref": s })).collect();
let schema_json = json!({
"$id": id,
"oneOf": object_refs
});
if let Ok(schema) = serde_json::from_value::<Schema>(schema_json) {
registry.add(schema);
}
}
// Helper to parse and cache a list of items
let mut cache_items = |items: JsonB| {
if let Value::Array(arr) = items.0 {
for item in arr {
// For now, we assume the item structure matches what the generator expects
// or what `json_schemas.sql` sends.
// The `Schema` struct in `schema.rs` is designed to deserialize standard JSON Schema.
// However, the input here is an array of objects that *contain* a `schemas` array.
// We need to extract those inner schemas.
if let Some(schemas_val) = item.get("schemas") {
if let Value::Array(schemas) = schemas_val {
for schema_val in schemas {
// Deserialize into our robust Schema struct to ensure validity/parsing
if let Ok(schema) = serde_json::from_value::<Schema>(schema_val.clone()) {
// Registry handles compilation
registry.add(schema);
}
}
}
}
}
}
};
cache_items(enums);
cache_items(types);
cache_items(puncs); // public/private distinction logic to come later
}
// 2. Wrap in Validator and Arc
let new_validator = validator::Validator::new(registry);
let new_arc = Arc::new(new_validator); let new_arc = Arc::new(new_validator);
// 3. ATOMIC SWAP // 3. ATOMIC SWAP

27
src/result.rs Normal file
View File

@ -0,0 +1,27 @@
use crate::error::ValidationError;
use std::collections::HashSet;
/// Outcome of a validation run: the collected errors plus bookkeeping sets.
///
/// Only `errors` is serialized; both `evaluated_*` sets are marked
/// `#[serde(skip)]`.
#[derive(Debug, Default, Clone, serde::Serialize)]
pub struct ValidationResult {
    /// Errors collected so far; an empty list means the result is valid.
    pub errors: Vec<ValidationError>,
    // NOTE(review): presumably the object keys already covered by some keyword — confirm
    // against the rules module.
    #[serde(skip)]
    pub evaluated_keys: HashSet<String>,
    // NOTE(review): presumably the array positions already covered by some keyword — confirm
    // against the rules module.
    #[serde(skip)]
    pub evaluated_indices: HashSet<usize>,
}
impl ValidationResult {
pub fn new() -> Self {
Self::default()
}
pub fn merge(&mut self, other: ValidationResult) {
self.errors.extend(other.errors);
self.evaluated_keys.extend(other.evaluated_keys);
self.evaluated_indices.extend(other.evaluated_indices);
}
pub fn is_valid(&self) -> bool {
self.errors.is_empty()
}
}

1008
src/rules.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@ -35,6 +35,9 @@ pub struct SchemaObject {
pub pattern_properties: Option<BTreeMap<String, Arc<Schema>>>, pub pattern_properties: Option<BTreeMap<String, Arc<Schema>>>,
#[serde(rename = "additionalProperties")] #[serde(rename = "additionalProperties")]
pub additional_properties: Option<Arc<Schema>>, pub additional_properties: Option<Arc<Schema>>,
#[serde(rename = "$family")]
pub family: Option<String>,
pub required: Option<Vec<String>>, pub required: Option<Vec<String>>,
// dependencies can be schema dependencies or property dependencies // dependencies can be schema dependencies or property dependencies

View File

@ -50,74 +50,12 @@ pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> {
let group = &suite[index]; let group = &suite[index];
let mut failures = Vec::<String>::new(); let mut failures = Vec::<String>::new();
// Create Local Registry for this test group // Create Validator Instance and parse enums, types, and puncs automatically
let mut registry = crate::registry::Registry::new(); let mut validator = Validator::from_punc_definition(
group.enums.as_ref(),
// Helper to register items with 'schemas' group.types.as_ref(),
let register_schemas = |registry: &mut crate::registry::Registry, items_val: Option<&Value>| { group.puncs.as_ref(),
if let Some(val) = items_val { );
if let Value::Array(arr) = val {
for item in arr {
if let Some(schemas_val) = item.get("schemas") {
if let Value::Array(schemas) = schemas_val {
for schema_val in schemas {
if let Ok(schema) =
serde_json::from_value::<crate::schema::Schema>(schema_val.clone())
{
registry.add(schema);
}
}
}
}
}
}
}
};
// 1. Register Family Schemas if 'types' is present
if let Some(types_val) = &group.types {
if let Value::Array(arr) = types_val {
let mut family_map: std::collections::HashMap<String, std::collections::HashSet<String>> =
std::collections::HashMap::new();
for item in arr {
if let Some(name) = item.get("name").and_then(|v| v.as_str()) {
if let Some(hierarchy) = item.get("hierarchy").and_then(|v| v.as_array()) {
for ancestor in hierarchy {
if let Some(anc_str) = ancestor.as_str() {
family_map
.entry(anc_str.to_string())
.or_default()
.insert(name.to_string());
}
}
}
}
}
for (family_name, members) in family_map {
let id = format!("{}.family", family_name);
let object_refs: Vec<Value> = members
.iter()
.map(|s| serde_json::json!({ "$ref": s }))
.collect();
let schema_json = serde_json::json!({
"$id": id,
"oneOf": object_refs
});
if let Ok(schema) = serde_json::from_value::<crate::schema::Schema>(schema_json) {
registry.add(schema);
}
}
}
}
// 2. Register items directly
register_schemas(&mut registry, group.enums.as_ref());
register_schemas(&mut registry, group.types.as_ref());
register_schemas(&mut registry, group.puncs.as_ref());
// 3. Register root 'schemas' if present (generic test support) // 3. Register root 'schemas' if present (generic test support)
// Some tests use a raw 'schema' or 'schemas' field at the group level // Some tests use a raw 'schema' or 'schemas' field at the group level
@ -126,12 +64,12 @@ pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> {
Ok(mut schema) => { Ok(mut schema) => {
let id_clone = schema.obj.id.clone(); let id_clone = schema.obj.id.clone();
if id_clone.is_some() { if id_clone.is_some() {
registry.add(schema); validator.registry.add(schema);
} else { } else {
// Fallback ID if none provided in schema // Fallback ID if none provided in schema
let id = format!("test:{}:{}", path, index); let id = format!("test:{}:{}", path, index);
schema.obj.id = Some(id); schema.obj.id = Some(id);
registry.add(schema); validator.registry.add(schema);
} }
} }
Err(e) => { Err(e) => {
@ -143,9 +81,6 @@ pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> {
} }
} }
// Create Validator Instance (Takes ownership of registry)
let validator = Validator::new(registry);
// 4. Run Tests // 4. Run Tests
for (_test_index, test) in group.tests.iter().enumerate() { for (_test_index, test) in group.tests.iter().enumerate() {
let mut schema_id = test.schema_id.clone(); let mut schema_id = test.schema_id.clone();
@ -251,79 +186,13 @@ pub fn run_test_file(path: &str) -> Result<(), String> {
let mut failures = Vec::<String>::new(); let mut failures = Vec::<String>::new();
for (group_index, group) in suite.into_iter().enumerate() { for (group_index, group) in suite.into_iter().enumerate() {
// Create Isolated Registry for this test group // Create Validator Instance and parse enums, types, and puncs automatically
let mut registry = crate::registry::Registry::new(); let mut validator = Validator::from_punc_definition(
group.enums.as_ref(),
group.types.as_ref(),
group.puncs.as_ref(),
);
// Helper to register items with 'schemas'
let register_schemas = |registry: &mut crate::registry::Registry, items_val: Option<Value>| {
if let Some(val) = items_val {
if let Value::Array(arr) = val {
for item in arr {
if let Some(schemas_val) = item.get("schemas") {
if let Value::Array(schemas) = schemas_val {
for schema_val in schemas {
if let Ok(schema) =
serde_json::from_value::<crate::schema::Schema>(schema_val.clone())
{
registry.add(schema);
}
}
}
}
}
}
}
};
// 1. Register Family Schemas if 'types' is present
if let Some(types_val) = &group.types {
if let Value::Array(arr) = types_val {
let mut family_map: std::collections::HashMap<String, std::collections::HashSet<String>> =
std::collections::HashMap::new();
for item in arr {
if let Some(name) = item.get("name").and_then(|v| v.as_str()) {
// Default hierarchy contains self if not specified?
// Usually hierarchy is explicit in these tests.
if let Some(hierarchy) = item.get("hierarchy").and_then(|v| v.as_array()) {
for ancestor in hierarchy {
if let Some(anc_str) = ancestor.as_str() {
family_map
.entry(anc_str.to_string())
.or_default()
.insert(name.to_string());
}
}
}
}
}
for (family_name, members) in family_map {
let id = format!("{}.family", family_name);
let object_refs: Vec<Value> = members
.into_iter()
.map(|s| serde_json::json!({ "$ref": s }))
.collect();
let schema_json = serde_json::json!({
"$id": id,
"oneOf": object_refs
});
if let Ok(schema) = serde_json::from_value::<crate::schema::Schema>(schema_json) {
registry.add(schema);
}
}
}
}
// Register 'types', 'enums', and 'puncs' if present (JSPG style)
register_schemas(&mut registry, group.types);
register_schemas(&mut registry, group.enums);
register_schemas(&mut registry, group.puncs);
// Register main 'schema' if present (Standard style)
// Ensure ID is a valid URI to avoid Url::parse errors in Compiler
let unique_id = format!("test:{}:{}", path, group_index); let unique_id = format!("test:{}:{}", path, group_index);
// Register main 'schema' if present (Standard style) // Register main 'schema' if present (Standard style)
@ -336,12 +205,9 @@ pub fn run_test_file(path: &str) -> Result<(), String> {
if schema.obj.id.is_none() { if schema.obj.id.is_none() {
schema.obj.id = Some(unique_id.clone()); schema.obj.id = Some(unique_id.clone());
} }
registry.add(schema); validator.registry.add(schema);
} }
// Create Instance (Takes Ownership)
let validator = Validator::new(registry);
for test in group.tests { for test in group.tests {
// Use explicit schema_id from test, or default to unique_id // Use explicit schema_id from test, or default to unique_id
let schema_id = test.schema_id.as_deref().unwrap_or(&unique_id).to_string(); let schema_id = test.schema_id.as_deref().unwrap_or(&unique_id).to_string();

File diff suppressed because it is too large Load Diff

View File

@ -1067,7 +1067,7 @@
"schemas": [ "schemas": [
{ {
"$id": "polymorphic_org_punc.request", "$id": "polymorphic_org_punc.request",
"$ref": "organization.family" "$family": "organization"
} }
] ]
}, },
@ -1080,6 +1080,21 @@
"$ref": "organization" "$ref": "organization"
} }
] ]
},
{
"name": "invalid_family_punc",
"public": false,
"schemas": [
{
"$id": "invalid_family_punc.request",
"$family": "organization",
"properties": {
"extra": {
"type": "string"
}
}
}
]
} }
], ],
"tests": [ "tests": [
@ -1240,6 +1255,23 @@
"path": "/first_name" "path": "/first_name"
} }
] ]
},
{
"description": "invalid schema due to family exclusivity violation",
"schema_id": "invalid_family_punc.request",
"data": {
"id": "org-2",
"type": "organization",
"name": "Strict Corp",
"extra": "value"
},
"valid": false,
"expect_errors": [
{
"code": "INVALID_SCHEMA",
"path": ""
}
]
} }
] ]
}, },