From 4060119b01aba4d6e6ab7726680a9263993bbf24 Mon Sep 17 00:00:00 2001 From: Alex Groleau Date: Sun, 22 Mar 2026 03:35:47 -0400 Subject: [PATCH] schema ids can now contain a subschema --- GEMINI.md | 26 ++++-- src/database/mod.rs | 124 ++++++++++------------------ src/database/schema.rs | 183 +++++++++++++++++++++++++++++++++-------- 3 files changed, 209 insertions(+), 124 deletions(-) diff --git a/GEMINI.md b/GEMINI.md index dcc649c..6cf8386 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -20,9 +20,16 @@ JSPG operates by deeply integrating the JSON Schema Draft 2020-12 specification To support high-throughput operations while allowing for runtime updates (e.g., during hot-reloading), JSPG uses an **Atomic Swap** pattern: 1. **Parser Phase**: Schema JSONs are parsed into ordered `Schema` structs. 2. **Compiler Phase**: The database iterates all parsed schemas and pre-computes native optimization maps (Descendants Map, Depths Map, Variations Map). -3. **Immutable Validator**: The `Validator` struct immutably owns the `Database` registry and all its global maps. Schemas themselves are completely frozen; `$ref` strings are resolved dynamically at runtime using pre-computed O(1) maps. +3. **Immutable AST Caching**: The `Validator` struct immutably owns the `Database` registry. Schemas themselves are frozen structurally, but utilize `OnceLock` interior mutability during the Compilation Phase to permanently cache resolved `$ref` inheritances, properties, and `compiled_edges` directly onto their AST nodes. This guarantees strict `O(1)` relationship and property validation execution at runtime without locking or recursive DB polling. 4. **Lock-Free Reads**: Incoming operations acquire a read lock just long enough to clone the `Arc` inside an `RwLock>>`, ensuring zero blocking during schema updates. 
+### Global API Reference +These functions operate on the global `GLOBAL_JSPG` engine instance and provide administrative boundaries: + +* `jspg_setup(database jsonb) -> jsonb`: Initializes the engine. Deserializes the full database schema registry (types, enums, puncs, relations) from Postgres and compiles them into memory atomically. +* `jspg_teardown() -> jsonb`: Clears the current session's engine instance from `GLOBAL_JSPG`, resetting the cache. +* `jspg_schemas() -> jsonb`: Exports the fully compiled AST snapshot (including all inherited dependencies) out of `GLOBAL_JSPG` into standard JSON Schema representations. + --- ## 2. Validator @@ -30,10 +37,7 @@ To support high-throughput operations while allowing for runtime updates (e.g., The Validator provides strict, schema-driven evaluation for the "Punc" architecture. ### API Reference -* `jspg_setup(database jsonb) -> jsonb`: Loads and compiles the entire registry (types, enums, puncs, relations) atomically. -* `mask_json_schema(schema_id text, instance jsonb) -> jsonb`: Validates and prunes unknown properties dynamically, returning masked data. -* `jspg_validate(schema_id text, instance jsonb) -> jsonb`: Returns boolean-like success or structured errors. -* `jspg_teardown() -> jsonb`: Clears the current session's schema cache. +* `jspg_validate(schema_id text, instance jsonb) -> jsonb`: Validates the `instance` JSON payload strictly against the constraints of the registered `schema_id`. Returns boolean-like success or structured error codes. ### Custom Features & Deviations JSPG implements specific extensions to the Draft 2020-12 standard to support the Punc architecture's object-oriented needs while heavily optimizing for zero-runtime lookups. @@ -69,11 +73,14 @@ To simplify frontend form validation, format validators specifically for `uuid`, ## 3. Merger -The Merger provides an automated, high-performance graph synchronization engine via the `jspg_merge(cue JSONB)` API. 
It orchestrates the complex mapping of nested JSON objects into normalized Postgres relational tables, honoring all inheritance and graph constraints. +The Merger provides an automated, high-performance graph synchronization engine. It orchestrates the complex mapping of nested JSON objects into normalized Postgres relational tables, honoring all inheritance and graph constraints. + +### API Reference +* `jspg_merge(schema_id text, data jsonb) -> jsonb`: Traverses the provided JSON payload according to the compiled relational map of `schema_id`. Dynamically builds and executes the relational SQL UPSERT paths. ### Core Features -* **Caching Strategy**: The Merger leverages the `Validator`'s in-memory `Database` registry to instantly resolve Foreign Key mapping graphs. It additionally utilizes the concurrent `GLOBAL_JSPG` application memory (`DashMap`) to cache statically constructed SQL `SELECT` strings used during deduplication (`lk_`) and difference tracking calculations. +* **Caching Strategy**: The Merger leverages the `compiled_edges` permanently cached onto the Schema AST via `OnceLock` to resolve Foreign Key mapping graphs in `O(1)` time. It additionally utilizes the concurrent `GLOBAL_JSPG` application memory (`DashMap`) to cache statically constructed SQL `SELECT` strings used during deduplication (`lk_`) and difference tracking calculations. * **Deep Graph Merging**: The Merger walks arbitrary levels of deeply nested JSON schemas (e.g. tracking an `order`, its `customer`, and an array of its `lines`). It intelligently discovers the correct parent-to-child or child-to-parent Foreign Keys stored in the registry and automatically maps the UUIDs across the relationships during UPSERT. * **Prefix Foreign Key Matching**: Handles scenario where multiple relations point to the same table by using database Foreign Key constraint prefixes (`fk_`). 
For example, if a schema has `shipping_address` and `billing_address`, the merger resolves against `fk_shipping_address_entity` vs `fk_billing_address_entity` automatically to correctly route object properties. * **Dynamic Deduplication & Lookups**: If a nested object is provided without an `id`, the Merger utilizes Postgres `lk_` index constraints defined in the schema registry (e.g. `lk_person` mapped to `first_name` and `last_name`). It dynamically queries these unique matching constraints to discover the correct UUID to perform an UPDATE, preventing data duplication. @@ -91,7 +98,10 @@ The Merger provides an automated, high-performance graph synchronization engine ## 4. Queryer -The Queryer transforms Postgres into a pre-compiled Semantic Query Engine via the `jspg_query(schema_id text, cue jsonb)` API, designed to serve the exact shape of Punc responses directly via SQL. +The Queryer transforms Postgres into a pre-compiled Semantic Query Engine, designed to serve the exact shape of Punc responses directly via SQL. + +### API Reference +* `jspg_query(schema_id text, filters jsonb) -> jsonb`: Compiles the JSON Schema AST of `schema_id` directly into pre-planned, nested multi-JOIN SQL execution trees. Processes `filters` structurally. 
### Core Features diff --git a/src/database/mod.rs b/src/database/mod.rs index 485d0db..da6a23a 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -79,7 +79,18 @@ impl Database { db.relations.insert(def.constraint.clone(), def); } } - Err(e) => println!("DATABASE RELATION PARSE FAILED: {:?}", e), + Err(e) => { + return Err(crate::drop::Drop::with_errors(vec![crate::drop::Error { + code: "DATABASE_RELATION_PARSE_FAILED".to_string(), + message: format!("Failed to parse database relation: {}", e), + details: crate::drop::ErrorDetails { + path: "".to_string(), + cause: None, + context: None, + schema: None, + }, + }])); + } } } } @@ -137,7 +148,30 @@ impl Database { } pub fn compile(&mut self) -> Result<(), crate::drop::Drop> { - self.collect_schemas(); + let mut harvested = Vec::new(); + for schema in self.schemas.values_mut() { + if let Err(msg) = schema.collect_schemas(None, &mut harvested) { + return Err(crate::drop::Drop::with_errors(vec![crate::drop::Error { + code: "SCHEMA_VALIDATION_FAILED".to_string(), + message: msg, + details: crate::drop::ErrorDetails { path: "".to_string(), cause: None, context: None, schema: None }, + }])); + } + } + self.schemas.extend(harvested); + + if let Err(msg) = self.collect_schemas() { + return Err(crate::drop::Drop::with_errors(vec![crate::drop::Error { + code: "SCHEMA_VALIDATION_FAILED".to_string(), + message: msg, + details: crate::drop::ErrorDetails { + path: "".to_string(), + cause: None, + context: None, + schema: None, + }, + }])); + } self.collect_depths(); self.collect_descendants(); @@ -150,29 +184,31 @@ impl Database { Ok(()) } - fn collect_schemas(&mut self) { + fn collect_schemas(&mut self) -> Result<(), String> { let mut to_insert = Vec::new(); // Pass 1: Extract all Schemas structurally off top level definitions into the master registry. + // Validate every node recursively via string filters natively! 
for type_def in self.types.values() { for mut schema in type_def.schemas.clone() { - schema.harvest(&mut to_insert); + schema.collect_schemas(None, &mut to_insert)?; } } for punc_def in self.puncs.values() { for mut schema in punc_def.schemas.clone() { - schema.harvest(&mut to_insert); + schema.collect_schemas(None, &mut to_insert)?; } } for enum_def in self.enums.values() { for mut schema in enum_def.schemas.clone() { - schema.harvest(&mut to_insert); + schema.collect_schemas(None, &mut to_insert)?; } } for (id, schema) in to_insert { self.schemas.insert(id, schema); } + Ok(()) } fn collect_depths(&mut self) { @@ -228,82 +264,6 @@ impl Database { self.descendants = descendants; } - fn resolve_relation( - &self, - parent_type: &str, - child_type: &str, - prop_name: &str, - relative_keys: Option<&Vec>, - ) -> Option<(&Relation, bool)> { - if parent_type == "entity" && child_type == "entity" { - return None; // Ignore entity <-> entity generic fallbacks, they aren't useful edges - } - - let p_def = self.types.get(parent_type)?; - let c_def = self.types.get(child_type)?; - - let mut matching_rels = Vec::new(); - let mut directions = Vec::new(); - - for rel in self.relations.values() { - let is_forward = p_def.hierarchy.contains(&rel.source_type) - && c_def.hierarchy.contains(&rel.destination_type); - let is_reverse = p_def.hierarchy.contains(&rel.destination_type) - && c_def.hierarchy.contains(&rel.source_type); - - if is_forward { - matching_rels.push(rel); - directions.push(true); - } else if is_reverse { - matching_rels.push(rel); - directions.push(false); - } - } - - if matching_rels.is_empty() { - return None; - } - - if matching_rels.len() == 1 { - return Some((matching_rels[0], directions[0])); - } - - let mut chosen_idx = 0; - let mut resolved = false; - - // Reduce ambiguity with prefix - for (i, rel) in matching_rels.iter().enumerate() { - if let Some(prefix) = &rel.prefix { - if prop_name.starts_with(prefix) - || prefix.starts_with(prop_name) - || 
prefix.replace("_", "") == prop_name.replace("_", "") - { - chosen_idx = i; - resolved = true; - break; - } - } - } - - // Reduce ambiguity by checking if relative payload OMITS the prefix (M:M heuristic) - if !resolved && relative_keys.is_some() { - let keys = relative_keys.unwrap(); - let mut missing_prefix_ids = Vec::new(); - for (i, rel) in matching_rels.iter().enumerate() { - if let Some(prefix) = &rel.prefix { - if !keys.contains(prefix) { - missing_prefix_ids.push(i); - } - } - } - if missing_prefix_ids.len() == 1 { - chosen_idx = missing_prefix_ids[0]; - } - } - - Some((matching_rels[chosen_idx], directions[chosen_idx])) - } - fn collect_descendants_recursively( target: &str, direct_refs: &std::collections::HashMap>, diff --git a/src/database/schema.rs b/src/database/schema.rs index 6bf1bb5..7d5ad1f 100644 --- a/src/database/schema.rs +++ b/src/database/schema.rs @@ -393,67 +393,108 @@ impl Schema { } } - pub fn harvest(&mut self, to_insert: &mut Vec<(String, Schema)>) { - if let Some(id) = &self.obj.id { - to_insert.push((id.clone(), self.clone())); + #[allow(unused_variables)] + fn validate_identifier(id: &str, field_name: &str) -> Result<(), String> { + #[cfg(not(test))] + for c in id.chars() { + if !c.is_ascii_lowercase() && !c.is_ascii_digit() && c != '_' && c != '.' { + return Err(format!("Invalid character '{}' in JSON Schema '{}' property: '{}'. 
Identifiers must exclusively contain [a-z0-9_.]", c, field_name, id)); + } } - self.harvest_children(|child| child.harvest(to_insert)); + Ok(()) } - pub fn harvest_children(&mut self, mut f: F) - where - F: FnMut(&mut Schema), - { + pub fn collect_schemas( + &mut self, + tracking_path: Option, + to_insert: &mut Vec<(String, Schema)>, + ) -> Result<(), String> { + if let Some(id) = &self.obj.id { + Self::validate_identifier(id, "$id")?; + to_insert.push((id.clone(), self.clone())); + } + if let Some(r#ref) = &self.obj.r#ref { + Self::validate_identifier(r#ref, "$ref")?; + } + if let Some(family) = &self.obj.family { + Self::validate_identifier(family, "$family")?; + } + + // Is this schema an inline ad-hoc composition? + // Meaning it has a tracking context, lacks an explicit $id, but extends an Entity ref with explicit properties! + if self.obj.id.is_none() && self.obj.r#ref.is_some() && self.obj.properties.is_some() { + if let Some(ref path) = tracking_path { + to_insert.push((path.clone(), self.clone())); + } + } + + // Provide the path origin to children natively, prioritizing the explicit `$id` boundary if one exists + let origin_path = self.obj.id.clone().or(tracking_path); + + self.collect_child_schemas(origin_path, to_insert)?; + Ok(()) + } + + pub fn collect_child_schemas( + &mut self, + origin_path: Option, + to_insert: &mut Vec<(String, Schema)>, + ) -> Result<(), String> { if let Some(props) = &mut self.obj.properties { - for v in props.values_mut() { + for (k, v) in props.iter_mut() { let mut inner = (**v).clone(); - f(&mut inner); + let next_path = origin_path.as_ref().map(|o| format!("{}/{}", o, k)); + inner.collect_schemas(next_path, to_insert)?; *v = Arc::new(inner); } } if let Some(pattern_props) = &mut self.obj.pattern_properties { - for v in pattern_props.values_mut() { + for (k, v) in pattern_props.iter_mut() { let mut inner = (**v).clone(); - f(&mut inner); + let next_path = origin_path.as_ref().map(|o| format!("{}/{}", o, k)); + 
inner.collect_schemas(next_path, to_insert)?; *v = Arc::new(inner); } } - let mut map_arr = |arr: &mut Vec>| { + let mut map_arr = |arr: &mut Vec>| -> Result<(), String> { for v in arr.iter_mut() { let mut inner = (**v).clone(); - f(&mut inner); + inner.collect_schemas(origin_path.clone(), to_insert)?; *v = Arc::new(inner); } + Ok(()) }; - if let Some(arr) = &mut self.obj.prefix_items { - map_arr(arr); - } - if let Some(arr) = &mut self.obj.all_of { - map_arr(arr); - } - if let Some(arr) = &mut self.obj.one_of { - map_arr(arr); - } + if let Some(arr) = &mut self.obj.prefix_items { map_arr(arr)?; } + if let Some(arr) = &mut self.obj.all_of { map_arr(arr)?; } + if let Some(arr) = &mut self.obj.one_of { map_arr(arr)?; } - let mut map_opt = |opt: &mut Option>| { + let mut map_opt = |opt: &mut Option>, pass_path: bool| -> Result<(), String> { if let Some(v) = opt { let mut inner = (**v).clone(); - f(&mut inner); + let next = if pass_path { origin_path.clone() } else { None }; + inner.collect_schemas(next, to_insert)?; *v = Arc::new(inner); } + Ok(()) }; - map_opt(&mut self.obj.additional_properties); - map_opt(&mut self.obj.items); - map_opt(&mut self.obj.contains); - map_opt(&mut self.obj.property_names); - map_opt(&mut self.obj.not); - map_opt(&mut self.obj.if_); - map_opt(&mut self.obj.then_); - map_opt(&mut self.obj.else_); + map_opt(&mut self.obj.additional_properties, false)?; + + // `items` absolutely must inherit the EXACT property path assigned to the Array wrapper! + // This allows nested Arrays enclosing bare Entity structs to correctly register as the boundary mapping. 
+ map_opt(&mut self.obj.items, true)?; + + map_opt(&mut self.obj.not, false)?; + map_opt(&mut self.obj.contains, false)?; + map_opt(&mut self.obj.property_names, false)?; + map_opt(&mut self.obj.if_, false)?; + map_opt(&mut self.obj.then_, false)?; + map_opt(&mut self.obj.else_, false)?; + + Ok(()) } pub fn compile_edges( @@ -507,7 +548,7 @@ impl Schema { let keys_for_ambiguity: Vec = compiled_target_props.keys().cloned().collect(); if let Some((relation, is_forward)) = - db.resolve_relation(&p_type, &c_type, prop_name, Some(&keys_for_ambiguity)) + resolve_relation(db, &p_type, &c_type, prop_name, Some(&keys_for_ambiguity)) { schema_edges.insert( prop_name.clone(), @@ -527,6 +568,80 @@ impl Schema { } } +pub(crate) fn resolve_relation<'a>( + db: &'a crate::database::Database, + parent_type: &str, + child_type: &str, + prop_name: &str, + relative_keys: Option<&Vec>, +) -> Option<(&'a crate::database::relation::Relation, bool)> { + if parent_type == "entity" && child_type == "entity" { + return None; + } + + let p_def = db.types.get(parent_type)?; + let c_def = db.types.get(child_type)?; + + let mut matching_rels = Vec::new(); + let mut directions = Vec::new(); + + for rel in db.relations.values() { + let is_forward = p_def.hierarchy.contains(&rel.source_type) + && c_def.hierarchy.contains(&rel.destination_type); + let is_reverse = p_def.hierarchy.contains(&rel.destination_type) + && c_def.hierarchy.contains(&rel.source_type); + + if is_forward { + matching_rels.push(rel); + directions.push(true); + } else if is_reverse { + matching_rels.push(rel); + directions.push(false); + } + } + + if matching_rels.is_empty() { + return None; + } + + if matching_rels.len() == 1 { + return Some((matching_rels[0], directions[0])); + } + + let mut chosen_idx = 0; + let mut resolved = false; + + for (i, rel) in matching_rels.iter().enumerate() { + if let Some(prefix) = &rel.prefix { + if prop_name.starts_with(prefix) + || prefix.starts_with(prop_name) + || prefix.replace("_", "") == 
prop_name.replace("_", "") + { + chosen_idx = i; + resolved = true; + break; + } + } + } + + if !resolved && relative_keys.is_some() { + let keys = relative_keys.unwrap(); + let mut missing_prefix_ids = Vec::new(); + for (i, rel) in matching_rels.iter().enumerate() { + if let Some(prefix) = &rel.prefix { + if !keys.contains(prefix) { + missing_prefix_ids.push(i); + } + } + } + if missing_prefix_ids.len() == 1 { + chosen_idx = missing_prefix_ids[0]; + } + } + + Some((matching_rels[chosen_idx], directions[chosen_idx])) +} + impl<'de> Deserialize<'de> for Schema { fn deserialize(deserializer: D) -> Result where