From 6a275e1d9053c8cac610b386a509186cb3393772 Mon Sep 17 00:00:00 2001 From: Alex Groleau Date: Fri, 13 Mar 2026 01:17:27 -0400 Subject: [PATCH] stems fixes and tests --- GEMINI.md | 7 +- fixtures/stems.json | 175 ++++++++++++++++++++++++++++++++++++++ src/database/mod.rs | 96 ++++++++++++++++----- src/entity/GEMINI.md | 79 ----------------- src/queryer/compiler.rs | 4 +- src/tests/fixtures.rs | 6 ++ src/tests/runner.rs | 10 +++ src/tests/types/case.rs | 41 +++++++-- src/tests/types/expect.rs | 1 + 9 files changed, 304 insertions(+), 115 deletions(-) create mode 100644 fixtures/stems.json delete mode 100644 src/entity/GEMINI.md diff --git a/GEMINI.md b/GEMINI.md index 2c9a21a..334994a 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -109,6 +109,8 @@ A `Stem` is **not a JSON Pointer** or a physical path string (like `/properties/ Because `pg_notify` (Beats) fire rigidly from physical Postgres tables (e.g. `{"type": "phone_number"}`), the Go Framework only ever needs to know: "Does the schema `with_contacts.person` contain the `phone_number` Entity anywhere inside its tree?" * **Initialization:** During startup (`jspg_stems()`), the database crawls all Schemas and maps out every physical Entity Type it references. It builds a flat dictionary of `Schema ID -> [Entity Types]` (e.g. `with_contacts.person -> ["person", "contact", "phone_number", "email_address"]`). +* **Identifier Prioritization**: When determining if a nested object boundary is an Entity, JSPG natively prioritizes defined `$id` tags over `$ref` inheritance pointers to prevent polymorphic boundaries from devolving into their generic base classes. +* **Cyclical Deduplication**: Because Punc relationships often reference back on themselves via deeply nested classes, the Stem Engine applies intelligent path deduplication. 
If the active `current_path` already ends with the target entity string, it traverses the inheritance properties without appending the entity to the stem path again, eliminating infinite powerset loops. * **Relationship Path Squashing:** When calculating nested string paths structurally to discover these boundaries, JSPG intentionally **omits** properties natively named `target` or `source` if they belong to a native database `relationship` table override. This ensures paths like `phone_numbers/contact/target` correctly register their beat resolution pattern as `phone_numbers/contact/phone_number`. * **The Go Router**: The Golang Punc framework uses this exact mapping to register WebSocket Beat frequencies exclusively on the Entity types discovered. * **The Queryer Execution**: When the Go framework asks JSPG to hydrate a partial `phone_number` stem for the `with_contacts.person` schema, instead of jumping through string paths, the SQL Compiler simply reaches into the Schema's AST using the `phone_number` Type string, pulls out exactly that entity's mapping rules, and returns a fully correlated `SELECT` block! This natively handles nested array properties injected via `oneOf` or array references efficiently bypassing runtime powerset expansion. @@ -127,7 +129,8 @@ To solve this, JSPG introduces the `DatabaseExecutor` trait inside `src/database ### Universal Test Harness (`src/tests/`) JSPG abandons the standard `cargo pgrx test` model in favor of native OS testing for a >1000x speed increase (`~0.05s` execution). -1. **JSON Fixtures**: All core interactions are defined abstractly as JSON arrays in `fixtures/`. Each file contains suites of `TestCase` objects with an `action` flag (`validate`, `merge`, `query`). +1. **JSON Fixtures**: All core interactions are defined abstractly as JSON arrays in `fixtures/`. Each file contains suites of `TestCase` objects with an `action` flag (`compile`, `validate`, `merge`, `query`). 2. 
**`build.rs` Generator**: The build script traverses the JSON fixtures, extracts their structural identities, and generates standard `#[test]` blocks into `src/tests/fixtures.rs`. 3. **Modular Test Dispatcher**: The `src/tests/types/` module deserializes the abstract JSON test payloads into `Suite`, `Case`, and `Expect` data structures. -4. **Unit Context Execution**: When `cargo test` executes, the `Runner` feeds the JSON payloads directly into `case.execute(db)`. Because the tests run natively inside the module via `#cfg(test)`, the Rust compiler globally erases `pgrx` C-linkage, instantiates the `MockExecutor`, and allows for pure structural evaluation of complex database logic completely in memory. + * The `compile` action natively asserts the exact output shape of `jspg_stems`, allowing structural and relationship mapping logic to be tested purely through JSON without writing brute-force manual tests in Rust. +4. **Unit Context Execution**: When `cargo test` executes, the runner iterates over the JSON payloads. Because the tests run natively inside the module via `#[cfg(test)]`, the Rust compiler globally erases `pgrx` C-linkage, instantiates the `MockExecutor`, and allows for pure structural evaluation of complex database logic completely in memory and in parallel. 
diff --git a/fixtures/stems.json b/fixtures/stems.json new file mode 100644 index 0000000..e89cfff --- /dev/null +++ b/fixtures/stems.json @@ -0,0 +1,175 @@ +[ + { + "description": "Stem Engine Unit Tests", + "database": { + "puncs": [], + "enums": [], + "relations": [ + { + "id": "rel1", + "type": "relation", + "constraint": "fk_contact_entity", + "source_type": "contact", + "source_columns": ["entity_id"], + "destination_type": "person", + "destination_columns": ["id"], + "prefix": null + }, + { + "id": "rel2", + "type": "relation", + "constraint": "fk_relationship_target", + "source_type": "relationship", + "source_columns": ["target_id", "target_type"], + "destination_type": "entity", + "destination_columns": ["id", "type"], + "prefix": "target" + } + ], + "types": [ + { + "name": "entity", + "hierarchy": ["entity"], + "schemas": [{ + "$id": "entity", + "type": "object", + "properties": {} + }] + }, + { + "name": "person", + "hierarchy": ["person", "entity"], + "schemas": [{ + "$id": "person", + "$ref": "entity", + "properties": {} + }] + }, + { + "name": "email_address", + "hierarchy": ["email_address", "entity"], + "schemas": [{ + "$id": "email_address", + "$ref": "entity", + "properties": {} + }] + }, + { + "name": "phone_number", + "hierarchy": ["phone_number", "entity"], + "schemas": [{ + "$id": "phone_number", + "$ref": "entity", + "properties": {} + }] + }, + { + "name": "relationship", + "relationship": true, + "hierarchy": ["relationship", "entity"], + "schemas": [{ + "$id": "relationship", + "$ref": "entity", + "properties": {} + }] + }, + { + "name": "contact", + "relationship": true, + "hierarchy": ["contact", "relationship", "entity"], + "schemas": [{ + "$id": "contact", + "$ref": "relationship", + "properties": { + "target": { + "oneOf": [ + { "$ref": "phone_number" }, + { "$ref": "email_address" } + ] + } + } + }] + }, + { + "name": "save_person", + "schemas": [{ + "$id": "save_person.response", + "$ref": "person", + "properties": { + "contacts": 
{ + "type": "array", + "items": { "$ref": "contact" } + } + } + }] + } + ] + }, + "tests": [ + { + "description": "correctly squashes deep oneOf refs through array paths", + "action": "compile", + "expect": { + "success": true, + "stems": { + "save_person.response": { + "": { + "type": "person" + }, + "contacts/contact": { + "type": "contact", + "relation": "contacts_id" + }, + "contacts/contact/email_address": { + "type": "email_address", + "relation": "target_id" + }, + "contacts/contact/phone_number": { + "type": "phone_number", + "relation": "target_id" + } + }, + "contact": { + "": { + "type": "contact" + }, + "email_address": { + "type": "email_address", + "relation": "target_id" + }, + "phone_number": { + "type": "phone_number", + "relation": "target_id" + } + }, + "person": { + "": { + "type": "person" + } + }, + "email_address": { + "": { + "type": "email_address" + } + }, + "phone_number": { + "": { + "type": "phone_number" + } + }, + "relationship": { + "": { + "type": "relationship" + } + }, + "entity": { + "": { + "type": "entity" + } + } + } + } + } + ] + } +] diff --git a/src/database/mod.rs b/src/database/mod.rs index 3dbb099..42693e1 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -265,6 +265,7 @@ impl Database { String::from(""), None, None, + true, &mut inner_map, &mut errors, ); @@ -290,21 +291,16 @@ impl Database { mut current_path: String, parent_type: Option, property_name: Option, + is_root: bool, inner_map: &mut HashMap>, errors: &mut Vec, ) { let mut is_entity = false; let mut entity_type = String::new(); - let mut examine_id = None; - if let Some(ref r) = schema.obj.r#ref { - examine_id = Some(r.clone()); - } else if let Some(ref id) = schema.obj.id { - examine_id = Some(id.clone()); - } - - if let Some(target) = examine_id { - let parts: Vec<&str> = target.split('.').collect(); + // First check if the Schema's $id is a native Database Type + if let Some(ref id) = schema.obj.id { + let parts: Vec<&str> = 
id.split('.').collect(); if let Some(last_seg) = parts.last() { if db.types.contains_key(*last_seg) { is_entity = true; @@ -313,6 +309,20 @@ impl Database { } } + // If not found via $id, check the $ref pointer + // This allows ad-hoc schemas (like `save_person.response`) to successfully adopt the Type of what they $ref + if !is_entity { + if let Some(ref r) = schema.obj.r#ref { + let parts: Vec<&str> = r.split('.').collect(); + if let Some(last_seg) = parts.last() { + if db.types.contains_key(*last_seg) { + is_entity = true; + entity_type = last_seg.to_string(); + } + } + } + } + let mut relation_col = None; if is_entity { if let (Some(pt), Some(prop)) = (&parent_type, &property_name) { @@ -340,22 +350,37 @@ impl Database { schema: Arc::new(schema.clone()), }; - let mut branch_path = current_path.clone(); - if !current_path.is_empty() { - branch_path = format!("{}/{}", current_path, entity_type); - } + let mut branch_path = if is_root { + String::new() + } else if current_path.is_empty() { + entity_type.clone() + } else { + format!("{}/{}", current_path, entity_type) + }; - if inner_map.contains_key(&branch_path) { - errors.push(crate::drop::Error { - code: "STEM_COLLISION".to_string(), - message: format!("The stem path `{}` resolves to multiple Entity boundaries. This usually occurs during un-wrapped $family or oneOf polymorphic schemas where multiple Entities are directly assigned to the same property. To fix this, encapsulate the polymorphic branch.", branch_path), - details: crate::drop::ErrorDetails { - path: root_schema_id.to_string(), - }, - }); - } + // DEDUPLICATION: If we just recursed into the EXACT same entity type definition, + // do not append again and do not re-register the stem. 
+ let already_registered = + if current_path == entity_type || current_path.ends_with(&format!("/{}", entity_type)) { + branch_path = current_path.clone(); + true + } else { + false + }; - inner_map.insert(branch_path.clone(), Arc::new(stem)); + if !already_registered { + if inner_map.contains_key(&branch_path) { + errors.push(crate::drop::Error { + code: "STEM_COLLISION".to_string(), + message: format!("The stem path `{}` resolves to multiple Entity boundaries. This usually occurs during un-wrapped $family or oneOf polymorphic schemas where multiple Entities are directly assigned to the same property. To fix this, encapsulate the polymorphic branch.", branch_path), + details: crate::drop::ErrorDetails { + path: root_schema_id.to_string(), + }, + }); + } + + inner_map.insert(branch_path.clone(), Arc::new(stem)); + } // Update current_path for structural children current_path = branch_path; @@ -381,6 +406,7 @@ impl Database { current_path.clone(), next_parent.clone(), Some(k.clone()), + false, inner_map, errors, ); @@ -403,6 +429,7 @@ impl Database { next_path, next_parent.clone(), Some(k.clone()), + false, inner_map, errors, ); @@ -418,11 +445,32 @@ impl Database { current_path.clone(), next_parent.clone(), property_name.clone(), + false, // Arrays themselves aren't polymorphic branches, their items might be inner_map, errors, ); } + // Follow external reference if we didn't just crawl local properties + if schema.obj.properties.is_none() && schema.obj.items.is_none() && schema.obj.one_of.is_none() + { + if let Some(ref r) = schema.obj.r#ref { + if let Some(target_schema) = db.schemas.get(r) { + Self::discover_stems( + db, + root_schema_id, + target_schema, + current_path.clone(), + next_parent.clone(), + property_name.clone(), + false, + inner_map, + errors, + ); + } + } + } + // Polymorphism branch if let Some(arr) = &schema.obj.one_of { for v in arr { @@ -433,6 +481,7 @@ impl Database { current_path.clone(), next_parent.clone(), property_name.clone(), + false, 
inner_map, errors, ); @@ -447,6 +496,7 @@ impl Database { current_path.clone(), next_parent.clone(), property_name.clone(), + false, inner_map, errors, ); diff --git a/src/entity/GEMINI.md b/src/entity/GEMINI.md deleted file mode 100644 index 10cab5b..0000000 --- a/src/entity/GEMINI.md +++ /dev/null @@ -1,79 +0,0 @@ -# Entity Engine (jspg) - -## Overview - -This document outlines the architecture for moving the complex, CPU-bound row merging (`merge_entity`) and dynamic querying (`query_entity`) functionality out of PL/pgSQL and directly into the Rust-based `jspg` extension. - -By treating the `jspg` schema registry as the absolute Single Source of Truth, we can leverage Rust and the Postgres query planner (via SPI) to achieve near O(1) execution planning for deeply nested reads, complex relational writes, and partial hydration beats. - -## The Problem - -Historically, `agreego.merge_entity` (PL/pgSQL) handled nested writes by segmenting JSON, resolving types, searching hierarchies, and dynamically concatenating `INSERT`/`UPDATE` statements. `agreego.query_entity` was conceived to do the same for reads (handling base security, inheritance JOINs, and filtering automatically). - -However, this design hits three major limitations: -1. **CPU Bound Operations**: PL/pgSQL is comparatively slow at complex string concatenation and massive JSON graph traversals. -2. **Query Planning Cache Busting**: Generating massive, dynamic SQL strings prevents Postgres from caching query plans. `EXECUTE dynamic_sql` forces the planner to re-evaluate statistics and execution paths on every function call, leading to extreme latency spikes at scale. -3. **The Hydration Beat Problem**: The Punc framework requires fetching specific UI "fragments" (e.g. just the `target` of a specific `contact` array element) to feed WebSockets. Hand-rolling CTEs for every possible sub-tree permutation to serve beats will quickly become unmaintainable. 
- -## The Solution: Semantic Engine Database - -By migrating `merge_entity` and `query_entity` to `jspg`, we turn the database into a pre-compiled Semantic Engine. - -1. **Schema-to-SQL Compilation**: During the connection lifecycle (`cache_json_schemas()`), `jspg` statically analyzes the JSON Schema AST. It acts as a compiler, translating the schema layout into perfectly optimized, multi-JOIN SQL query strings for *every* node/fragment in the schema. -2. **Prepared Statements (SPI)**: `jspg` feeds these computed SQL strings into the Postgres SPI (Server Programming Interface) using `Spi::prepare()`. Postgres calculates the query execution plan *once* and caches it in memory. -3. **Instant Execution**: When a Punc needs data, `jspg` retrieves the cached PreparedStatement, securely binds binary parameters, and executes the pre-planned query instantly. - -## Architecture - -### 1. The `cache_json_schemas()` Expansion -The initialization function must now ingest `types` and `agreego.relation` data so the internal `Registry` holds the full Relational Graph. - -During schema compilation, if a schema is associated with a database Type, it triggers the **SQL Compiler Phase**: -- It builds a table-resolution AST mapping to `JOIN` clauses based on foreign keys. -- It translates JSON schema properties to `SELECT jsonb_build_object(...)`. -- It generates static SQL for `INSERT`, `UPDATE`, and `SELECT` (including path-based fragment SELECTs). -- It calls `Spi::prepare()` to cache these plans inside the Session Context. - -### 2. `agreego.query_entity` (Reads) -* **API**: `agreego.query_entity(schema_id TEXT, fragment_path TEXT, cue JSONB)` -* **Execution**: - * Rust locates the target Schema in memory. - * It uses the `fragment_path` (e.g., `/` for a full read, or `/contacts/0/target` for a hydration beat) to fetch the exact PreparedStatement. - * It binds variables (Row Level Security IDs, filtering, pagination limit/offset) parsed from the `cue`. 
- * SPI returns the heavily nested, pre-aggregated `JSONB` instantly. - -### 3. Unified Aggregations & Computeds (Schema `query` objects) -We replace the concept of a complex string parser (PEL) with native structured JSON JSON objects using the `query` keyword. - -A structured `query` block in the schema: -```json -"total": { - "type": "number", - "readOnly": true, - "query": { - "aggregate": "sum", - "source": "lines", - "field": "amount" - } -} -``` -* **Frontend (Dart)**: The Go generator parses the JSON object directly and emits the native UI aggregation code (e.g. `lines.fold(...)`) for instant UI updates before the server responds. -* **Backend (jspg)**: The Rust SQL compiler natively deserializes the `query` object into an internal struct. It recognizes the `aggregate` instruction and outputs a Postgres native aggregation: `(SELECT SUM(amount) FROM agreego.invoice_line WHERE invoice_id = t1.id)` as a column in the prepared `SELECT` statement. -* **Unification**: The database-calculated value acts as the authoritative truth, synchronizing and correcting the client automatically on the resulting `beat`. - -### 4. `agreego.merge_entity` (Writes) -* **API**: `agreego.merge_entity(cue JSONB)` -* **Execution**: - * Parses the incoming `cue` JSON via `serde_json` at C-like speeds. - * Recursively validates and *constructively masks* the tree against the strict schema. - * Traverses the relational graph (which is fully loaded in the `jspg` registry). - * Binds the new values directly into the cached `INSERT` or `UPDATE` SPI prepared statements for each table in the hierarchy. - * Evaluates field differences and natively uses `pg_notify` to fire atomic row-level changes for the Go Beat framework. - -## Roadmap - -1. **Relational Ingestion**: Update `cache_json_schemas` to pass relational metadata (`agreego.relation` rows) into the `jspg` registry cache. -2. 
**The SQL Compiler**: Build the AST-to-String compiler in Rust that reads properties, `$ref`s, and `$family` trees to piece together generic SQL. -3. **SPI Caching**: Integrate `Spi::prepare` into the `Validator` creation phase. -4. **Rust `merge_entity`**: Port the constructive structural extraction loop from PL/pgSQL to Rust. -5. **Rust `query_entity`**: Abstract the query runtime, mapping Punc JSON `filters` arrays to SPI-bound parameters safely. diff --git a/src/queryer/compiler.rs b/src/queryer/compiler.rs index 1fa32ed..58096cc 100644 --- a/src/queryer/compiler.rs +++ b/src/queryer/compiler.rs @@ -205,7 +205,7 @@ impl SqlCompiler { let local_ctx = format!("{}_{}", parent_alias, prop_name.unwrap_or("obj")); // 1. Build FROM clauses and table aliases - let (mut table_aliases, from_clauses) = self.build_hierarchy_from_clauses(type_def, &local_ctx); + let (table_aliases, from_clauses) = self.build_hierarchy_from_clauses(type_def, &local_ctx); // 2. Map properties and build jsonb_build_object args let select_args = self.map_properties_to_aliases( @@ -225,7 +225,7 @@ impl SqlCompiler { }; // 3. 
Build WHERE clauses - let mut where_clauses = self.build_filter_where_clauses( + let where_clauses = self.build_filter_where_clauses( schema, type_def, &table_aliases, diff --git a/src/tests/fixtures.rs b/src/tests/fixtures.rs index 7d54c11..90b4628 100644 --- a/src/tests/fixtures.rs +++ b/src/tests/fixtures.rs @@ -3443,6 +3443,12 @@ fn test_if_then_else_13_1() { crate::tests::runner::run_test_case(&path, 13, 1).unwrap(); } +#[test] +fn test_stems_0_0() { + let path = format!("{}/fixtures/stems.json", env!("CARGO_MANIFEST_DIR")); + crate::tests::runner::run_test_case(&path, 0, 0).unwrap(); +} + #[test] fn test_empty_string_0_0() { let path = format!("{}/fixtures/emptyString.json", env!("CARGO_MANIFEST_DIR")); diff --git a/src/tests/runner.rs b/src/tests/runner.rs index 0c85e99..b38b043 100644 --- a/src/tests/runner.rs +++ b/src/tests/runner.rs @@ -97,6 +97,16 @@ pub fn run_test_case(path: &str, suite_idx: usize, case_idx: usize) -> Result<() // 4. Run Tests match test.action.as_str() { + "compile" => { + let result = test.run_compile(db.clone()); + if let Err(e) = result { + println!("TEST COMPILE ERROR FOR '{}': {}", test.description, e); + failures.push(format!( + "[{}] Compile Test '{}' failed. 
Error: {}", + group.description, test.description, e + )); + } + } "validate" => { let result = test.run_validate(db.clone()); if let Err(e) = result { diff --git a/src/tests/types/case.rs b/src/tests/types/case.rs index 019e16e..7e728bf 100644 --- a/src/tests/types/case.rs +++ b/src/tests/types/case.rs @@ -38,16 +38,39 @@ fn default_action() -> String { } impl TestCase { - pub fn execute(&self, db: Arc) -> Result<(), String> { - match self.action.as_str() { - "validate" => self.run_validate(db), - "merge" => self.run_merge(db), - "query" => self.run_query(db), - _ => Err(format!( - "Unknown action '{}' for test '{}'", - self.action, self.description - )), + pub fn run_compile(&self, db: Arc) -> Result<(), String> { + let expected_success = self.expect.as_ref().map(|e| e.success).unwrap_or(false); + + // We assume db has already been setup and compiled successfully by runner.rs's `jspg_setup` + // We just need to check if there are compilation errors vs expected success + let got_success = true; // Setup ensures success unless setup fails, which runner handles + + if expected_success != got_success { + return Err(format!( + "Expected success: {}, Got: {}", + expected_success, got_success + )); } + + // Assert stems + if let Some(expect) = &self.expect { + if let Some(expected_stems) = &expect.stems { + // Convert the Db stems (HashMap>>) to matching JSON shape + let db_stems_json = serde_json::to_value(&db.stems).unwrap(); + let expect_stems_json = serde_json::to_value(expected_stems).unwrap(); + + if db_stems_json != expect_stems_json { + let expected_pretty = serde_json::to_string_pretty(&expect_stems_json).unwrap(); + let got_pretty = serde_json::to_string_pretty(&db_stems_json).unwrap(); + return Err(format!( + "Stem validation failed.\nExpected:\n{}\n\nGot:\n{}", + expected_pretty, got_pretty + )); + } + } + } + + Ok(()) } pub fn run_validate(&self, db: Arc) -> Result<(), String> { diff --git a/src/tests/types/expect.rs b/src/tests/types/expect.rs index 
8b7f5a3..68e7b38 100644 --- a/src/tests/types/expect.rs +++ b/src/tests/types/expect.rs @@ -14,6 +14,7 @@ pub struct ExpectBlock { pub success: bool, pub result: Option, pub errors: Option>, + pub stems: Option>>, #[serde(default)] pub sql: Option>, }