Compare commits

..

7 Commits

Author SHA1 Message Date
e14f53e7d9 validator reorg 2026-02-26 19:17:13 -05:00
960a99034a version: 1.0.55 2026-02-26 15:47:49 -05:00
81388149e8 added keyword to jspg 2026-02-26 15:47:43 -05:00
b8b3f7a501 version: 1.0.54 2026-02-26 15:46:09 -05:00
bc5489b1ea added keyword to jspg 2026-02-26 15:46:01 -05:00
7b55277116 flow update 2026-02-25 13:22:27 -05:00
ed636b05a4 flow update 2026-02-24 18:00:20 -05:00
21 changed files with 1207 additions and 1258 deletions

View File

@ -9,7 +9,7 @@ It is designed to serve as the validation engine for the "Punc" architecture, wh
1. **Draft 2020-12 Compliance**: Attempt to adhere to the official JSON Schema Draft 2020-12 specification.
2. **Ultra-Fast Validation**: Compile schemas into an optimized in-memory representation for near-instant validation during high-throughput workloads.
3. **Connection-Bound Caching**: Leverage the PostgreSQL session lifecycle to maintain a per-connection schema cache, eliminating the need for repetitive parsing.
4. **Structural Inheritance**: Support object-oriented schema design via Implicit Keyword Shadowing and virtual `.family` schemas.
4. **Structural Inheritance**: Support object-oriented schema design via Implicit Keyword Shadowing and virtual `$family` references.
5. **Punc Integration**: validation is aware of the "Punc" context (request/response) and can validate `cue` objects efficiently.
## 🔌 API Reference
@ -27,7 +27,7 @@ Loads and compiles the entire schema registry into the session's memory, atomica
* **Behavior**:
* Parses all inputs into an internal schema graph.
* Resolves all internal references (`$ref`).
* Generates virtual `.family` schemas for type hierarchies.
* Generates virtual union schemas for type hierarchies referenced via `$family`.
* Compiles schemas into validators.
* **Returns**: `{"response": "success"}` or an error object.
@ -78,10 +78,10 @@ Standard JSON Schema composition (`allOf`) is additive (Intersection), meaning c
* **Composition (`allOf`)**: When using `allOf`, standard intersection rules apply. No shadowing occurs; all constraints from all branches must pass. This is used for mixins or interfaces.
### 2. Virtual Family Schemas (`.family`)
### 2. Virtual Family References (`$family`)
To support polymorphic fields (e.g., a field that accepts any "User" type), JSPG generates virtual schemas representing type hierarchies.
* **Mechanism**: When caching types, if a type defines a `hierarchy` (e.g., `["entity", "organization", "person"]`), JSPG generates a schema like `organization.family` which is a `oneOf` containing refs to all valid descendants.
* **Mechanism**: When caching types, if a type defines a `hierarchy` (e.g., `["entity", "organization", "person"]`), JSPG generates a virtual `oneOf` family containing refs to all valid descendants. These families can only be referenced via `{"$family": "organization"}`. Because `$family` is a macro-pointer that swaps in the virtual union, it **must** be the only keyword in its schema object; you cannot define other properties alongside it.
### 3. Strict by Default & Extensibility
JSPG enforces a "Secure by Default" philosophy. All schemas are treated as if `unevaluatedProperties: false` (and `unevaluatedItems: false`) is set, unless explicitly overridden.

View File

@ -33,7 +33,7 @@ fn main() {
let mut std_file = File::create(&std_dest_path).unwrap();
// Write headers
writeln!(std_file, "use jspg::util;").unwrap();
writeln!(std_file, "use jspg::validator::util;").unwrap();
// Walk tests/fixtures directly
let fixtures_path = "tests/fixtures";
@ -62,7 +62,7 @@ fn main() {
#[pg_test]
fn {}() {{
let path = format!("{{}}/tests/fixtures/{}.json", env!("CARGO_MANIFEST_DIR"));
crate::util::run_test_file_at_index(&path, {}).unwrap();
crate::validator::util::run_test_file_at_index(&path, {}).unwrap();
}}
"#,
fn_name, file_name, i

59
flow
View File

@ -15,25 +15,28 @@ CARGO_DEPENDENCIES=(cargo-pgrx==0.16.1)
GITEA_ORGANIZATION="cellular"
GITEA_REPOSITORY="jspg"
pgrx-prepare() {
pgrx-up() {
info "Initializing pgrx..."
# Explicitly point to the postgresql@${POSTGRES_VERSION} pg_config, don't rely on 'which'
local POSTGRES_CONFIG_PATH="/opt/homebrew/opt/postgresql@${POSTGRES_VERSION}/bin/pg_config"
if [ ! -x "$POSTGRES_CONFIG_PATH" ]; then
error "pg_config not found or not executable at $POSTGRES_CONFIG_PATH."
warning "Ensure postgresql@${POSTGRES_VERSION} is installed correctly via Homebrew."
return 2
abort "pg_config not found or not executable at $POSTGRES_CONFIG_PATH." 2
fi
if cargo pgrx init --pg"$POSTGRES_VERSION"="$POSTGRES_CONFIG_PATH"; then
success "pgrx initialized successfully."
else
error "Failed to initialize pgrx. Check PostgreSQL development packages are installed and $POSTGRES_CONFIG_PATH is valid."
return 2
success "pgrx initialized successfully." && return 0
fi
abort "Failed to initialize pgrx. Check PostgreSQL development packages are installed and $POSTGRES_CONFIG_PATH is valid." 2
}
pgrx-down() {
info "Taking pgrx down..."
}
build() {
local version
version=$(get-version) || return $?
@ -51,11 +54,10 @@ build() {
info "Creating tarball: ${tarball_path}"
# Set COPYFILE_DISABLE=1 to prevent macOS tar from including ._ metadata files
if COPYFILE_DISABLE=1 tar --exclude='.git*' --exclude='./target' --exclude='./package' --exclude='./flows' --exclude='./flow' -czf "${tarball_path}" .; then
success "Successfully created source tarball: ${tarball_path}"
else
error "Failed to create source tarball."
return 2
success "Successfully created source tarball: ${tarball_path}" && return 0
fi
abort "Failed to create source tarball." 2
}
install() {
@ -66,8 +68,7 @@ install() {
# Run the pgrx install command
if ! cargo pgrx install; then
error "cargo pgrx install command failed."
return 2
abort "cargo pgrx install command failed." 2
fi
success "PGRX extension v$version successfully built and installed."
@ -76,26 +77,23 @@ install() {
pg_sharedir=$("$POSTGRES_CONFIG_PATH" --sharedir)
local pg_config_status=$?
if [ $pg_config_status -ne 0 ] || [ -z "$pg_sharedir" ]; then
error "Failed to determine PostgreSQL shared directory using pg_config."
return 2
abort "Failed to determine PostgreSQL shared directory using pg_config." 2
fi
local installed_control_path="${pg_sharedir}/extension/jspg.control"
# Modify the control file
if [ ! -f "$installed_control_path" ]; then
error "Installed control file not found: '$installed_control_path'"
return 2
abort "Installed control file not found: '$installed_control_path'" 2
fi
info "Modifying control file for non-superuser access: ${installed_control_path}"
# Use sed -i '' for macOS compatibility
if sed -i '' '/^superuser = false/d' "$installed_control_path" && \
echo 'trusted = true' >> "$installed_control_path"; then
success "Control file modified successfully."
else
error "Failed to modify control file: ${installed_control_path}"
return 2
success "Control file modified successfully." && return 0
fi
abort "Failed to modify control file: ${installed_control_path}" 2
}
test() {
@ -109,26 +107,27 @@ clean() {
}
jspg-usage() {
printf "prepare\tCheck OS, Cargo, and PGRX dependencies.\n"
printf "install\tBuild and install the extension locally (after prepare).\n"
printf "reinstall\tClean, build, and install the extension locally (after prepare).\n"
printf "test-jspg\t\tRun pgrx integration tests.\n"
printf "test-validator\t\tRun validator integration tests.\n"
printf "clean\t\tRemove pgrx build artifacts.\n"
echo "up|Check OS, Cargo, and PGRX dependencies."
echo "install|Build and install the extension locally (after up)."
echo "reinstall|Clean, build, and install the extension locally (after up)."
echo "test-jspg|Run pgrx integration tests."
echo "test-validator|Run validator integration tests."
echo "clean|Remove pgrx build artifacts."
}
jspg-flow() {
case "$1" in
prepare) prepare && cargo-prepare && pgrx-prepare; return $?;;
up) up && rust-up && pgrx-up; return $?;;
down) pgrx-down && rust-down && down; return $?;;
build) build; return $?;;
install) install; return $?;;
reinstall) clean && install; return $?;;
test) test "${@:2}"; return $?;;
clean) clean; return $?;;
*) return 1 ;;
*) return 127 ;;
esac
}
register-flow "jspg-usage" "jspg-flow"
register-flow "jspg"
dispatch "$@"

2
flows

Submodule flows updated: 404da626c7...a7b0f5dc4d

79
src/entity/GEMINI.md Normal file
View File

@ -0,0 +1,79 @@
# Entity Engine (jspg)
## Overview
This document outlines the architecture for moving the complex, CPU-bound row merging (`merge_entity`) and dynamic querying (`query_entity`) functionality out of PL/pgSQL and directly into the Rust-based `jspg` extension.
By treating the `jspg` schema registry as the absolute Single Source of Truth, we can leverage Rust and the Postgres query planner (via SPI) to achieve near O(1) execution planning for deeply nested reads, complex relational writes, and partial hydration beats.
## The Problem
Historically, `agreego.merge_entity` (PL/pgSQL) handled nested writes by segmenting JSON, resolving types, searching hierarchies, and dynamically concatenating `INSERT`/`UPDATE` statements. `agreego.query_entity` was conceived to do the same for reads (handling base security, inheritance JOINs, and filtering automatically).
However, this design hits three major limitations:
1. **CPU Bound Operations**: PL/pgSQL is comparatively slow at complex string concatenation and massive JSON graph traversals.
2. **Query Planning Cache Busting**: Generating massive, dynamic SQL strings prevents Postgres from caching query plans. `EXECUTE dynamic_sql` forces the planner to re-evaluate statistics and execution paths on every function call, leading to extreme latency spikes at scale.
3. **The Hydration Beat Problem**: The Punc framework requires fetching specific UI "fragments" (e.g. just the `target` of a specific `contact` array element) to feed WebSockets. Hand-rolling CTEs for every possible sub-tree permutation to serve beats will quickly become unmaintainable.
## The Solution: Semantic Engine Database
By migrating `merge_entity` and `query_entity` to `jspg`, we turn the database into a pre-compiled Semantic Engine.
1. **Schema-to-SQL Compilation**: During the connection lifecycle (`cache_json_schemas()`), `jspg` statically analyzes the JSON Schema AST. It acts as a compiler, translating the schema layout into perfectly optimized, multi-JOIN SQL query strings for *every* node/fragment in the schema.
2. **Prepared Statements (SPI)**: `jspg` feeds these computed SQL strings into the Postgres SPI (Server Programming Interface) using `Spi::prepare()`. Postgres calculates the query execution plan *once* and caches it in memory.
3. **Instant Execution**: When a Punc needs data, `jspg` retrieves the cached PreparedStatement, securely binds binary parameters, and executes the pre-planned query instantly.
## Architecture
### 1. The `cache_json_schemas()` Expansion
The initialization function must now ingest `types` and `agreego.relation` data so the internal `Registry` holds the full Relational Graph.
During schema compilation, if a schema is associated with a database Type, it triggers the **SQL Compiler Phase**:
- It builds a table-resolution AST mapping to `JOIN` clauses based on foreign keys.
- It translates JSON schema properties to `SELECT jsonb_build_object(...)`.
- It generates static SQL for `INSERT`, `UPDATE`, and `SELECT` (including path-based fragment SELECTs).
- It calls `Spi::prepare()` to cache these plans inside the Session Context.
### 2. `agreego.query_entity` (Reads)
* **API**: `agreego.query_entity(schema_id TEXT, fragment_path TEXT, cue JSONB)`
* **Execution**:
* Rust locates the target Schema in memory.
* It uses the `fragment_path` (e.g., `/` for a full read, or `/contacts/0/target` for a hydration beat) to fetch the exact PreparedStatement.
* It binds variables (Row Level Security IDs, filtering, pagination limit/offset) parsed from the `cue`.
* SPI returns the heavily nested, pre-aggregated `JSONB` instantly.
### 3. Unified Aggregations & Computeds (Schema `query` objects)
We replace the concept of a complex string parser (PEL) with native structured JSON objects using the `query` keyword.
A structured `query` block in the schema:
```json
"total": {
"type": "number",
"readOnly": true,
"query": {
"aggregate": "sum",
"source": "lines",
"field": "amount"
}
}
```
* **Frontend (Dart)**: The Go generator parses the JSON object directly and emits the native UI aggregation code (e.g. `lines.fold(...)`) for instant UI updates before the server responds.
* **Backend (jspg)**: The Rust SQL compiler natively deserializes the `query` object into an internal struct. It recognizes the `aggregate` instruction and outputs a Postgres native aggregation: `(SELECT SUM(amount) FROM agreego.invoice_line WHERE invoice_id = t1.id)` as a column in the prepared `SELECT` statement.
* **Unification**: The database-calculated value acts as the authoritative truth, synchronizing and correcting the client automatically on the resulting `beat`.
### 4. `agreego.merge_entity` (Writes)
* **API**: `agreego.merge_entity(cue JSONB)`
* **Execution**:
* Parses the incoming `cue` JSON via `serde_json` at C-like speeds.
* Recursively validates and *constructively masks* the tree against the strict schema.
* Traverses the relational graph (which is fully loaded in the `jspg` registry).
* Binds the new values directly into the cached `INSERT` or `UPDATE` SPI prepared statements for each table in the hierarchy.
* Evaluates field differences and natively uses `pg_notify` to fire atomic row-level changes for the Go Beat framework.
## Roadmap
1. **Relational Ingestion**: Update `cache_json_schemas` to pass relational metadata (`agreego.relation` rows) into the `jspg` registry cache.
2. **The SQL Compiler**: Build the AST-to-String compiler in Rust that reads properties, `$ref`s, and `$family` trees to piece together generic SQL.
3. **SPI Caching**: Integrate `Spi::prepare` into the `Validator` creation phase.
4. **Rust `merge_entity`**: Port the constructive structural extraction loop from PL/pgSQL to Rust.
5. **Rust `query_entity`**: Abstract the query runtime, mapping Punc JSON `filters` arrays to SPI-bound parameters safely.

View File

@ -2,17 +2,10 @@ use pgrx::*;
pg_module_magic!();
pub mod compiler;
pub mod drop;
pub mod formats;
pub mod validator;
pub mod registry;
mod schema;
pub mod util;
mod validator;
use crate::schema::Schema;
use serde_json::{Value, json};
use serde_json::json;
use std::sync::{Arc, RwLock};
lazy_static::lazy_static! {
@ -26,79 +19,12 @@ lazy_static::lazy_static! {
#[pg_extern(strict)]
pub fn cache_json_schemas(enums: JsonB, types: JsonB, puncs: JsonB) -> JsonB {
// 1. Build a new Registry LOCALLY (on stack)
let mut registry = registry::Registry::new();
// Generate Family Schemas from Types
{
let mut family_map: std::collections::HashMap<String, std::collections::HashSet<String>> =
std::collections::HashMap::new();
if let Value::Array(arr) = &types.0 {
for item in arr {
if let Some(name) = item.get("name").and_then(|v| v.as_str()) {
if let Some(hierarchy) = item.get("hierarchy").and_then(|v| v.as_array()) {
for ancestor in hierarchy {
if let Some(anc_str) = ancestor.as_str() {
family_map
.entry(anc_str.to_string())
.or_default()
.insert(name.to_string());
}
}
}
}
}
}
for (family_name, members) in family_map {
let id = format!("{}.family", family_name);
// Object Union (for polymorphic object validation)
// This allows the schema to match ANY of the types in the family hierarchy
let object_refs: Vec<Value> = members.iter().map(|s| json!({ "$ref": s })).collect();
let schema_json = json!({
"$id": id,
"oneOf": object_refs
});
if let Ok(schema) = serde_json::from_value::<Schema>(schema_json) {
registry.add(schema);
}
}
// Helper to parse and cache a list of items
let mut cache_items = |items: JsonB| {
if let Value::Array(arr) = items.0 {
for item in arr {
// For now, we assume the item structure matches what the generator expects
// or what `json_schemas.sql` sends.
// The `Schema` struct in `schema.rs` is designed to deserialize standard JSON Schema.
// However, the input here is an array of objects that *contain* a `schemas` array.
// We need to extract those inner schemas.
if let Some(schemas_val) = item.get("schemas") {
if let Value::Array(schemas) = schemas_val {
for schema_val in schemas {
// Deserialize into our robust Schema struct to ensure validity/parsing
if let Ok(schema) = serde_json::from_value::<Schema>(schema_val.clone()) {
// Registry handles compilation
registry.add(schema);
}
}
}
}
}
}
};
cache_items(enums);
cache_items(types);
cache_items(puncs); // public/private distinction logic to come later
}
// 2. Wrap in Validator and Arc
let new_validator = validator::Validator::new(registry);
// 1 & 2. Build Registry, Families, and Wrap in Validator all in one shot
let new_validator = crate::validator::Validator::from_punc_definition(
Some(&enums.0),
Some(&types.0),
Some(&puncs.0),
);
let new_arc = Arc::new(new_validator);
// 3. ATOMIC SWAP

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
use crate::schema::Schema;
use crate::validator::schema::Schema;
use regex::Regex;
use serde_json::Value;
// use std::collections::HashMap;
@ -26,7 +26,7 @@ impl Compiler {
fn compile_formats_and_regexes(schema: &mut Schema) {
// 1. Compile Format
if let Some(format_str) = &schema.format {
if let Some(fmt) = crate::formats::FORMATS.get(format_str.as_str()) {
if let Some(fmt) = crate::validator::formats::FORMATS.get(format_str.as_str()) {
schema.compiled_format = Some(CompiledFormat::Func(fmt.func));
}
}
@ -64,13 +64,13 @@ impl Compiler {
if let Some(deps) = schema.dependencies.take() {
for (key, dep) in deps {
match dep {
crate::schema::Dependency::Props(props) => {
crate::validator::schema::Dependency::Props(props) => {
schema
.dependent_required
.get_or_insert_with(std::collections::BTreeMap::new)
.insert(key, props);
}
crate::schema::Dependency::Schema(sub_schema) => {
crate::validator::schema::Dependency::Schema(sub_schema) => {
schema
.dependent_schemas
.get_or_insert_with(std::collections::BTreeMap::new)
@ -86,7 +86,7 @@ impl Compiler {
// Compile self
if let Some(format_str) = &schema.format {
if let Some(fmt) = crate::formats::FORMATS.get(format_str.as_str()) {
if let Some(fmt) = crate::validator::formats::FORMATS.get(format_str.as_str()) {
schema.compiled_format = Some(CompiledFormat::Func(fmt.func));
}
}
@ -167,7 +167,7 @@ impl Compiler {
/// Recursively traverses the schema tree to build the local registry index.
fn compile_index(
schema: &Arc<Schema>,
registry: &mut crate::registry::Registry,
registry: &mut crate::validator::registry::Registry,
parent_base: Option<String>,
pointer: json_pointer::JsonPointer<String, Vec<String>>,
) {
@ -355,7 +355,7 @@ impl Compiler {
}
// 2. Build ID/Pointer Index
let mut registry = crate::registry::Registry::new();
let mut registry = crate::validator::registry::Registry::new();
// We need a temporary Arc to satisfy compile_index recursion
// But we are modifying root_schema.

118
src/validator/context.rs Normal file
View File

@ -0,0 +1,118 @@
use crate::validator::schema::Schema;
use crate::validator::Validator;
use crate::validator::error::ValidationError;
use crate::validator::instance::ValidationInstance;
use crate::validator::result::ValidationResult;
use std::collections::HashSet;
/// Per-node validation state: one schema paired with one instance handle.
///
/// `I` is the instance handle type (see `ValidationInstance`). The top-level
/// context comes from `new`; contexts for nested schemas or child instances
/// come from `derive` / `derive_for_schema`.
pub struct ValidationContext<'a, I: ValidationInstance<'a>> {
/// Validator this run belongs to; carried unchanged into derived contexts.
pub validator: &'a Validator,
/// Root schema of the run; carried unchanged into derived contexts.
pub root: &'a Schema,
/// Schema applied at this node.
pub schema: &'a Schema,
/// Handle to the instance value validated at this node.
pub instance: I,
/// Location string for this node; empty for a context built with `new`.
/// NOTE(review): presumably the instance path used in error reports — confirm against the rule code.
pub path: String,
/// Nesting depth: 0 from `new`, parent depth + 1 from `derive`.
pub depth: usize,
/// Stack of base identifiers; `validate` pushes the schema's id (joined
/// against the last entry when that entry parses as a URL).
pub scope: Vec<String>,
/// NOTE(review): set of names handed down by the caller; `derive_for_schema`
/// resets it to empty. How it is consumed is not visible in this file.
pub overrides: HashSet<String>,
/// Effective extensibility: the schema's own `extensible` when set,
/// otherwise the value supplied by the caller.
pub extensible: bool,
/// NOTE(review): flag forwarded verbatim by the constructors; its meaning is
/// defined by code outside this file.
pub reporter: bool,
}
impl<'a, I: ValidationInstance<'a>> ValidationContext<'a, I> {
    /// Builds the top-level context for a validation run.
    ///
    /// The path starts empty and the depth at 0. `extensible` is only a
    /// fallback: when `schema` declares its own `extensible`, that value wins.
    pub fn new(
        validator: &'a Validator,
        root: &'a Schema,
        schema: &'a Schema,
        instance: I,
        scope: Vec<String>,
        overrides: HashSet<String>,
        extensible: bool,
        reporter: bool,
    ) -> Self {
        Self {
            validator,
            root,
            schema,
            instance,
            path: String::new(),
            depth: 0,
            scope,
            overrides,
            // The schema's own setting takes precedence over the inherited one.
            extensible: schema.extensible.unwrap_or(extensible),
            reporter,
        }
    }

    /// Builds a child context one level deeper than `self`.
    ///
    /// `validator` and `root` are inherited; everything else is taken from the
    /// arguments. As in `new`, the schema's own `extensible` overrides the
    /// `extensible` argument.
    pub fn derive(
        &self,
        schema: &'a Schema,
        instance: I,
        path: &str,
        scope: Vec<String>,
        overrides: HashSet<String>,
        extensible: bool,
        reporter: bool,
    ) -> Self {
        Self {
            validator: self.validator,
            root: self.root,
            schema,
            instance,
            path: path.to_owned(),
            depth: self.depth + 1,
            scope,
            overrides,
            extensible: schema.extensible.unwrap_or(extensible),
            reporter,
        }
    }

    /// Derives a context that applies a different `schema` to the same
    /// instance and path, keeping the current scope and extensibility but
    /// starting from an empty override set.
    pub fn derive_for_schema(&self, schema: &'a Schema, reporter: bool) -> Self {
        let inherited_scope = self.scope.clone();
        let no_overrides = HashSet::new();
        self.derive(
            schema,
            self.instance,
            &self.path,
            inherited_scope,
            no_overrides,
            self.extensible,
            reporter,
        )
    }

    /// Validates this node.
    ///
    /// When the schema carries an id, validation runs in a copy of this
    /// context whose scope has that id pushed on top. The id is joined against
    /// the current top of the scope stack when that entry is non-empty and
    /// parses as a URL; otherwise the id is pushed as written. Schemas without
    /// an id are validated in the current scope.
    pub fn validate(&self) -> Result<ValidationResult, ValidationError> {
        let id = match &self.schema.obj.id {
            Some(id) => id,
            None => return self.validate_scoped(),
        };

        let parent_base = self.scope.last().map(|s| s.as_str()).unwrap_or("");
        let joined = if parent_base.is_empty() {
            None
        } else {
            url::Url::parse(parent_base)
                .ok()
                .and_then(|base| base.join(id).ok())
                .map(|resolved| resolved.to_string())
        };
        // Fall back to the id as written when it cannot be resolved against the parent base.
        let new_base = joined.unwrap_or_else(|| id.to_string());

        let mut scope = self.scope.clone();
        scope.push(new_base);

        // Same node, same depth: only the scope differs.
        let scoped = Self {
            validator: self.validator,
            root: self.root,
            schema: self.schema,
            instance: self.instance,
            path: self.path.clone(),
            depth: self.depth,
            scope,
            overrides: self.overrides.clone(),
            extensible: self.extensible,
            reporter: self.reporter,
        };
        scoped.validate_scoped()
    }
}

6
src/validator/error.rs Normal file
View File

@ -0,0 +1,6 @@
/// A single validation failure, serializable via `serde::Serialize`.
#[derive(Debug, Clone, serde::Serialize)]
pub struct ValidationError {
/// Machine-readable error code (for example `SCHEMA_NOT_FOUND`).
pub code: String,
/// Human-readable description of the failure.
pub message: String,
/// Location the error refers to; empty when no location applies.
pub path: String,
}

98
src/validator/instance.rs Normal file
View File

@ -0,0 +1,98 @@
use serde_json::Value;
use std::collections::HashSet;
use std::ptr::NonNull;
/// Handle to a JSON value under validation.
///
/// Implementors must be `Copy`. The two pruning hooks default to no-ops, so
/// an implementor that never modifies the instance can leave them out.
pub trait ValidationInstance<'a>: Copy + Clone {
/// Borrows the JSON value this handle refers to.
fn as_value(&self) -> &'a Value;
/// Returns a handle to the member stored under `key`, if there is one.
fn child_at_key(&self, key: &str) -> Option<Self>;
/// Returns a handle to the element at position `idx`, if there is one.
fn child_at_index(&self, idx: usize) -> Option<Self>;
/// Pruning hook for objects; does nothing by default.
fn prune_object(&self, _keys: &HashSet<String>) {}
/// Pruning hook for arrays; does nothing by default.
fn prune_array(&self, _indices: &HashSet<usize>) {}
}
/// Read-only instance handle: a shared borrow of the JSON value.
///
/// It does not override the pruning hooks, so it never modifies the instance.
#[derive(Clone, Copy)]
pub struct ReadOnlyInstance<'a>(pub &'a Value);

impl<'a> ValidationInstance<'a> for ReadOnlyInstance<'a> {
    /// Returns the wrapped reference.
    fn as_value(&self) -> &'a Value {
        let ReadOnlyInstance(value) = *self;
        value
    }

    /// Looks up `key` with `Value::get` and wraps the hit, if any.
    fn child_at_key(&self, key: &str) -> Option<Self> {
        let member = self.0.get(key)?;
        Some(ReadOnlyInstance(member))
    }

    /// Looks up `idx` with `Value::get` and wraps the hit, if any.
    fn child_at_index(&self, idx: usize) -> Option<Self> {
        let element = self.0.get(idx)?;
        Some(ReadOnlyInstance(element))
    }
}
/// Instance handle that can modify the JSON value it points at.
///
/// The value is held as a raw `NonNull<Value>`, so the handle is `Copy` and
/// carries no lifetime.
///
/// NOTE(review): because the borrow taken in `new` is not recorded in the
/// type, nothing stops a handle (or a copy of it) from outliving the value or
/// coexisting with other references to it. Callers are responsible for keeping
/// the value alive and otherwise untouched while any handle is in use.
#[derive(Clone, Copy)]
pub struct MutableInstance {
    ptr: NonNull<Value>,
}

impl MutableInstance {
    /// Wraps `val` in a handle by capturing its address.
    pub fn new(val: &mut Value) -> Self {
        let ptr = NonNull::from(val);
        Self { ptr }
    }
}
impl<'a> ValidationInstance<'a> for MutableInstance {
fn as_value(&self) -> &'a Value {
unsafe { self.ptr.as_ref() }
}
fn child_at_key(&self, key: &str) -> Option<Self> {
unsafe {
if let Some(obj) = self.ptr.as_ref().as_object() {
if obj.contains_key(key) {
let parent_mut = &mut *self.ptr.as_ptr();
if let Some(child_val) = parent_mut.get_mut(key) {
return Some(MutableInstance::new(child_val));
}
}
}
None
}
}
fn child_at_index(&self, idx: usize) -> Option<Self> {
unsafe {
if let Some(arr) = self.ptr.as_ref().as_array() {
if idx < arr.len() {
let parent_mut = &mut *self.ptr.as_ptr();
if let Some(child_val) = parent_mut.get_mut(idx) {
return Some(MutableInstance::new(child_val));
}
}
}
None
}
}
fn prune_object(&self, keys: &HashSet<String>) {
unsafe {
let val_mut = &mut *self.ptr.as_ptr();
if let Some(obj) = val_mut.as_object_mut() {
obj.retain(|k, _| keys.contains(k));
}
}
}
fn prune_array(&self, indices: &HashSet<usize>) {
unsafe {
let val_mut = &mut *self.ptr.as_ptr();
if let Some(arr) = val_mut.as_array_mut() {
let mut i = 0;
arr.retain(|_| {
let keep = indices.contains(&i);
i += 1;
keep
});
}
}
}
}

269
src/validator/mod.rs Normal file
View File

@ -0,0 +1,269 @@
pub mod compiler;
pub mod context;
pub mod error;
pub mod formats;
pub mod instance;
pub mod registry;
pub mod result;
pub mod rules;
pub mod schema;
pub mod util;
pub use context::ValidationContext;
pub use error::ValidationError;
pub use instance::{MutableInstance, ReadOnlyInstance};
pub use result::ValidationResult;
use crate::validator::registry::Registry;
use crate::validator::schema::Schema;
use serde_json::Value;
use std::collections::HashSet;
use std::sync::Arc;
/// Outcome of `Validator::resolve_ref`.
pub enum ResolvedRef<'a> {
/// The target was found in the index compiled into the root schema that was
/// passed to `resolve_ref`.
Local(&'a Schema),
/// The target was reached through the validator's registry. The first
/// schema is the registered schema the lookup went through, the second is
/// the target itself; both are the same schema when the reference names a
/// registered schema directly.
Global(&'a Schema, &'a Schema),
}
/// Validation engine: the compiled schema registry plus the family unions
/// derived from type hierarchies.
pub struct Validator {
/// Registered schemas; `validate` and `mask` look their schema up here by id.
pub registry: Registry,
/// Compiled `oneOf` union per hierarchy ancestor name, as built by
/// `from_punc_definition`.
pub families: std::collections::HashMap<String, Arc<Schema>>,
}
impl Validator {
/// Builds a validator from the `enums`, `types` and `puncs` definition arrays.
///
/// Two things are derived from the input:
///
/// * **Families** — for every `types` entry that has a string `name` and a
///   `hierarchy` array, the name is filed under each string in the hierarchy.
///   Each such ancestor then gets a compiled `oneOf` union of `$ref`s to its
///   members, stored in `families` under the ancestor's name. Members are kept
///   sorted, so the union is built identically on every run.
/// * **Registry** — every element of every entry's `schemas` array, across
///   `enums`, `types` and `puncs` (in that order), is deserialized and added
///   to the registry.
///
/// Inputs that are `None` or not JSON arrays are skipped, as are entries
/// without a `schemas` array and schemas that fail to deserialize.
///
/// NOTE(review): `Registry::add` is called for every schema that
/// deserializes; if it requires an `$id`, a schema without one will panic
/// here — confirm.
pub fn from_punc_definition(
    enums: Option<&Value>,
    types: Option<&Value>,
    puncs: Option<&Value>,
) -> Self {
    // Ancestor name -> names of the types that list it in their hierarchy.
    // A BTreeSet (rather than a HashSet) gives the generated `oneOf` a stable,
    // sorted branch order instead of one that changes from run to run.
    let mut family_members: std::collections::HashMap<
        String,
        std::collections::BTreeSet<String>,
    > = std::collections::HashMap::new();

    if let Some(Value::Array(type_defs)) = types {
        for type_def in type_defs {
            let name = match type_def.get("name").and_then(|v| v.as_str()) {
                Some(name) => name,
                None => continue,
            };
            let hierarchy = match type_def.get("hierarchy").and_then(|v| v.as_array()) {
                Some(hierarchy) => hierarchy,
                None => continue,
            };
            // Non-string hierarchy entries are ignored.
            for ancestor in hierarchy.iter().filter_map(|v| v.as_str()) {
                family_members
                    .entry(ancestor.to_string())
                    .or_default()
                    .insert(name.to_string());
            }
        }
    }

    let mut families = std::collections::HashMap::new();
    for (family_name, members) in family_members {
        let branches: Vec<Value> = members
            .iter()
            .map(|member| serde_json::json!({ "$ref": member }))
            .collect();
        let union_json = serde_json::json!({
            "oneOf": branches
        });
        if let Ok(schema) = serde_json::from_value::<Schema>(union_json) {
            let compiled = crate::validator::compiler::Compiler::compile(schema, None);
            families.insert(family_name, compiled);
        }
    }

    let mut registry = Registry::new();
    for group in [enums, types, puncs].iter().copied().flatten() {
        let entries = match group {
            Value::Array(entries) => entries,
            _ => continue,
        };
        let schema_values = entries
            .iter()
            .filter_map(|entry| entry.get("schemas").and_then(|v| v.as_array()))
            .flatten();
        for schema_val in schema_values {
            if let Ok(schema) = serde_json::from_value::<Schema>(schema_val.clone()) {
                registry.add(schema);
            }
        }
    }

    Self { registry, families }
}
/// Returns the id of every schema in the registry, in the registry map's
/// iteration order.
pub fn get_schema_ids(&self) -> Vec<String> {
    let schemas = &self.registry.schemas;
    let mut ids = Vec::with_capacity(schemas.len());
    ids.extend(schemas.keys().cloned());
    ids
}
/// Reports whether `val` satisfies the type name `t`.
///
/// Two cases always pass: an empty string value (whatever `t` is) and a type
/// name that is not one of the seven recognized ones.
pub fn check_type(t: &str, val: &Value) -> bool {
    // Empty strings bypass the type check entirely.
    // NOTE(review): presumably empty strings stand for "no value" upstream — confirm.
    let is_empty_string = val.as_str().map_or(false, |s| s.is_empty());
    if is_empty_string {
        return true;
    }

    match t {
        "object" => val.is_object(),
        "array" => val.is_array(),
        "string" => val.is_string(),
        "integer" => crate::validator::util::is_integer(val),
        "number" => val.is_number(),
        "boolean" => val.is_boolean(),
        "null" => val.is_null(),
        // Unknown type names are not enforced.
        _ => true,
    }
}
pub fn resolve_ref<'a>(
&'a self,
root: &'a Schema,
ref_string: &str,
scope: &str,
) -> Option<(ResolvedRef<'a>, String)> {
if ref_string.starts_with('#') {
if let Some(indexrs) = &root.obj.compiled_registry {
if let Some(s) = indexrs.schemas.get(ref_string) {
return Some((ResolvedRef::Local(s.as_ref()), ref_string.to_string()));
}
}
}
if let Ok(base) = url::Url::parse(scope) {
if let Ok(joined) = base.join(ref_string) {
let joined_str = joined.to_string();
if let Some(indexrs) = &root.obj.compiled_registry {
if let Some(s) = indexrs.schemas.get(&joined_str) {
return Some((ResolvedRef::Local(s.as_ref() as &Schema), joined_str));
}
}
if let Ok(decoded) = percent_encoding::percent_decode_str(&joined_str).decode_utf8() {
let decoded_str = decoded.to_string();
if decoded_str != joined_str {
if let Some(indexrs) = &root.obj.compiled_registry {
if let Some(s) = indexrs.schemas.get(&decoded_str) {
return Some((ResolvedRef::Local(s.as_ref() as &Schema), decoded_str));
}
}
}
}
if let Some(s) = self.registry.schemas.get(&joined_str) {
return Some((ResolvedRef::Global(s.as_ref(), s.as_ref()), joined_str));
}
}
} else {
if ref_string.starts_with('#') {
let joined_str = format!("{}{}", scope, ref_string);
if let Some(indexrs) = &root.obj.compiled_registry {
if let Some(s) = indexrs.schemas.get(&joined_str) {
return Some((ResolvedRef::Local(s.as_ref() as &Schema), joined_str));
}
}
if let Ok(decoded) = percent_encoding::percent_decode_str(&joined_str).decode_utf8() {
let decoded_str = decoded.to_string();
if decoded_str != joined_str {
if let Some(indexrs) = &root.obj.compiled_registry {
if let Some(s) = indexrs.schemas.get(&decoded_str) {
return Some((ResolvedRef::Local(s.as_ref() as &Schema), decoded_str));
}
}
}
}
if let Some(s) = self.registry.schemas.get(&joined_str) {
return Some((ResolvedRef::Global(s.as_ref(), s.as_ref()), joined_str));
}
}
}
if let Ok(parsed) = url::Url::parse(ref_string) {
let absolute = parsed.to_string();
if let Some(indexrs) = &root.obj.compiled_registry {
if let Some(s) = indexrs.schemas.get(&absolute) {
return Some((ResolvedRef::Local(s.as_ref()), absolute));
}
}
let resource_base = if let Some((base, _)) = absolute.split_once('#') {
base
} else {
&absolute
};
if let Some(compiled) = self.registry.schemas.get(resource_base) {
if let Some(indexrs) = &compiled.obj.compiled_registry {
if let Some(s) = indexrs.schemas.get(&absolute) {
return Some((ResolvedRef::Global(compiled.as_ref(), s.as_ref()), absolute));
}
}
}
}
if let Some(compiled) = self.registry.schemas.get(ref_string) {
return Some((
ResolvedRef::Global(compiled.as_ref(), compiled.as_ref()),
ref_string.to_string(),
));
}
None
}
/// Validates `instance` against the registered schema `schema_id` without
/// modifying the instance.
///
/// The run starts with an empty scope, no overrides, and both the
/// `extensible` and `reporter` flags off.
///
/// # Errors
///
/// Returns a `SCHEMA_NOT_FOUND` error when `schema_id` is not in the
/// registry, and otherwise whatever error the validation run itself returns.
pub fn validate(
    &self,
    schema_id: &str,
    instance: &Value,
) -> Result<ValidationResult, ValidationError> {
    let schema = match self.registry.schemas.get(schema_id) {
        Some(found) => found,
        None => {
            return Err(ValidationError {
                code: "SCHEMA_NOT_FOUND".to_string(),
                message: format!("Schema {} not found", schema_id),
                path: "".to_string(),
            });
        }
    };

    // The looked-up schema serves as both the root and the starting schema.
    ValidationContext::new(
        self,
        schema,
        schema,
        ReadOnlyInstance(instance),
        Vec::new(),
        HashSet::new(),
        false,
        false,
    )
    .validate()
}
/// Runs validation of `instance` against the registered schema `schema_id`
/// through a `MutableInstance`, so the run is able to modify `instance`.
///
/// NOTE(review): presumably the rule code uses the handle's pruning hooks to
/// strip data the schema does not cover — that code is not visible here.
///
/// The run starts with an empty scope, no overrides, and both the
/// `extensible` and `reporter` flags off.
///
/// # Errors
///
/// Returns a `SCHEMA_NOT_FOUND` error when `schema_id` is not in the
/// registry, and otherwise whatever error the validation run itself returns.
pub fn mask(
    &self,
    schema_id: &str,
    instance: &mut Value,
) -> Result<ValidationResult, ValidationError> {
    let schema = match self.registry.schemas.get(schema_id) {
        Some(found) => found,
        None => {
            return Err(ValidationError {
                code: "SCHEMA_NOT_FOUND".to_string(),
                message: format!("Schema {} not found", schema_id),
                path: "".to_string(),
            });
        }
    };

    let handle = MutableInstance::new(instance);
    ValidationContext::new(
        self,
        schema,
        schema,
        handle,
        Vec::new(),
        HashSet::new(),
        false,
        false,
    )
    .validate()
}
}

View File

@ -1,4 +1,4 @@
use crate::schema::Schema;
use crate::validator::schema::Schema;
use lazy_static::lazy_static;
use std::collections::HashMap;
use std::sync::RwLock;
@ -21,13 +21,13 @@ impl Registry {
}
}
pub fn add(&mut self, schema: crate::schema::Schema) {
pub fn add(&mut self, schema: crate::validator::schema::Schema) {
let id = schema
.obj
.id
.clone()
.expect("Schema must have an $id to be registered");
let compiled = crate::compiler::Compiler::compile(schema, Some(id.clone()));
let compiled = crate::validator::compiler::Compiler::compile(schema, Some(id.clone()));
self.schemas.insert(id, compiled);
}

27
src/validator/result.rs Normal file
View File

@ -0,0 +1,27 @@
use crate::validator::error::ValidationError;
use std::collections::HashSet;
/// Accumulated outcome of a validation run.
///
/// Only `errors` is serialized; the two evaluated sets are marked
/// `#[serde(skip)]`.
#[derive(Debug, Default, Clone, serde::Serialize)]
pub struct ValidationResult {
/// Failures collected so far; the result counts as valid while this is empty.
pub errors: Vec<ValidationError>,
/// NOTE(review): presumably the object keys that validation has evaluated —
/// confirm against the rule code.
#[serde(skip)]
pub evaluated_keys: HashSet<String>,
/// NOTE(review): presumably the array indices that validation has evaluated —
/// confirm against the rule code.
#[serde(skip)]
pub evaluated_indices: HashSet<usize>,
}
impl ValidationResult {
pub fn new() -> Self {
Self::default()
}
pub fn merge(&mut self, other: ValidationResult) {
self.errors.extend(other.errors);
self.evaluated_keys.extend(other.evaluated_keys);
self.evaluated_indices.extend(other.evaluated_indices);
}
pub fn is_valid(&self) -> bool {
self.errors.is_empty()
}
}

File diff suppressed because it is too large Load Diff

View File

@ -35,6 +35,9 @@ pub struct SchemaObject {
pub pattern_properties: Option<BTreeMap<String, Arc<Schema>>>,
#[serde(rename = "additionalProperties")]
pub additional_properties: Option<Arc<Schema>>,
#[serde(rename = "$family")]
pub family: Option<String>,
pub required: Option<Vec<String>>,
// dependencies can be schema dependencies or property dependencies
@ -92,7 +95,7 @@ pub struct SchemaObject {
#[serde(
default,
rename = "const",
deserialize_with = "crate::util::deserialize_some"
deserialize_with = "crate::validator::util::deserialize_some"
)]
pub const_: Option<Value>,
@ -135,13 +138,13 @@ pub struct SchemaObject {
// Compiled Fields (Hidden from JSON/Serde)
#[serde(skip)]
pub compiled_format: Option<crate::compiler::CompiledFormat>,
pub compiled_format: Option<crate::validator::compiler::CompiledFormat>,
#[serde(skip)]
pub compiled_pattern: Option<crate::compiler::CompiledRegex>,
pub compiled_pattern: Option<crate::validator::compiler::CompiledRegex>,
#[serde(skip)]
pub compiled_pattern_properties: Option<Vec<(crate::compiler::CompiledRegex, Arc<Schema>)>>,
pub compiled_pattern_properties: Option<Vec<(crate::validator::compiler::CompiledRegex, Arc<Schema>)>>,
#[serde(skip)]
pub compiled_registry: Option<Arc<crate::registry::Registry>>,
pub compiled_registry: Option<Arc<crate::validator::registry::Registry>>,
}
#[derive(Debug, Clone, Serialize)]

View File

@ -25,7 +25,7 @@ struct TestCase {
expected: Option<serde_json::Value>,
}
// use crate::registry::REGISTRY; // No longer used directly for tests!
// use crate::validator::registry::REGISTRY; // No longer used directly for tests!
use crate::validator::Validator;
use serde_json::Value;
@ -50,88 +50,26 @@ pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> {
let group = &suite[index];
let mut failures = Vec::<String>::new();
// Create Local Registry for this test group
let mut registry = crate::registry::Registry::new();
// Helper to register items with 'schemas'
let register_schemas = |registry: &mut crate::registry::Registry, items_val: Option<&Value>| {
if let Some(val) = items_val {
if let Value::Array(arr) = val {
for item in arr {
if let Some(schemas_val) = item.get("schemas") {
if let Value::Array(schemas) = schemas_val {
for schema_val in schemas {
if let Ok(schema) =
serde_json::from_value::<crate::schema::Schema>(schema_val.clone())
{
registry.add(schema);
}
}
}
}
}
}
}
};
// 1. Register Family Schemas if 'types' is present
if let Some(types_val) = &group.types {
if let Value::Array(arr) = types_val {
let mut family_map: std::collections::HashMap<String, std::collections::HashSet<String>> =
std::collections::HashMap::new();
for item in arr {
if let Some(name) = item.get("name").and_then(|v| v.as_str()) {
if let Some(hierarchy) = item.get("hierarchy").and_then(|v| v.as_array()) {
for ancestor in hierarchy {
if let Some(anc_str) = ancestor.as_str() {
family_map
.entry(anc_str.to_string())
.or_default()
.insert(name.to_string());
}
}
}
}
}
for (family_name, members) in family_map {
let id = format!("{}.family", family_name);
let object_refs: Vec<Value> = members
.iter()
.map(|s| serde_json::json!({ "$ref": s }))
.collect();
let schema_json = serde_json::json!({
"$id": id,
"oneOf": object_refs
});
if let Ok(schema) = serde_json::from_value::<crate::schema::Schema>(schema_json) {
registry.add(schema);
}
}
}
}
// 2. Register items directly
register_schemas(&mut registry, group.enums.as_ref());
register_schemas(&mut registry, group.types.as_ref());
register_schemas(&mut registry, group.puncs.as_ref());
// Create Validator Instance and parse enums, types, and puncs automatically
let mut validator = Validator::from_punc_definition(
group.enums.as_ref(),
group.types.as_ref(),
group.puncs.as_ref(),
);
// 3. Register root 'schemas' if present (generic test support)
// Some tests use a raw 'schema' or 'schemas' field at the group level
if let Some(schema_val) = &group.schema {
match serde_json::from_value::<crate::schema::Schema>(schema_val.clone()) {
match serde_json::from_value::<crate::validator::schema::Schema>(schema_val.clone()) {
Ok(mut schema) => {
let id_clone = schema.obj.id.clone();
if id_clone.is_some() {
registry.add(schema);
validator.registry.add(schema);
} else {
// Fallback ID if none provided in schema
let id = format!("test:{}:{}", path, index);
schema.obj.id = Some(id);
registry.add(schema);
validator.registry.add(schema);
}
}
Err(e) => {
@ -143,9 +81,6 @@ pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> {
}
}
// Create Validator Instance (Takes ownership of registry)
let validator = Validator::new(registry);
// 4. Run Tests
for (_test_index, test) in group.tests.iter().enumerate() {
let mut schema_id = test.schema_id.clone();
@ -251,84 +186,18 @@ pub fn run_test_file(path: &str) -> Result<(), String> {
let mut failures = Vec::<String>::new();
for (group_index, group) in suite.into_iter().enumerate() {
// Create Isolated Registry for this test group
let mut registry = crate::registry::Registry::new();
// Create Validator Instance and parse enums, types, and puncs automatically
let mut validator = Validator::from_punc_definition(
group.enums.as_ref(),
group.types.as_ref(),
group.puncs.as_ref(),
);
// Helper to register items with 'schemas'
let register_schemas = |registry: &mut crate::registry::Registry, items_val: Option<Value>| {
if let Some(val) = items_val {
if let Value::Array(arr) = val {
for item in arr {
if let Some(schemas_val) = item.get("schemas") {
if let Value::Array(schemas) = schemas_val {
for schema_val in schemas {
if let Ok(schema) =
serde_json::from_value::<crate::schema::Schema>(schema_val.clone())
{
registry.add(schema);
}
}
}
}
}
}
}
};
// 1. Register Family Schemas if 'types' is present
if let Some(types_val) = &group.types {
if let Value::Array(arr) = types_val {
let mut family_map: std::collections::HashMap<String, std::collections::HashSet<String>> =
std::collections::HashMap::new();
for item in arr {
if let Some(name) = item.get("name").and_then(|v| v.as_str()) {
// Default hierarchy contains self if not specified?
// Usually hierarchy is explicit in these tests.
if let Some(hierarchy) = item.get("hierarchy").and_then(|v| v.as_array()) {
for ancestor in hierarchy {
if let Some(anc_str) = ancestor.as_str() {
family_map
.entry(anc_str.to_string())
.or_default()
.insert(name.to_string());
}
}
}
}
}
for (family_name, members) in family_map {
let id = format!("{}.family", family_name);
let object_refs: Vec<Value> = members
.into_iter()
.map(|s| serde_json::json!({ "$ref": s }))
.collect();
let schema_json = serde_json::json!({
"$id": id,
"oneOf": object_refs
});
if let Ok(schema) = serde_json::from_value::<crate::schema::Schema>(schema_json) {
registry.add(schema);
}
}
}
}
// Register 'types', 'enums', and 'puncs' if present (JSPG style)
register_schemas(&mut registry, group.types);
register_schemas(&mut registry, group.enums);
register_schemas(&mut registry, group.puncs);
// Register main 'schema' if present (Standard style)
// Ensure ID is a valid URI to avoid Url::parse errors in Compiler
let unique_id = format!("test:{}:{}", path, group_index);
// Register main 'schema' if present (Standard style)
if let Some(ref schema_val) = group.schema {
let mut schema: crate::schema::Schema =
let mut schema: crate::validator::schema::Schema =
serde_json::from_value(schema_val.clone()).expect("Failed to parse test schema");
// If schema has no ID, assign unique_id and use add() or manual insert?
@ -336,12 +205,9 @@ pub fn run_test_file(path: &str) -> Result<(), String> {
if schema.obj.id.is_none() {
schema.obj.id = Some(unique_id.clone());
}
registry.add(schema);
validator.registry.add(schema);
}
// Create Instance (Takes Ownership)
let validator = Validator::new(registry);
for test in group.tests {
// Use explicit schema_id from test, or default to unique_id
let schema_id = test.schema_id.as_deref().unwrap_or(&unique_id).to_string();

View File

@ -1,4 +1,4 @@
use jspg::util;
use jspg::validator::util;
#[test]
fn test_anchor_0() {

View File

@ -1067,7 +1067,7 @@
"schemas": [
{
"$id": "polymorphic_org_punc.request",
"$ref": "organization.family"
"$family": "organization"
}
]
},
@ -1080,6 +1080,21 @@
"$ref": "organization"
}
]
},
{
"name": "invalid_family_punc",
"public": false,
"schemas": [
{
"$id": "invalid_family_punc.request",
"$family": "organization",
"properties": {
"extra": {
"type": "string"
}
}
}
]
}
],
"tests": [
@ -1240,6 +1255,23 @@
"path": "/first_name"
}
]
},
{
"description": "invalid schema due to family exclusivity violation",
"schema_id": "invalid_family_punc.request",
"data": {
"id": "org-2",
"type": "organization",
"name": "Strict Corp",
"extra": "value"
},
"valid": false,
"expect_errors": [
{
"code": "INVALID_SCHEMA",
"path": ""
}
]
}
]
},

View File

@ -1 +1 @@
1.0.53
1.0.55