jspg progress

2026-02-17 17:41:54 -05:00
parent 6e06b6fdc2
commit 32ed463df8
188 changed files with 36654 additions and 15058 deletions
--- a/src/compiler.rs
+++ b/src/compiler.rs
@ -0,0 +1,395 @@
+use crate::schema::Schema;
+use regex::Regex;
+use serde_json::Value;
+use std::collections::HashMap;
+use std::error::Error;
+use std::sync::Arc;
+
+/// Represents a compiled format validator
+///
+/// Stored on a schema node so that the `format` keyword does not have to be
+/// resolved by name again later.
+#[derive(Debug, Clone)]
+pub enum CompiledFormat {
+  /// A simple function pointer validator
+  ///
+  /// The function receives the JSON value and returns `Err` with a boxed
+  /// error when the value does not satisfy the format.
+  Func(fn(&Value) -> Result<(), Box<dyn Error + Send + Sync>>),
+  /// A regex-based validator
+  Regex(Regex),
+}
+
+/// A fully compiled schema with a root node and a pre-calculated index map.
+/// This allows O(1) lookup of any anchor or $id within the schema tree.
+#[derive(Debug, Clone)]
+pub struct CompiledSchema {
+  /// The root schema node, shared via `Arc`.
+  pub root: Arc<Schema>,
+  /// Lookup table from a URI string (base URI, `base#/json/pointer`, or
+  /// `base#anchor`) to the schema node it identifies.
+  pub index: HashMap<String, Arc<Schema>>,
+}
+
+/// A wrapper for compiled regex patterns
+///
+/// Newtype around [`Regex`]; the inner regex is public and can be used
+/// directly via `.0`.
+#[derive(Debug, Clone)]
+pub struct CompiledRegex(pub Regex);
+
+/// The Compiler is responsible for pre-calculating high-cost schema operations
+///
+/// It is a stateless unit struct: all functionality lives in associated
+/// functions, with [`Compiler::compile`] as the public entry point.
+pub struct Compiler;
+
+impl Compiler {
+  /// Internal: Compiles formats and regexes in-place
+  ///
+  /// Runs the recursive pass over `schema` first, then compiles the keys of
+  /// this node's `patternProperties` into `compiled_pattern_properties`.
+  ///
+  /// Keys that are not valid regexes are reported on stderr and skipped; if
+  /// no key compiles, `compiled_pattern_properties` is left untouched.
+  fn compile_formats_and_regexes(schema: &mut Schema) {
+    // 1. Recurse. `compile_recursive` resolves `format` and compiles
+    //    `pattern` for this node as well as its descendants, so doing either
+    //    here would only compile the same regex a second time.
+    Self::compile_recursive(schema);
+
+    // 2. Compile Pattern Properties. This happens after the recursive pass so
+    //    the sub-schema handles cloned below reflect whatever that pass
+    //    produced.
+    if let Some(pp) = &schema.pattern_properties {
+      let mut compiled_pp = Vec::new();
+      for (pattern, sub_schema) in pp {
+        match Regex::new(pattern) {
+          Ok(re) => compiled_pp.push((CompiledRegex(re), sub_schema.clone())),
+          Err(_) => eprintln!(
+            "Invalid patternProperty regex in schema (compile time): {}",
+            pattern
+          ),
+        }
+      }
+      if !compiled_pp.is_empty() {
+        schema.compiled_pattern_properties = Some(compiled_pp);
+      }
+    }
+  }
+
+  /// Moves every entry of `schema.dependencies` into either
+  /// `dependent_required` (property-list form) or `dependent_schemas`
+  /// (sub-schema form), creating the target map on first use.
+  ///
+  /// `schema.dependencies` is `None` afterwards.
+  fn normalize_dependencies(schema: &mut Schema) {
+    use crate::schema::Dependency;
+    use std::collections::BTreeMap;
+
+    let Some(entries) = schema.dependencies.take() else {
+      return;
+    };
+
+    for (property, dependency) in entries {
+      match dependency {
+        Dependency::Props(required) => {
+          let table = schema.dependent_required.get_or_insert_with(BTreeMap::new);
+          table.insert(property, required);
+        }
+        Dependency::Schema(nested) => {
+          let table = schema.dependent_schemas.get_or_insert_with(BTreeMap::new);
+          table.insert(property, nested);
+        }
+      }
+    }
+  }
+
+  /// Compiles `schema` and every sub-schema reachable from it, in place.
+  ///
+  /// For each node this:
+  /// 1. normalizes `dependencies` into `dependent_required` /
+  ///    `dependent_schemas`,
+  /// 2. resolves `format` against the registered formats, and
+  /// 3. compiles `pattern` into a regex (an invalid pattern is skipped).
+  ///
+  /// It then descends into the sub-schema keywords. `patternProperties` and
+  /// `contains` are included so that the set of visited keywords matches the
+  /// one `compile_index` walks.
+  ///
+  /// Children are reached through `Arc::make_mut`, so a child that is shared
+  /// with another owner is cloned before being modified.
+  fn compile_recursive(schema: &mut Schema) {
+    Self::normalize_dependencies(schema);
+
+    // Compile self
+    if let Some(format_str) = &schema.format {
+      if let Some(fmt) = crate::formats::FORMATS.get(format_str.as_str()) {
+        schema.compiled_format = Some(CompiledFormat::Func(fmt.func));
+      }
+    }
+    if let Some(pattern_str) = &schema.pattern {
+      if let Ok(re) = Regex::new(pattern_str) {
+        schema.compiled_pattern = Some(CompiledRegex(re));
+      }
+    }
+
+    // Recurse: keyed collections of sub-schemas
+    if let Some(defs) = &mut schema.definitions {
+      for s in defs.values_mut() {
+        Self::compile_recursive(Arc::make_mut(s));
+      }
+    }
+    if let Some(defs) = &mut schema.defs {
+      for s in defs.values_mut() {
+        Self::compile_recursive(Arc::make_mut(s));
+      }
+    }
+    if let Some(props) = &mut schema.properties {
+      for s in props.values_mut() {
+        Self::compile_recursive(Arc::make_mut(s));
+      }
+    }
+    if let Some(pp) = &mut schema.pattern_properties {
+      for (_, s) in pp.iter_mut() {
+        Self::compile_recursive(Arc::make_mut(s));
+      }
+    }
+    if let Some(ds) = &mut schema.dependent_schemas {
+      for s in ds.values_mut() {
+        Self::compile_recursive(Arc::make_mut(s));
+      }
+    }
+
+    // Recurse: array-related keywords
+    if let Some(items) = &mut schema.items {
+      Self::compile_recursive(Arc::make_mut(items));
+    }
+    if let Some(prefix_items) = &mut schema.prefix_items {
+      for s in prefix_items {
+        Self::compile_recursive(Arc::make_mut(s));
+      }
+    }
+    if let Some(contains) = &mut schema.contains {
+      Self::compile_recursive(Arc::make_mut(contains));
+    }
+
+    // Recurse: applicators
+    if let Some(not) = &mut schema.not {
+      Self::compile_recursive(Arc::make_mut(not));
+    }
+    if let Some(all_of) = &mut schema.all_of {
+      for s in all_of {
+        Self::compile_recursive(Arc::make_mut(s));
+      }
+    }
+    if let Some(any_of) = &mut schema.any_of {
+      for s in any_of {
+        Self::compile_recursive(Arc::make_mut(s));
+      }
+    }
+    if let Some(one_of) = &mut schema.one_of {
+      for s in one_of {
+        Self::compile_recursive(Arc::make_mut(s));
+      }
+    }
+    if let Some(s) = &mut schema.if_ {
+      Self::compile_recursive(Arc::make_mut(s));
+    }
+    if let Some(s) = &mut schema.then_ {
+      Self::compile_recursive(Arc::make_mut(s));
+    }
+    if let Some(s) = &mut schema.else_ {
+      Self::compile_recursive(Arc::make_mut(s));
+    }
+    if let Some(pn) = &mut schema.property_names {
+      Self::compile_recursive(Arc::make_mut(pn));
+    }
+  }
+
+  /// Recursively traverses the schema tree to build a map of all internal Anchors ($id) and JSON Pointers.
+  ///
+  /// For every visited node the following keys are inserted into `index`:
+  /// - `parent_base` plus `#` and the JSON pointer (or just `parent_base` when
+  ///   the pointer is empty), if a parent base is known;
+  /// - the node's own `$id`, used as-is when it parses as an absolute URL,
+  ///   joined onto the current base when it does not, or used verbatim when
+  ///   there is no base at all;
+  /// - `base#anchor` for `$anchor` and `$dynamicAnchor`.
+  ///
+  /// When a node establishes a new base through `$id`, the pointer handed to
+  /// its children restarts from empty. Later insertions overwrite earlier
+  /// ones with the same key.
+  ///
+  /// * `schema` - node to index.
+  /// * `index` - map being populated.
+  /// * `parent_base` - base URI in effect for `schema`, if any.
+  /// * `pointer` - JSON pointer of `schema` relative to `parent_base`.
+  fn compile_index(
+    schema: &Arc<Schema>,
+    index: &mut HashMap<String, Arc<Schema>>,
+    parent_base: Option<String>,
+    pointer: json_pointer::JsonPointer<String, Vec<String>>,
+  ) {
+    // 1. Index using Parent Base (Path from Parent)
+    if let Some(base) = &parent_base {
+      // The pointer's string form (e.g. "/definitions/foo") becomes the fragment.
+      let fragment = pointer.to_string();
+      let ptr_uri = if fragment.is_empty() {
+        base.clone()
+      } else {
+        format!("{}#{}", base, fragment)
+      };
+      index.insert(ptr_uri, schema.clone());
+    }
+
+    // 2. Determine the scope that applies to this node's children.
+    let mut current_base = parent_base;
+    let mut child_pointer = pointer;
+
+    if let Some(id) = &schema.obj.id {
+      let new_base = if url::Url::parse(id).is_ok() {
+        // Absolute $id replaces the base outright.
+        Some(id.clone())
+      } else if let Some(base) = &current_base {
+        // Relative $id: only usable when the base parses and the join succeeds.
+        url::Url::parse(base)
+          .ok()
+          .and_then(|base_url| base_url.join(id).ok())
+          .map(|joined| joined.to_string())
+      } else {
+        Some(id.clone())
+      };
+
+      if let Some(base) = new_base {
+        index.insert(base.clone(), schema.clone());
+        current_base = Some(base);
+        child_pointer = json_pointer::JsonPointer::new(vec![]); // Reset
+      }
+    }
+
+    // 3. Index by Anchor and Dynamic Anchor. Nothing is printed here: this is
+    //    library code and must not write to stdout.
+    if let Some(base) = &current_base {
+      if let Some(anchor) = &schema.obj.anchor {
+        index.insert(format!("{}#{}", base, anchor), schema.clone());
+      }
+      if let Some(d_anchor) = &schema.obj.dynamic_anchor {
+        index.insert(format!("{}#{}", base, d_anchor), schema.clone());
+      }
+    }
+
+    // 4. Recurse. `visit` extends the child pointer by `keyword` (and `key`,
+    //    when the keyword holds a collection) and indexes that sub-schema.
+    let mut visit = |child: &Arc<Schema>, keyword: &str, key: Option<String>| {
+      let mut sub = child_pointer.clone();
+      sub.push(keyword.to_string());
+      if let Some(key) = key {
+        sub.push(key);
+      }
+      Self::compile_index(child, index, current_base.clone(), sub);
+    };
+
+    // Both spellings are walked so a schema carrying `$defs` and
+    // `definitions` side by side has both indexed.
+    for (segment, defs) in [("$defs", &schema.defs), ("definitions", &schema.definitions)] {
+      if let Some(defs) = defs {
+        for (key, sub_schema) in defs {
+          // Decode key to avoid double encoding by JsonPointer
+          let decoded_key = percent_encoding::percent_decode_str(key).decode_utf8_lossy();
+          visit(sub_schema, segment, Some(decoded_key.into_owned()));
+        }
+      }
+    }
+
+    if let Some(props) = &schema.properties {
+      for (key, sub_schema) in props {
+        visit(sub_schema, "properties", Some(key.to_string()));
+      }
+    }
+
+    if let Some(items) = &schema.items {
+      visit(items, "items", None);
+    }
+
+    if let Some(prefix_items) = &schema.prefix_items {
+      for (i, sub_schema) in prefix_items.iter().enumerate() {
+        visit(sub_schema, "prefixItems", Some(i.to_string()));
+      }
+    }
+
+    if let Some(all_of) = &schema.all_of {
+      for (i, sub_schema) in all_of.iter().enumerate() {
+        visit(sub_schema, "allOf", Some(i.to_string()));
+      }
+    }
+    if let Some(any_of) = &schema.any_of {
+      for (i, sub_schema) in any_of.iter().enumerate() {
+        visit(sub_schema, "anyOf", Some(i.to_string()));
+      }
+    }
+    if let Some(one_of) = &schema.one_of {
+      for (i, sub_schema) in one_of.iter().enumerate() {
+        visit(sub_schema, "oneOf", Some(i.to_string()));
+      }
+    }
+
+    if let Some(not) = &schema.not {
+      visit(not, "not", None);
+    }
+    if let Some(if_) = &schema.if_ {
+      visit(if_, "if", None);
+    }
+    if let Some(then_) = &schema.then_ {
+      visit(then_, "then", None);
+    }
+    if let Some(else_) = &schema.else_ {
+      visit(else_, "else", None);
+    }
+    if let Some(deps) = &schema.dependent_schemas {
+      for (key, sub_schema) in deps {
+        visit(sub_schema, "dependentSchemas", Some(key.to_string()));
+      }
+    }
+    if let Some(pp) = &schema.pattern_properties {
+      for (key, sub_schema) in pp {
+        visit(sub_schema, "patternProperties", Some(key.to_string()));
+      }
+    }
+    if let Some(contains) = &schema.contains {
+      visit(contains, "contains", None);
+    }
+    if let Some(property_names) = &schema.property_names {
+      visit(property_names, "propertyNames", None);
+    }
+  }
+
+  /// Resolves a format string to a CompiledFormat (future optimization)
+  ///
+  /// Currently a stub: the argument is ignored and `None` is always returned.
+  pub fn compile_format(_format: &str) -> Option<CompiledFormat> {
+    None
+  }
+
+  /// Compiles `root_schema` into a [`CompiledSchema`].
+  ///
+  /// The schema is first compiled in place (formats and regexes). If it has
+  /// no `$id` of its own, `root_id` is installed as its id. The tree is then
+  /// indexed using `root_id` as the base URI, falling back to the empty
+  /// string so that pointers such as `#/foo` are indexed even without an id.
+  /// Finally, when `root_id` is given, it is mapped directly to the root.
+  pub fn compile(mut root_schema: Schema, root_id: Option<String>) -> CompiledSchema {
+    // In-place compilation must happen before the schema is frozen in an Arc.
+    Self::compile_formats_and_regexes(&mut root_schema);
+
+    // The caller-supplied id only fills a gap; it never overrides `$id`.
+    if root_schema.obj.id.is_none() {
+      root_schema.obj.id = root_id.clone();
+    }
+
+    let root = Arc::new(root_schema);
+    let mut index = HashMap::new();
+
+    let base_uri = Some(root_id.clone().unwrap_or_default());
+    let root_pointer = json_pointer::JsonPointer::new(vec![]);
+    Self::compile_index(&root, &mut index, base_uri, root_pointer);
+
+    // Guarantee the root id resolves to the root node itself.
+    if let Some(rid) = root_id {
+      index.insert(rid, Arc::clone(&root));
+    }
+
+    CompiledSchema { root, index }
+  }
+}
--- a/src/drop.rs
+++ b/src/drop.rs
@ -0,0 +1,61 @@
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+
+/// Result envelope produced by validation.
+///
+/// Serializes with a `type` tag, an optional `response` payload (omitted when
+/// `None`), and a list of `errors` (defaults to empty when deserializing).
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct Drop {
+    // Fields such as id or frequency are not needed for the validation result:
+    // they are added by the SQL wrapper. This struct only has to conform to the
+    // expected structure, since the validator is required to always return
+    // this drop type.
+    
+    /// Type tag; serialized under the key `type`. The constructors in this
+    /// file always set it to `"drop"`.
+    #[serde(rename = "type")]
+    pub type_: String, // "drop"
+    
+    /// Optional response payload; left out of the serialized output when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub response: Option<Value>,
+    
+    /// Validation errors; an absent field deserializes to an empty list.
+    #[serde(default)]
+    pub errors: Vec<Error>,
+}
+
+impl Drop {
+    /// Creates an empty drop: type `"drop"`, no response, no errors.
+    pub fn new() -> Self {
+        Self {
+            type_: String::from("drop"),
+            response: None,
+            errors: Vec::new(),
+        }
+    }
+
+    /// Creates a drop whose response is `{ "result": "success" }` and which
+    /// carries no errors.
+    pub fn success() -> Self {
+        let mut outcome = Self::new();
+        outcome.response = Some(serde_json::json!({ "result": "success" })); // Or appropriate success response
+        outcome
+    }
+
+    /// Creates a drop with no response that carries the given `errors`.
+    pub fn with_errors(errors: Vec<Error>) -> Self {
+        Self {
+            errors,
+            ..Self::new()
+        }
+    }
+}
+
+/// A single error entry carried by a [`Drop`].
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct Error {
+    /// Optional field; omitted from the serialized output when `None`.
+    // NOTE(review): the meaning of "punc" is not visible in this file - confirm with the caller.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub punc: Option<String>,
+    /// Error code string.
+    pub code: String,
+    /// Human-readable description of the error.
+    pub message: String,
+    /// Structured details; currently only the path (see [`ErrorDetails`]).
+    pub details: ErrorDetails,
+}
+
+/// Extra information attached to an [`Error`].
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ErrorDetails {
+    /// Path associated with the error.
+    // NOTE(review): presumably a path into the validated instance - confirm.
+    pub path: String,
+    // Extensions can be added here (package, cause, etc)
+    // For now, validator only provides path
+}
--- a/src/formats.rs
+++ b/src/formats.rs
@ -0,0 +1,875 @@
+use std::{
+    collections::HashMap,
+    error::Error,
+    net::{Ipv4Addr, Ipv6Addr},
+};
+
+use lazy_static::lazy_static;
+use percent_encoding::percent_decode_str;
+use serde_json::Value;
+use url::Url;
+
+// use crate::ecma; // Assuming ecma is not yet available, stubbing regex for now
+
+/// Defines format for `format` keyword.
+///
+/// Pairs a format name with the function that checks a value against it.
+#[derive(Clone, Copy)]
+pub struct Format {
+    /// Name of the format
+    pub name: &'static str,
+
+    /// validates given value.
+    ///
+    /// Returns `Ok(())` when the value is acceptable and a boxed error
+    /// describing the problem otherwise.
+    pub func: fn(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>>, // Ensure thread safety if needed
+}
+
+lazy_static! {
+    /// Registry of the built-in formats, keyed by format name.
+    ///
+    /// Built once on first access. The `regex` format is not registered (its
+    /// validator is stubbed out), so looking it up yields `None`.
+    pub(crate) static ref FORMATS: HashMap<&'static str, Format> = {
+        let mut m = HashMap::<&'static str, Format>::new();
+        // Helper to register formats: stores the name both as the map key and
+        // inside the `Format` value.
+        let mut register = |name, func| m.insert(name, Format { name, func });
+        
+        // register("regex", validate_regex); // Stubbed
+        register("ipv4", validate_ipv4);
+        register("ipv6", validate_ipv6);
+        register("hostname", validate_hostname);
+        register("idn-hostname", validate_idn_hostname);
+        register("email", validate_email);
+        register("idn-email", validate_idn_email);
+        register("date", validate_date);
+        register("time", validate_time);
+        register("date-time", validate_date_time);
+        register("duration", validate_duration);
+        register("period", validate_period);
+        register("json-pointer", validate_json_pointer);
+        register("relative-json-pointer", validate_relative_json_pointer);
+        register("uuid", validate_uuid);
+        register("uri", validate_uri);
+        register("iri", validate_iri);
+        register("uri-reference", validate_uri_reference);
+        register("iri-reference", validate_iri_reference);
+        register("uri-template", validate_uri_template);
+        m
+    };
+}
+
+/*
+fn validate_regex(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let Value::String(s) = v else {
+        return Ok(());
+    };
+    // ecma::convert(s).map(|_| ())
+    Ok(())
+}
+*/
+
+/// Validates the `ipv4` format.
+///
+/// Non-string values pass; a string must parse as an [`Ipv4Addr`].
+fn validate_ipv4(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    if let Value::String(text) = v {
+        text.parse::<Ipv4Addr>()?;
+    }
+    Ok(())
+}
+
+/// Validates the `ipv6` format.
+///
+/// Non-string values pass; a string must parse as an [`Ipv6Addr`].
+fn validate_ipv6(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    if let Value::String(text) = v {
+        text.parse::<Ipv6Addr>()?;
+    }
+    Ok(())
+}
+
+/// Validates the `date` format.
+///
+/// Non-string values pass; strings are checked by `check_date`.
+fn validate_date(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    match v {
+        Value::String(text) => check_date(text),
+        _ => Ok(()),
+    }
+}
+
+/// Returns `true` when the character starting at byte offset `index` of `s`
+/// is `ch`.
+///
+/// Returns `false` (rather than panicking) when `index` is past the end of
+/// `s` or does not fall on a character boundary.
+fn matches_char(s: &str, index: usize, ch: char) -> bool {
+    match s.get(index..) {
+        Some(tail) => tail.starts_with(ch),
+        None => false,
+    }
+}
+
+// see https://datatracker.ietf.org/doc/html/rfc3339#section-5.6
+/// Checks that `s` is a `yyyy-mm-dd` full-date.
+///
+/// The string must be exactly 10 bytes long, have hyphens at offsets 4 and 7,
+/// and consist of ASCII digits everywhere else. The month must be 1-12 and
+/// the day must exist in that month, with leap years taken into account for
+/// February.
+///
+/// Returns an error describing the first violated rule.
+fn check_date(s: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    // yyyy-mm-dd
+    let bytes = s.as_bytes();
+    if bytes.len() != 10 {
+        return Err("must be 10 characters long".into());
+    }
+    // A byte equal to b'-' is always a complete character, so comparing bytes
+    // is safe even when the input contains multi-byte characters.
+    if bytes[4] != b'-' || bytes[7] != b'-' {
+        return Err("missing hyphen in correct place".into());
+    }
+
+    // Digits are checked explicitly: `str::parse::<usize>` also accepts a
+    // leading '+', which would let inputs such as "2020-+1-01" through.
+    let all_digits = |part: &[u8]| part.iter().all(u8::is_ascii_digit);
+    if !(all_digits(&bytes[..4]) && all_digits(&bytes[5..7]) && all_digits(&bytes[8..])) {
+        return Err("non-positive year/month/day".into());
+    }
+
+    // Every byte is ASCII at this point, so the string slices below cannot
+    // split a character.
+    let (Ok(y), Ok(m), Ok(d)) = (
+        s[..4].parse::<usize>(),
+        s[5..7].parse::<usize>(),
+        s[8..].parse::<usize>(),
+    ) else {
+        return Err("non-positive year/month/day".into());
+    };
+
+    if !matches!(m, 1..=12) {
+        Err(format!("{m} months in year"))?;
+    }
+    if !matches!(d, 1..=31) {
+        Err(format!("{d} days in month"))?;
+    }
+
+    match m {
+        2 => {
+            let is_leap_year = y % 4 == 0 && (y % 100 != 0 || y % 400 == 0);
+            let feb_days = if is_leap_year { 29 } else { 28 };
+            if d > feb_days {
+                Err(format!("february has {feb_days} days only"))?;
+            }
+        }
+        4 | 6 | 9 | 11 => {
+            if d > 30 {
+                Err("month has 30 days only")?;
+            }
+        }
+        _ => {}
+    }
+    Ok(())
+}
+
+/// Validates the `time` format.
+///
+/// Non-string values pass; strings are checked by `check_time`.
+fn validate_time(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    if let Value::String(text) = v {
+        check_time(text)?;
+    }
+    Ok(())
+}
+
+/// Checks that `str` is a full-time value: `hh:mm:ss`, an optional
+/// `.fraction`, then `Z`/`z` or a `+hh:mm` / `-hh:mm` offset.
+///
+/// Hours, minutes and seconds (and the offset's hours and minutes) must each
+/// be exactly two ASCII digits. A seconds value of 60 (leap second) is only
+/// accepted when the time, converted to UTC, is 23:59.
+///
+/// Returns an error describing the first violated rule.
+fn check_time(mut str: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    // Reads a two-digit decimal field. Digits are matched explicitly because
+    // `str::parse::<usize>` would also accept a leading '+' (e.g. "+1").
+    let two_digits = |pair: &[u8]| -> Option<usize> {
+        match pair {
+            [hi, lo] if hi.is_ascii_digit() && lo.is_ascii_digit() => {
+                Some(usize::from(hi - b'0') * 10 + usize::from(lo - b'0'))
+            }
+            _ => None,
+        }
+    };
+
+    // min: hh:mm:ssZ
+    if str.len() < 9 {
+        return Err("less than 9 characters long".into());
+    }
+    let head = str.as_bytes();
+    // A byte equal to b':' is always a complete character.
+    if head[2] != b':' || head[5] != b':' {
+        return Err("missing colon in correct place".into());
+    }
+
+    // parse hh:mm:ss
+    if !str.is_char_boundary(8) {
+        return Err("contains non-ascii char".into());
+    }
+    let (Some(mut h), Some(mut m), Some(s)) = (
+        two_digits(&head[..2]),
+        two_digits(&head[3..5]),
+        two_digits(&head[6..8]),
+    ) else {
+        return Err("non-positive hour/min/sec".into());
+    };
+    if h > 23 || m > 59 || s > 60 {
+        return Err("hour/min/sec out of range".into());
+    }
+    str = &str[8..];
+
+    // parse sec-frac if present
+    if let Some(rem) = str.strip_prefix('.') {
+        let n_digits = rem.chars().take_while(char::is_ascii_digit).count();
+        if n_digits == 0 {
+            return Err("no digits in second fraction".into());
+        }
+        str = &rem[n_digits..];
+    }
+
+    if str != "z" && str != "Z" {
+        // parse time-numoffset
+        if str.len() != 6 {
+            return Err("offset must be 6 characters long".into());
+        }
+        // Converting local time to UTC subtracts a positive offset and adds a
+        // negative one, hence the inverted sign.
+        let sign: isize = match str.chars().next() {
+            Some('+') => -1,
+            Some('-') => 1,
+            _ => return Err("offset must begin with plus/minus".into()),
+        };
+        let zone = str.as_bytes();
+        if zone[3] != b':' {
+            return Err("missing colon in offset at correct place".into());
+        }
+        let (Some(zh), Some(zm)) = (two_digits(&zone[1..3]), two_digits(&zone[4..6])) else {
+            return Err("non-positive hour/min in offset".into());
+        };
+        if zh > 23 || zm > 59 {
+            return Err("hour/min in offset out of range".into());
+        }
+
+        // apply timezone
+        let mut hm = (h * 60 + m) as isize + sign * (zh * 60 + zm) as isize;
+        if hm < 0 {
+            hm += 24 * 60;
+            debug_assert!(hm >= 0);
+        }
+        let hm = hm as usize;
+        (h, m) = (hm / 60, hm % 60);
+    }
+
+    // check leap second
+    if !(s < 60 || (h == 23 && m == 59)) {
+        return Err("invalid leap second".into());
+    }
+    Ok(())
+}
+
+/// Validates the `date-time` format.
+///
+/// Non-string values pass; strings are checked by `check_date_time`.
+fn validate_date_time(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    match v {
+        Value::String(text) => check_date_time(text),
+        _ => Ok(()),
+    }
+}
+
+/// Checks that `s` is a date, the separator `t`/`T`, and a time.
+///
+/// The first 10 bytes are handed to `check_date` and everything after the
+/// separator to `check_time`; their errors are wrapped with a prefix saying
+/// which element was invalid.
+fn check_date_time(s: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    // min: yyyy-mm-ddThh:mm:ssZ
+    if s.len() < 20 {
+        return Err("less than 20 characters long".into());
+    }
+
+    // `get` yields `None` when offset 10 is not a character boundary.
+    let separator = s.get(10..).and_then(|tail| tail.chars().next());
+    if !matches!(separator, Some('t' | 'T')) {
+        return Err("11th character must be t or T".into());
+    }
+
+    check_date(&s[..10]).map_err(|e| format!("invalid date element: {e}"))?;
+    check_time(&s[11..]).map_err(|e| format!("invalid time element: {e}"))?;
+    Ok(())
+}
+
+/// Validates the `duration` format.
+///
+/// Non-string values pass; strings are checked by `check_duration`.
+fn validate_duration(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    match v {
+        Value::String(text) => check_duration(text),
+        _ => Ok(()),
+    }
+}
+
+// see https://datatracker.ietf.org/doc/html/rfc3339#appendix-A
+/// Checks that `s` is a duration such as `P1Y2M3DT4H5M6S` or `P4W`.
+///
+/// After the leading `P`, the value is either a week count (`<digits>W`) or a
+/// date part using the units `Y`, `M`, `D` optionally followed by `T` and a
+/// time part using `H`, `M`, `S`. Within a part every unit needs a number in
+/// front of it, and units must appear in that order without repeats.
+fn check_duration(s: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    // must start with 'P'
+    let body = match s.strip_prefix('P') {
+        Some(rest) => rest,
+        None => return Err("must start with P".into()),
+    };
+    if body.is_empty() {
+        return Err("nothing after P".into());
+    }
+
+    // dur-week
+    if let Some(weeks) = body.strip_suffix('W') {
+        if weeks.is_empty() {
+            return Err("no number in week".into());
+        }
+        if weeks.chars().any(|c| !c.is_ascii_digit()) {
+            return Err("invalid week".into());
+        }
+        return Ok(());
+    }
+
+    // Units allowed in the date part (index 0) and the time part (index 1).
+    static SECTION_UNITS: [&str; 2] = ["YMD", "HMS"];
+    for (section_idx, section) in body.split('T').enumerate() {
+        if section_idx != 0 && section.is_empty() {
+            return Err("no time elements".into());
+        }
+        let Some(&all_units) = SECTION_UNITS.get(section_idx) else {
+            return Err("more than one T".into());
+        };
+
+        // `allowed` shrinks to the units that may still follow.
+        let mut allowed = all_units;
+        let mut rest = section;
+        while !rest.is_empty() {
+            let digits = rest.bytes().take_while(u8::is_ascii_digit).count();
+            if digits == 0 {
+                return Err("missing number".into());
+            }
+            rest = &rest[digits..];
+
+            let Some(unit) = rest.chars().next() else {
+                return Err("missing unit".into());
+            };
+            match allowed.find(unit) {
+                Some(pos) => allowed = &allowed[pos + 1..],
+                None if all_units.contains(unit) => {
+                    return Err(format!("unit {unit} out of order").into());
+                }
+                None => return Err(format!("invalid unit {unit}").into()),
+            }
+            // A matched unit is ASCII, so skipping one byte is safe.
+            rest = &rest[1..];
+        }
+    }
+
+    Ok(())
+}
+
+// see https://datatracker.ietf.org/doc/html/rfc3339#appendix-A
+/// Validates the `period` format: two parts separated by the first `/`.
+///
+/// Accepted shapes are duration/date-time, date-time/duration and
+/// date-time/date-time. A part is treated as a duration when it starts with
+/// `P`. Non-string values pass.
+fn validate_period(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let Value::String(s) = v else {
+        return Ok(());
+    };
+
+    let Some((start, end)) = s.split_once('/') else {
+        return Err("missing slash".into());
+    };
+
+    if start.starts_with('P') {
+        // A leading duration must be followed by a date-time.
+        check_duration(start).map_err(|e| format!("invalid start duration: {e}"))?;
+        check_date_time(end).map_err(|e| format!("invalid end date-time: {e}"))?;
+        return Ok(());
+    }
+
+    check_date_time(start).map_err(|e| format!("invalid start date-time: {e}"))?;
+    if end.starts_with('P') {
+        check_duration(end).map_err(|e| format!("invalid end duration: {e}"))?;
+    } else {
+        check_date_time(end).map_err(|e| format!("invalid end date-time: {e}"))?;
+    }
+    Ok(())
+}
+
+/// Validates the `hostname` format.
+///
+/// Non-string values pass; strings are checked by `check_hostname`.
+fn validate_hostname(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    match v {
+        Value::String(text) => check_hostname(text),
+        _ => Ok(()),
+    }
+}
+
+// see https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
+/// Checks that `s` is a valid ASCII hostname.
+///
+/// The whole name may be at most 253 bytes. Each dot-separated label must be
+/// 1 to 63 bytes long, must not start or end with a hyphen, and may contain
+/// only ASCII letters, digits and hyphens. A label with `--` in its 3rd and
+/// 4th position must start with `xn--`, decode as punycode, and satisfy the
+/// Unicode IDN constraints.
+fn check_hostname(s: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    // entire hostname (including the delimiting dots but not a trailing dot) has a maximum of 253 ASCII characters
+    if s.len() > 253 {
+        return Err("more than 253 characters long".into());
+    }
+
+    // Hostnames are composed of series of labels concatenated with dots, as are all domain names
+    for label in s.split('.') {
+        // Each label must be from 1 to 63 characters long
+        if label.is_empty() || label.len() > 63 {
+            return Err("label must be 1 to 63 characters long".into());
+        }
+
+        // labels must not start or end with a hyphen
+        if label.starts_with('-') {
+            return Err("label starts with hyphen".into());
+        }
+        if label.ends_with('-') {
+            return Err("label ends with hyphen".into());
+        }
+
+        // labels may contain only the ASCII letters 'a' through 'z' (in a case-insensitive manner),
+        // the digits '0' through '9', and the hyphen ('-')
+        let offending = label
+            .chars()
+            .find(|c| !(c.is_ascii_alphanumeric() || *c == '-'));
+        if let Some(ch) = offending {
+            return Err(format!("invalid character {ch:?}").into());
+        }
+
+        // labels must not contain "--" in 3rd and 4th position unless they start with "xn--"
+        if label.get(2..4) != Some("--") {
+            continue;
+        }
+        if !label.starts_with("xn--") {
+            return Err("label has -- in 3rd/4th position but does not start with xn--".into());
+        }
+        let (unicode, errors) = idna::domain_to_unicode(label);
+        if errors.is_err() {
+            return Err("invalid punycode".into());
+        }
+        check_unicode_idn_constraints(&unicode)
+            .map_err(|e| format!("invalid punycode/IDN: {e}"))?;
+    }
+
+    Ok(())
+}
+
+/// Validates the `idn-hostname` format.
+///
+/// Non-string values pass; strings are checked by `check_idn_hostname`.
+fn validate_idn_hostname(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    match v {
+        Value::String(text) => check_idn_hostname(text),
+        _ => Ok(()),
+    }
+}
+
+/// Code points that cause a Unicode hostname to be rejected outright with
+/// "contains disallowed character".
+static DISALLOWED: [char; 10] = [
+    '\u{0640}', //  ARABIC TATWEEL
+    '\u{07FA}', //  NKO LAJANYALAN
+    '\u{302E}', //  HANGUL SINGLE DOT TONE MARK
+    '\u{302F}', //  HANGUL DOUBLE DOT TONE MARK
+    '\u{3031}', //  VERTICAL KANA REPEAT MARK
+    '\u{3032}', //  VERTICAL KANA REPEAT WITH VOICED SOUND MARK
+    '\u{3033}', //  VERTICAL KANA REPEAT MARK UPPER HALF
+    '\u{3034}', //  VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF
+    '\u{3035}', //  VERTICAL KANA REPEAT MARK LOWER HALF
+    '\u{303B}', //  VERTICAL IDEOGRAPHIC ITERATION MARK
+];
+
+/// Checks an internationalized hostname.
+///
+/// The input is converted to its ASCII form with strict IDNA rules, decoded
+/// back to Unicode, checked against the Unicode IDN constraints, and finally
+/// the ASCII form is checked as an ordinary hostname.
+fn check_idn_hostname(s: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let ascii = idna::domain_to_ascii_strict(s).map_err(|e| format!("idna error: {:?}", e))?;
+
+    let (unicode, decode_result) = idna::domain_to_unicode(&ascii);
+    decode_result.map_err(|e| format!("idna decoding error: {:?}", e))?;
+
+    check_unicode_idn_constraints(&unicode)?;
+    check_hostname(&ascii)
+}
+
+fn check_unicode_idn_constraints(unicode: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    // see https://www.rfc-editor.org/rfc/rfc5892#section-2.6
+    {
+        if unicode.contains(DISALLOWED) {
+            Err("contains disallowed character")?;
+        }
+    }
+
+    // unicode string must not contain "--" in 3rd and 4th position
+    // and must not start and end with a '-'
+    // see https://www.rfc-editor.org/rfc/rfc5891#section-4.2.3.1
+    {
+        let count: usize = unicode
+            .chars()
+            .skip(2)
+            .take(2)
+            .map(|c| if c == '-' { 1 } else { 0 })
+            .sum();
+        if count == 2 {
+            Err("unicode string must not contain '--' in 3rd and 4th position")?;
+        }
+    }
+
+    // MIDDLE DOT is allowed between 'l' characters only
+    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.3
+    {
+        let middle_dot = '\u{00b7}';
+        let mut s = unicode;
+        while let Some(i) = s.find(middle_dot) {
+            let prefix = &s[..i];
+            let suffix = &s[i + middle_dot.len_utf8()..];
+            if !prefix.ends_with('l') || !suffix.ends_with('l') {
+                Err("MIDDLE DOT is allowed between 'l' characters only")?;
+            }
+            s = suffix;
+        }
+    }
+
+    // Greek KERAIA must be followed by Greek character
+    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.4
+    {
+        let keralia = '\u{0375}';
+        let greek = '\u{0370}'..='\u{03FF}';
+        let mut s = unicode;
+        while let Some(i) = s.find(keralia) {
+            let suffix = &s[i + keralia.len_utf8()..];
+            if !suffix.starts_with(|c| greek.contains(&c)) {
+                Err("Greek KERAIA must be followed by Greek character")?;
+            }
+            s = suffix;
+        }
+    }
+
+    // Hebrew GERESH must be preceded by Hebrew character
+    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.5
+    //
+    // Hebrew GERSHAYIM must be preceded by Hebrew character
+    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.6
+    {
+        let geresh = '\u{05F3}';
+        let gereshayim = '\u{05F4}';
+        let hebrew = '\u{0590}'..='\u{05FF}';
+        for ch in [geresh, gereshayim] {
+            let mut s = unicode;
+            while let Some(i) = s.find(ch) {
+                let prefix = &s[..i];
+                if !prefix.ends_with(|c| hebrew.contains(&c)) {
+                    if i == 0 {
+                        Err("Hebrew GERESH must be preceded by Hebrew character")?;
+                    } else {
+                        Err("Hebrew GERESHYIM must be preceded by Hebrew character")?;
+                    }
+                }
+                let suffix = &s[i + ch.len_utf8()..];
+                s = suffix;
+            }
+        }
+    }
+
+    // KATAKANA MIDDLE DOT must be with Hiragana, Katakana, or Han
+    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.7
+    {
+        let katakana_middle_dot = '\u{30FB}';
+        if unicode.contains(katakana_middle_dot) {
+            let hiragana = '\u{3040}'..='\u{309F}';
+            let katakana = '\u{30A0}'..='\u{30FF}';
+            let han = '\u{4E00}'..='\u{9FFF}'; // https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block): is this range correct??
+            if unicode.contains(|c| hiragana.contains(&c))
+                || unicode.contains(|c| c != katakana_middle_dot && katakana.contains(&c))
+                || unicode.contains(|c| han.contains(&c))
+            {
+                // ok
+            } else {
+                Err("KATAKANA MIDDLE DOT must be with Hiragana, Katakana, or Han")?;
+            }
+        }
+    }
+
+    // ARABIC-INDIC DIGITS and Extended Arabic-Indic Digits cannot be mixed
+    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.8
+    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.9
+    {
+        let arabic_indic_digits = '\u{0660}'..='\u{0669}';
+        let extended_arabic_indic_digits = '\u{06F0}'..='\u{06F9}';
+        if unicode.contains(|c| arabic_indic_digits.contains(&c))
+            && unicode.contains(|c| extended_arabic_indic_digits.contains(&c))
+        {
+            Err("ARABIC-INDIC DIGITS and Extended Arabic-Indic Digits cannot be mixed")?;
+        }
+    }
+
+    // ZERO WIDTH JOINER must be preceded by Virama
+    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.2
+    {
+        let zero_width_jointer = '\u{200D}';
+        static VIRAMA: [char; 61] = [
+            '\u{094D}',
+            '\u{09CD}',
+            '\u{0A4D}',
+            '\u{0ACD}',
+            '\u{0B4D}',
+            '\u{0BCD}',
+            '\u{0C4D}',
+            '\u{0CCD}',
+            '\u{0D3B}',
+            '\u{0D3C}',
+            '\u{0D4D}',
+            '\u{0DCA}',
+            '\u{0E3A}',
+            '\u{0EBA}',
+            '\u{0F84}',
+            '\u{1039}',
+            '\u{103A}',
+            '\u{1714}',
+            '\u{1734}',
+            '\u{17D2}',
+            '\u{1A60}',
+            '\u{1B44}',
+            '\u{1BAA}',
+            '\u{1BAB}',
+            '\u{1BF2}',
+            '\u{1BF3}',
+            '\u{2D7F}',
+            '\u{A806}',
+            '\u{A82C}',
+            '\u{A8C4}',
+            '\u{A953}',
+            '\u{A9C0}',
+            '\u{AAF6}',
+            '\u{ABED}',
+            '\u{10A3F}',
+            '\u{11046}',
+            '\u{1107F}',
+            '\u{110B9}',
+            '\u{11133}',
+            '\u{11134}',
+            '\u{111C0}',
+            '\u{11235}',
+            '\u{112EA}',
+            '\u{1134D}',
+            '\u{11442}',
+            '\u{114C2}',
+            '\u{115BF}',
+            '\u{1163F}',
+            '\u{116B6}',
+            '\u{1172B}',
+            '\u{11839}',
+            '\u{1193D}',
+            '\u{1193E}',
+            '\u{119E0}',
+            '\u{11A34}',
+            '\u{11A47}',
+            '\u{11A99}',
+            '\u{11C3F}',
+            '\u{11D44}',
+            '\u{11D45}',
+            '\u{11D97}',
+        ]; // https://www.compart.com/en/unicode/combining/9
+        let mut s = unicode;
+        while let Some(i) = s.find(zero_width_jointer) {
+            let prefix = &s[..i];
+            if !prefix.ends_with(VIRAMA) {
+                Err("ZERO WIDTH JOINER must be preceded by Virama")?;
+            }
+            let suffix = &s[i + zero_width_jointer.len_utf8()..];
+            s = suffix;
+        }
+    }
+
+    Ok(())
+}
+
+/// Format validator for `email`.
+///
+/// Values that are not JSON strings are accepted as-is; string values are
+/// handed to `check_email`, whose result is returned unchanged.
+fn validate_email(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    match v {
+        Value::String(address) => check_email(address),
+        _ => Ok(()),
+    }
+}
+
+/// Checks that `s` is a syntactically acceptable e-mail address.
+///
+/// The address is split at the LAST `@` into a local part and a domain.
+/// Length limits are measured in bytes (`str::len`).
+///
+/// # Errors
+///
+/// Returns an error describing the first violated rule:
+/// * the whole address is longer than 254 bytes, or has no `@`;
+/// * the local part is empty or longer than 64 bytes;
+/// * a quoted local part contains a backslash or a double quote;
+/// * an unquoted local part starts/ends with a dot, has consecutive dots,
+///   or contains a character outside ASCII alphanumerics and
+///   ``.!#$%&'*+-/=?^_`{|}~``;
+/// * a bracketed domain is not an IPv4 address or an `IPv6:`-prefixed IPv6 address;
+/// * any other domain is rejected by `check_hostname`.
+// see https://en.wikipedia.org/wiki/Email_address
+fn check_email(s: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    // entire email address to be no more than 254 characters long
+    if s.len() > 254 {
+        Err("more than 254 characters long")?
+    }
+
+    // email address is generally recognized as having two parts joined with an at-sign
+    let Some(at) = s.rfind('@') else {
+        Err("missing @")?
+    };
+    let (local, domain) = (&s[..at], &s[at + 1..]);
+
+    // local part must contain at least one character: without this guard an
+    // address such as "@example.com" would slip through every check below
+    if local.is_empty() {
+        Err("empty local part")?
+    }
+
+    // local part may be up to 64 characters long
+    if local.len() > 64 {
+        Err("local part more than 64 characters long")?
+    }
+
+    // the length guard keeps a lone `"` from being treated as a quoted string
+    if local.len() > 1 && local.starts_with('"') && local.ends_with('"') {
+        // quoted
+        let local = &local[1..local.len() - 1];
+        if local.contains(['\\', '"']) {
+            Err("backslash and quote not allowed within quoted local part")?
+        }
+    } else {
+        // unquoted
+
+        if local.starts_with('.') {
+            Err("starts with dot")?
+        }
+        if local.ends_with('.') {
+            Err("ends with dot")?
+        }
+
+        // consecutive dots not allowed
+        if local.contains("..") {
+            Err("consecutive dots")?
+        }
+
+        // check allowed chars
+        if let Some(ch) = local
+            .chars()
+            .find(|c| !(c.is_ascii_alphanumeric() || ".!#$%&'*+-/=?^_`{|}~".contains(*c)))
+        {
+            Err(format!("invalid character {ch:?}"))?
+        }
+    }
+
+    // domain if enclosed in brackets, must match an IP address
+    if domain.starts_with('[') && domain.ends_with(']') {
+        let s = &domain[1..domain.len() - 1];
+        if let Some(s) = s.strip_prefix("IPv6:") {
+            if let Err(e) = s.parse::<Ipv6Addr>() {
+                Err(format!("invalid ipv6 address: {e}"))?
+            }
+            return Ok(());
+        }
+        if let Err(e) = s.parse::<Ipv4Addr>() {
+            Err(format!("invalid ipv4 address: {e}"))?
+        }
+        return Ok(());
+    }
+
+    // domain must match the requirements for a hostname
+    if let Err(e) = check_hostname(domain) {
+        Err(format!("invalid domain: {e}"))?
+    }
+
+    Ok(())
+}
+
+/// Format validator for `idn-email`.
+///
+/// Non-string values are accepted. A string is split at its last `@`; both
+/// halves are converted with `idna::domain_to_ascii_strict`, the converted
+/// domain is checked with `check_idn_hostname`, and the re-assembled
+/// `local@domain` string is finally checked with `check_email`.
+fn validate_idn_email(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let Value::String(s) = v else {
+        return Ok(());
+    };
+
+    // split on the last at-sign
+    let (local, domain) = s.rsplit_once('@').ok_or("missing @")?;
+
+    let ascii_local =
+        idna::domain_to_ascii_strict(local).map_err(|e| format!("idna error: {e:?}"))?;
+    let ascii_domain =
+        idna::domain_to_ascii_strict(domain).map_err(|e| format!("idna error: {e:?}"))?;
+    check_idn_hostname(&ascii_domain).map_err(|e| format!("invalid domain: {e}"))?;
+
+    check_email(&format!("{ascii_local}@{ascii_domain}"))
+}
+
+/// Format validator for `json-pointer`.
+///
+/// Values that are not JSON strings are accepted as-is; string values are
+/// handed to `check_json_pointer`, whose result is returned unchanged.
+fn validate_json_pointer(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    match v {
+        Value::String(pointer) => check_json_pointer(pointer),
+        _ => Ok(()),
+    }
+}
+
+/// Checks that `s` is a JSON Pointer.
+///
+/// The empty string is accepted. Any other input must begin with `/`; it is
+/// then split on `/` into reference tokens, and inside each token every `~`
+/// must be immediately followed by `0` or `1`.
+///
+/// # Errors
+///
+/// Returns a message naming the first rule that was violated.
+// see https://www.rfc-editor.org/rfc/rfc6901#section-3
+fn check_json_pointer(s: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    if s.is_empty() {
+        return Ok(());
+    }
+    // dropping the leading slash leaves exactly the '/'-separated reference tokens
+    let Some(tokens) = s.strip_prefix('/') else {
+        return Err("not starting with slash".into());
+    };
+
+    for reference_token in tokens.split('/') {
+        let mut rest = reference_token.chars();
+        loop {
+            match rest.next() {
+                None => break,
+                // an escape consumes the character that follows the tilde
+                Some('~') => match rest.next() {
+                    Some('0') | Some('1') => {}
+                    _ => return Err("~ must be followed by 0 or 1".into()),
+                },
+                Some('\x00'..='\x2E' | '\x30'..='\x7D' | '\x7F'..='\u{10FFFF}') => {}
+                Some(_) => return Err("contains disallowed character".into()),
+            }
+        }
+    }
+    Ok(())
+}
+
+/// Format validator for `relative-json-pointer`.
+///
+/// Non-string values are accepted. A string must start with a run of ASCII
+/// digits (no leading zero unless the run is a single digit) and continue
+/// with either a lone `#` or something `check_json_pointer` accepts.
+// see https://tools.ietf.org/html/draft-handrews-relative-json-pointer-01#section-3
+fn validate_relative_json_pointer(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let Value::String(s) = v else {
+        return Ok(());
+    };
+
+    // split into the leading non-negative integer and whatever follows it
+    let digits_end = s.find(|c: char| !c.is_ascii_digit()).unwrap_or(s.len());
+    let (integer, remainder) = s.split_at(digits_end);
+
+    if integer.is_empty() {
+        return Err("must start with non-negative integer".into());
+    }
+    if integer.len() > 1 && integer.starts_with('0') {
+        return Err("starts with zero".into());
+    }
+
+    // followed by either json-pointer or '#'
+    if remainder == "#" {
+        return Ok(());
+    }
+    check_json_pointer(remainder).map_err(|e| format!("invalid json-pointer element: {e}"))?;
+    Ok(())
+}
+
+/// Format validator for `uuid`.
+///
+/// Non-string values are accepted. A string must consist of exactly five
+/// `-`-separated groups of 8, 4, 4, 4 and 12 ASCII hex digits.
+// see https://datatracker.ietf.org/doc/html/rfc4122#page-4
+fn validate_uuid(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let Value::String(s) = v else {
+        return Ok(());
+    };
+
+    const GROUP_LENGTHS: [usize; 5] = [8, 4, 4, 4, 12];
+    let mut groups_seen = 0;
+    for (position, group) in s.split('-').enumerate() {
+        // a sixth group is reported only after the first five passed their checks
+        let Some(&expected_len) = GROUP_LENGTHS.get(position) else {
+            return Err("more than 5 elements".into());
+        };
+        if group.len() != expected_len {
+            return Err(format!(
+                "element {} must be {} characters long",
+                position + 1,
+                expected_len
+            )
+            .into());
+        }
+        if let Some(bad) = group.chars().find(|c| !c.is_ascii_hexdigit()) {
+            return Err(format!("non-hex character {bad:?}").into());
+        }
+        groups_seen = position + 1;
+    }
+
+    if groups_seen != GROUP_LENGTHS.len() {
+        return Err("must have 5 elements".into());
+    }
+    Ok(())
+}
+
+/// Format validator for `uri`.
+///
+/// Non-string values are accepted. A string must parse with
+/// `fluent_uri::UriRef::parse` and the parsed reference must carry a scheme;
+/// a reference without a scheme is rejected as "relative url".
+fn validate_uri(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    if let Value::String(s) = v {
+        let reference = fluent_uri::UriRef::parse(s.as_str()).map_err(|e| e.to_string())?;
+        if reference.scheme().is_none() {
+            return Err("relative url".into());
+        }
+    }
+    Ok(())
+}
+
+/// Format validator for `iri`.
+///
+/// Non-string values are accepted. A string must parse with `Url::parse`;
+/// the "relative URL without base" parse error is reported as "relative url",
+/// every other parse error is passed through.
+fn validate_iri(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let Value::String(s) = v else {
+        return Ok(());
+    };
+    let Err(cause) = Url::parse(s) else {
+        return Ok(());
+    };
+    if matches!(cause, url::ParseError::RelativeUrlWithoutBase) {
+        return Err("relative url".into());
+    }
+    Err(cause.into())
+}
+
+lazy_static! {
+    // Fixed base URL that `parse_uri_reference` joins candidate reference
+    // strings against. The `unwrap` is applied to the result of parsing this
+    // constant literal, not to any caller-supplied input.
+    static ref TEMP_URL: Url = Url::parse("http://temp.com").unwrap();
+}
+
+/// Parses `s` as a URI reference by joining it onto `TEMP_URL`.
+///
+/// # Errors
+///
+/// Fails when `s` contains a backslash, or when `Url::join` rejects it.
+fn parse_uri_reference(s: &str) -> Result<Url, Box<dyn Error + Send + Sync>> {
+    match s.find('\\') {
+        Some(_) => Err("contains \\\\".into()),
+        None => TEMP_URL.join(s).map_err(Into::into),
+    }
+}
+
+/// Format validator for `uri-reference`.
+///
+/// Non-string values are accepted. A string must parse with
+/// `fluent_uri::UriRef::parse`; the parse error is reported as its string form.
+fn validate_uri_reference(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    if let Value::String(s) = v {
+        fluent_uri::UriRef::parse(s.as_str()).map_err(|e| e.to_string())?;
+    }
+    Ok(())
+}
+
+/// Format validator for `iri-reference`.
+///
+/// Non-string values are accepted. A string is valid when
+/// `parse_uri_reference` succeeds; the parsed URL itself is discarded.
+fn validate_iri_reference(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    match v {
+        Value::String(s) => parse_uri_reference(s).map(drop),
+        _ => Ok(()),
+    }
+}
+
+/// Format validator for `uri-template`.
+///
+/// Non-string values are accepted. A string must pass `parse_uri_reference`;
+/// its percent-decoded path is then split on `/`, and within every segment
+/// the curly braces must strictly alternate `{`, `}`, `{`, ... and end closed.
+fn validate_uri_template(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let Value::String(s) = v else {
+        return Ok(());
+    };
+
+    // the parsed path has its curly braces percent-encoded, so decode it first
+    let parsed = parse_uri_reference(s)?;
+    let decoded = percent_decode_str(parsed.path()).decode_utf8()?;
+
+    // ensure curly brackets are not nested and balanced
+    for segment in decoded.split('/') {
+        let mut inside_expression = false;
+        for c in segment.chars() {
+            match c {
+                // an opener while already open, or a closer while closed
+                '{' | '}' if (c == '{') == inside_expression => {
+                    return Err("nested curly braces".into());
+                }
+                '{' | '}' => inside_expression = !inside_expression,
+                _ => {}
+            }
+        }
+        if inside_expression {
+            return Err("no matching closing brace".into());
+        }
+    }
+    Ok(())
+}
--- a/src/helpers.rs
+++ b/src/helpers.rs
@ -1,88 +0,0 @@
-use serde_json::Value;
-use pgrx::JsonB; 
-
-// Simple test helpers for cleaner test code
-pub fn assert_success(result: &JsonB) {
-  let json = &result.0;
-  if !json.get("response").is_some() || json.get("errors").is_some() {
-    let pretty = serde_json::to_string_pretty(json).unwrap_or_else(|_| format!("{:?}", json));
-    panic!("Expected success but got:\n{}", pretty);
-  }
-}
-
-pub fn assert_failure(result: &JsonB) {
-  let json = &result.0;
-  if json.get("response").is_some() || !json.get("errors").is_some() {
-    let pretty = serde_json::to_string_pretty(json).unwrap_or_else(|_| format!("{:?}", json));
-    panic!("Expected failure but got:\n{}", pretty);
-  }
-}
-
-pub fn assert_error_count(result: &JsonB, expected_count: usize) {
-  assert_failure(result);
-  let errors = get_errors(result);
-  if errors.len() != expected_count {
-    let pretty = serde_json::to_string_pretty(&result.0).unwrap_or_else(|_| format!("{:?}", result.0));
-    panic!("Expected {} errors, got {}:\n{}", expected_count, errors.len(), pretty);
-  }
-}
-
-pub fn get_errors(result: &JsonB) -> &Vec<Value> {
-  result.0["errors"].as_array().expect("errors should be an array")
-}
-
-pub fn has_error_with_code(result: &JsonB, code: &str) -> bool {
-  get_errors(result).iter().any(|e| e["code"] == code)
-}
-
-
-pub fn has_error_with_code_and_path(result: &JsonB, code: &str, path: &str) -> bool {
-  get_errors(result).iter().any(|e| e["code"] == code && e["details"]["path"] == path)
-}
-
-pub fn assert_has_error(result: &JsonB, code: &str, path: &str) {
-  if !has_error_with_code_and_path(result, code, path) {
-    let pretty = serde_json::to_string_pretty(&result.0).unwrap_or_else(|_| format!("{:?}", result.0));
-    panic!("Expected error with code='{}' and path='{}' but not found:\n{}", code, path, pretty);
-  }
-}
-
-pub fn find_error_with_code<'a>(result: &'a JsonB, code: &str) -> &'a Value {
-  get_errors(result).iter().find(|e| e["code"] == code)
-    .unwrap_or_else(|| panic!("No error found with code '{}'", code))
-}
-
-
-pub fn find_error_with_code_and_path<'a>(result: &'a JsonB, code: &str, path: &str) -> &'a Value {
-  get_errors(result).iter().find(|e| e["code"] == code && e["details"]["path"] == path)
-    .unwrap_or_else(|| panic!("No error found with code '{}' and path '{}'", code, path))
-}
-
-pub fn assert_error_detail(error: &Value, detail_key: &str, expected_value: &str) {
-  let actual = error["details"][detail_key].as_str()
-    .unwrap_or_else(|| panic!("Error detail '{}' is not a string", detail_key));
-  assert_eq!(actual, expected_value, "Error detail '{}' mismatch", detail_key);
-}
-
-
-// Additional convenience helpers for common patterns
-
-pub fn assert_error_message_contains(error: &Value, substring: &str) {
-  let message = error["message"].as_str().expect("error should have message");
-  assert!(message.contains(substring), "Expected message to contain '{}', got '{}'", substring, message);
-}
-
-pub fn assert_error_cause_json(error: &Value, expected_cause: &Value) {
-  let cause = &error["details"]["cause"];
-  assert!(cause.is_object(), "cause should be JSON object");
-  assert_eq!(cause, expected_cause, "cause mismatch");
-}
-
-pub fn assert_error_context(error: &Value, expected_context: &Value) {
-  assert_eq!(&error["details"]["context"], expected_context, "context mismatch");
-}
-
-
-pub fn jsonb(val: Value) -> JsonB {
-  JsonB(val)
-}
--- a/src/lib.rs
+++ b/src/lib.rs
@ -2,859 +2,144 @@ use pgrx::*;

 pg_module_magic!();

-use boon::{CompileError, Compiler, ErrorKind, SchemaIndex, Schemas, ValidationError, Type, Types, ValidationOptions};
-use lazy_static::lazy_static;
-use serde_json::{json, Value, Number};
-use std::borrow::Cow;
-use std::collections::hash_map::Entry;
-use std::{collections::{HashMap, HashSet}, sync::RwLock};
+pub mod compiler;
+pub mod drop;
+pub mod formats;

-#[derive(Clone, Copy, Debug, PartialEq)]
-enum SchemaType {
- Enum,
- Type,
- Family, // Added for generated hierarchy schemas
- PublicPunc,
- PrivatePunc,
-}
+pub mod registry;
+mod schema;
+pub mod util;
+mod validator;

-struct Schema {
- index: SchemaIndex,
- t: SchemaType,
-}
-
-struct Cache {
- schemas: Schemas,
- map: HashMap<String, Schema>,
-}
-
-// Structure to hold error information without lifetimes
-#[derive(Debug)]
-struct Error {
- path: String,
- code: String,
- message: String,
- cause: Value, // Changed from String to Value to store JSON
-}
-
-lazy_static! {
- static ref SCHEMA_CACHE: RwLock<Cache> = RwLock::new(Cache {
-  schemas: Schemas::new(),
-  map: HashMap::new(),
- });
-}
+use crate::registry::REGISTRY;
+use crate::schema::Schema;
+use serde_json::{Value, json};

 #[pg_extern(strict)]
 fn cache_json_schemas(enums: JsonB, types: JsonB, puncs: JsonB) -> JsonB {
-  let mut cache = SCHEMA_CACHE.write().unwrap();
-  let enums_value: Value = enums.0;
-  let types_value: Value = types.0;
-  let puncs_value: Value = puncs.0;
+  let mut registry = REGISTRY.write().unwrap();
+  registry.clear();

-  *cache = Cache {
-    schemas: Schemas::new(),
-    map: HashMap::new(),
-  };
-
-  let mut compiler = Compiler::new();
-  compiler.enable_format_assertions();
-
-  let mut errors = Vec::new();
-  let mut schemas_to_compile = Vec::new();
-
-  // Phase 1: Enums
-  if let Some(enums_array) = enums_value.as_array() {
-    for enum_row in enums_array {
-      if let Some(schemas_raw) = enum_row.get("schemas") {
-        if let Some(schemas_array) = schemas_raw.as_array() {
-          for schema_def in schemas_array {
-            if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) {
-              schemas_to_compile.push((schema_id.to_string(), schema_def.clone(), SchemaType::Enum));
-            }
-          }
-        }
-      }
-    }
-  }
-
-  // Phase 2: Types & Hierarchy Pre-processing
-  let mut hierarchy_map: HashMap<String, HashSet<String>> = HashMap::new();
-  if let Some(types_array) = types_value.as_array() {
-    for type_row in types_array {
-      // Process main schemas for the type
-      if let Some(schemas_raw) = type_row.get("schemas") {
-        if let Some(schemas_array) = schemas_raw.as_array() {
-          for schema_def in schemas_array {
-            if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) {
-              schemas_to_compile.push((schema_id.to_string(), schema_def.clone(), SchemaType::Type));
-            }
-          }
-        }
-      }
-
-      // Process hierarchy to build .family enums
-      if let Some(type_name) = type_row.get("name").and_then(|v| v.as_str()) {
-        if let Some(hierarchy_raw) = type_row.get("hierarchy") {
-          if let Some(hierarchy_array) = hierarchy_raw.as_array() {
-            for ancestor_val in hierarchy_array {
-              if let Some(ancestor_name) = ancestor_val.as_str() {
-                hierarchy_map
-                  .entry(ancestor_name.to_string())
+  // Generate Family Schemas from Types
+  {
+    let mut family_map: std::collections::HashMap<String, std::collections::HashSet<String>> =
+      std::collections::HashMap::new();
+    if let Value::Array(arr) = &types.0 {
+      for item in arr {
+        if let Some(name) = item.get("name").and_then(|v| v.as_str()) {
+          if let Some(hierarchy) = item.get("hierarchy").and_then(|v| v.as_array()) {
+            for ancestor in hierarchy {
+              if let Some(anc_str) = ancestor.as_str() {
+                family_map
+                  .entry(anc_str.to_string())
                  .or_default()
-                  .insert(type_name.to_string());
+                  .insert(name.to_string());
              }
            }
          }
        }
      }
    }
-  }

-  // Generate and add the .family schemas
-  for (base_type, descendant_types) in hierarchy_map {
-    let family_schema_id = format!("{}.family", base_type);
-    let enum_values: Vec<String> = descendant_types.into_iter().collect();
-    let family_schema = json!({
-      "$id": family_schema_id,
-      "type": "string",
-      "enum": enum_values
-    });
-    schemas_to_compile.push((family_schema_id, family_schema, SchemaType::Family));
-  }
+    for (family_name, members) in family_map {
+      let id = format!("{}.family", family_name);

-  // Phase 3: Puncs
-  if let Some(puncs_array) = puncs_value.as_array() {
-    for punc_row in puncs_array {
-      if let Some(punc_obj) = punc_row.as_object() {
-        if let Some(punc_name) = punc_obj.get("name").and_then(|v| v.as_str()) {
-          let is_public = punc_obj.get("public").and_then(|v| v.as_bool()).unwrap_or(false);
-          let punc_schema_type = if is_public { SchemaType::PublicPunc } else { SchemaType::PrivatePunc };
-          if let Some(schemas_raw) = punc_obj.get("schemas") {
-            if let Some(schemas_array) = schemas_raw.as_array() {
-              for schema_def in schemas_array {
-                if let Some(schema_id) = schema_def.get("$id").and_then(|v| v.as_str()) {
-                  let request_schema_id = format!("{}.request", punc_name);
-                  let response_schema_id = format!("{}.response", punc_name);
-                  let schema_type_for_def = if schema_id == request_schema_id || schema_id == response_schema_id {
-                    punc_schema_type
-                  } else {
-                    SchemaType::Type
-                  };
-                  schemas_to_compile.push((schema_id.to_string(), schema_def.clone(), schema_type_for_def));
+      // Object Union (for polymorphic object validation)
+      // This allows the schema to match ANY of the types in the family hierarchy
+      let object_refs: Vec<Value> = members.iter().map(|s| json!({ "$ref": s })).collect();
+
+      let schema_json = json!({
+          "$id": id,
+          "oneOf": object_refs
+      });
+
+      if let Ok(schema) = serde_json::from_value::<Schema>(schema_json) {
+        let compiled = crate::compiler::Compiler::compile(schema, Some(id.clone()));
+        registry.insert(id, compiled);
+      }
+    }
+
+    // Helper to parse and cache a list of items
+    let mut cache_items = |items: JsonB| {
+      if let Value::Array(arr) = items.0 {
+        for item in arr {
+          // For now, we assume the item structure matches what the generator expects
+          // or what `json_schemas.sql` sends.
+          // The `Schema` struct in `schema.rs` is designed to deserialize standard JSON Schema.
+          // However, the input here is an array of objects that *contain* a `schemas` array.
+          // We need to extract those inner schemas.
+
+          if let Some(schemas_val) = item.get("schemas") {
+            if let Value::Array(schemas) = schemas_val {
+              for schema_val in schemas {
+                // Deserialize into our robust Schema struct to ensure validity/parsing
+                if let Ok(schema) = serde_json::from_value::<Schema>(schema_val.clone()) {
+                  if let Some(id) = &schema.obj.id {
+                    let id_clone = id.clone();
+                    // Store the compiled Schema in the registry.
+                    // The registry.insert method now handles simple insertion of CompiledSchema
+                    let compiled =
+                      crate::compiler::Compiler::compile(schema, Some(id_clone.clone()));
+                    registry.insert(id_clone, compiled);
+                  }
                }
              }
            }
          }
        }
      }
-    }
-  }
-
-  // Add all resources to compiler first
-  for (id, value, schema_type) in &schemas_to_compile {
-    add_schema_resource(&mut compiler, id, value.clone(), *schema_type, &mut errors);
-  }
-
-  if !errors.is_empty() {
-    return JsonB(json!({ "errors": errors }));
-  }
-
-  // Compile all schemas
-  compile_all_schemas(&mut compiler, &mut cache, &schemas_to_compile, &mut errors);
-
-  if errors.is_empty() {
-    JsonB(json!({ "response": "success" }))
-  } else {
-    JsonB(json!({ "errors": errors }))
-  }
-}
-
-// Helper function to add a schema resource (without compiling)
-fn add_schema_resource(
- compiler: &mut Compiler, 
- schema_id: &str, 
- schema_value: Value, 
- _schema_type: SchemaType,
- errors: &mut Vec<Value>
-) {
- if let Err(e) = compiler.add_resource(schema_id, schema_value) {
-  errors.push(json!({ 
-   "code": "SCHEMA_RESOURCE_FAILED",
-   "message": format!("Failed to add schema resource '{}'", schema_id),
-   "details": { "schema": schema_id, "cause": format!("{}", e) }
-  }));
- }
-}
-
-// Helper function to compile all added resources
-fn compile_all_schemas(
-  compiler: &mut Compiler,
-  cache: &mut Cache,
-  schemas_to_compile: &[(String, Value, SchemaType)],
-  errors: &mut Vec<Value>,
-) {
-  for (id, value, schema_type) in schemas_to_compile {
-    match compiler.compile(id, &mut cache.schemas) {
-      Ok(index) => {
-        cache.map.insert(id.clone(), Schema { index, t: *schema_type });
-      }
-      Err(e) => {
-        match &e {
-          CompileError::ValidationError { src, .. } => {
-            let mut error_list = Vec::new();
-            collect_errors(src, &mut error_list);
-            let formatted_errors = format_errors(error_list, value, id);
-            errors.extend(formatted_errors);
-          }
-          _ => {
-            errors.push(json!({ 
-              "code": "SCHEMA_COMPILATION_FAILED",
-              "message": format!("Schema '{}' compilation failed", id),
-              "details": { "schema": id, "cause": format!("{:?}", e) }
-            }));
-          }
-        };
-      }
-    }
+    };
+
+    cache_items(enums);
+    cache_items(types);
+    cache_items(puncs); // public/private distinction logic to come later
  }
+  JsonB(json!({ "response": "success" }))
 }

 #[pg_extern(strict, parallel_safe)]
 fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB {
-  let cache = SCHEMA_CACHE.read().unwrap();
-  match cache.map.get(schema_id) {
-    None => JsonB(json!({ 
-      "errors": [{
-        "code": "SCHEMA_NOT_FOUND",
-        "message": format!("Schema '{}' not found in cache", schema_id),
-        "details": {
-          "schema": schema_id,
-          "cause": "Schema was not found in bulk cache - ensure cache_json_schemas was called"
-        }
-      }]
-    })),
-    Some(schema) => {
-      let instance_value: Value = instance.0;
-      let options = match schema.t {
-        SchemaType::PublicPunc => Some(ValidationOptions { be_strict: true }),
-        _ => None,
-      };
-
-      match cache.schemas.validate(&instance_value, schema.index, options) {
-        Ok(_) => {
-            JsonB(json!({ "response": "success" }))
-        }
-        Err(validation_error) => {
-          let mut error_list = Vec::new();
-          collect_errors(&validation_error, &mut error_list);
-          let errors = format_errors(error_list, &instance_value, schema_id);
-          if errors.is_empty() {
-            JsonB(json!({ "response": "success" }))
-          } else {
-            JsonB(json!({ "errors": errors }))
-          }
-        }
-      }
-    }
-  }
-}
-
-// Recursively collects validation errors
-fn collect_errors(error: &ValidationError, errors_list: &mut Vec<Error>) {
-    // Check if this is a structural error that we should skip
-    let is_structural = matches!(
-        &error.kind,
-        ErrorKind::Group | ErrorKind::AllOf | ErrorKind::AnyOf | ErrorKind::Not | ErrorKind::OneOf(_)
-    );
-
-    if !error.causes.is_empty() || is_structural {
-      for cause in &error.causes {
-          collect_errors(cause, errors_list);
-      }
-      return
-    }
-
-    let base_path = error.instance_location.to_string();
-    let errors_to_add = match &error.kind {
-        ErrorKind::Type { got, want } => handle_type_error(&base_path, got, want),
-        ErrorKind::Required { want } => handle_required_error(&base_path, want),
-        ErrorKind::Dependency { prop, missing } => handle_dependency_error(&base_path, prop, missing, false),
-        ErrorKind::DependentRequired { prop, missing } => handle_dependency_error(&base_path, prop, missing, true),
-        ErrorKind::AdditionalProperties { got } => handle_additional_properties_error(&base_path, got),
-        ErrorKind::Enum { want } => handle_enum_error(&base_path, want),
-        ErrorKind::Const { want } => handle_const_error(&base_path, want),
-        ErrorKind::MinLength { got, want } => handle_min_length_error(&base_path, *got, *want),
-        ErrorKind::MaxLength { got, want } => handle_max_length_error(&base_path, *got, *want),
-        ErrorKind::Pattern { got, want } => handle_pattern_error(&base_path, got, want),
-        ErrorKind::Minimum { got, want } => handle_minimum_error(&base_path, got, want),
-        ErrorKind::Maximum { got, want } => handle_maximum_error(&base_path, got, want),
-        ErrorKind::ExclusiveMinimum { got, want } => handle_exclusive_minimum_error(&base_path, got, want),
-        ErrorKind::ExclusiveMaximum { got, want } => handle_exclusive_maximum_error(&base_path, got, want),
-        ErrorKind::MultipleOf { got, want } => handle_multiple_of_error(&base_path, got, want),
-        ErrorKind::MinItems { got, want } => handle_min_items_error(&base_path, *got, *want),
-        ErrorKind::MaxItems { got, want } => handle_max_items_error(&base_path, *got, *want),
-        ErrorKind::UniqueItems { got } => handle_unique_items_error(&base_path, got),
-        ErrorKind::MinProperties { got, want } => handle_min_properties_error(&base_path, *got, *want),
-        ErrorKind::MaxProperties { got, want } => handle_max_properties_error(&base_path, *got, *want),
-        ErrorKind::AdditionalItems { got } => handle_additional_items_error(&base_path, *got),
-        ErrorKind::Format { want, got, err } => handle_format_error(&base_path, want, got, err),
-        ErrorKind::PropertyName { prop } => handle_property_name_error(&base_path, prop),
-        ErrorKind::Contains => handle_contains_error(&base_path),
-        ErrorKind::MinContains { got, want } => handle_min_contains_error(&base_path, got, *want),
-        ErrorKind::MaxContains { got, want } => handle_max_contains_error(&base_path, got, *want),
-        ErrorKind::ContentEncoding { want, err } => handle_content_encoding_error(&base_path, want, err),
-        ErrorKind::ContentMediaType { want, err, .. } => handle_content_media_type_error(&base_path, want, err),
-        ErrorKind::FalseSchema => handle_false_schema_error(&base_path),
-        ErrorKind::Not => handle_not_error(&base_path),
-        ErrorKind::RefCycle { url, kw_loc1, kw_loc2 } => handle_ref_cycle_error(&base_path, url, kw_loc1, kw_loc2),
-        ErrorKind::Reference { kw, url } => handle_reference_error(&base_path, kw, url),
-        ErrorKind::Schema { url } => handle_schema_error(&base_path, url),
-        ErrorKind::ContentSchema => handle_content_schema_error(&base_path),
-        ErrorKind::Group => handle_group_error(&base_path),
-        ErrorKind::AllOf => handle_all_of_error(&base_path),
-        ErrorKind::AnyOf => handle_any_of_error(&base_path),
-        ErrorKind::OneOf(matched) => handle_one_of_error(&base_path, matched),
-    };
-
-    errors_list.extend(errors_to_add);
-}
-
-// Handler functions for each error kind
-fn handle_type_error(base_path: &str, got: &Type, want: &Types) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "TYPE_MISMATCH".to_string(),
-  message: format!("Expected {} but got {}", 
-   want.iter().map(|t| t.to_string()).collect::<Vec<_>>().join(" or "), 
-   got
-  ),
-  cause: json!({ 
-   "got": got.to_string(), 
-   "want": want.iter().map(|t| t.to_string()).collect::<Vec<_>>() 
-  }),
- }]
-}
-
-fn handle_required_error(base_path: &str, want: &[&str]) -> Vec<Error> {
- // Create a separate error for each missing required field
- want.iter().map(|missing_field| {
-  let field_path = if base_path.is_empty() {
-   format!("/{}", missing_field)
-  } else {
-   format!("{}/{}", base_path, missing_field)
-  };
-  
-  Error {
-   path: field_path,
-   code: "REQUIRED_FIELD_MISSING".to_string(),
-   message: format!("Required field '{}' is missing", missing_field),
-   cause: json!({ "want": [missing_field] }),
-  }
- }).collect()
-}
-
-fn handle_dependency_error(base_path: &str, prop: &str, missing: &[&str], is_dependent_required: bool) -> Vec<Error> {
- // Create a separate error for each missing field
- missing.iter().map(|missing_field| {
-  let field_path = if base_path.is_empty() {
-   format!("/{}", missing_field)
-  } else {
-   format!("{}/{}", base_path, missing_field)
-  };
-  
-  let (code, message) = if is_dependent_required {
-   (
-    "DEPENDENT_REQUIRED_MISSING".to_string(),
-    format!("Field '{}' is required when '{}' is present", missing_field, prop),
-   )
-  } else {
-   (
-    "DEPENDENCY_FAILED".to_string(),
-    format!("Field '{}' is required when '{}' is present", missing_field, prop),
-   )
-  };
-  
-  Error {
-   path: field_path,
-   code,
-   message,
-   cause: json!({ "prop": prop, "missing": [missing_field] }),
-  }
- }).collect()
-}
-
-fn handle_additional_properties_error(base_path: &str, got: &[Cow<str>]) -> Vec<Error> {
-    let mut errors = Vec::new();
-    for extra_prop in got {
-        let field_path = if base_path.is_empty() {
-            format!("/{}", extra_prop)
-        } else {
-            format!("{}/{}", base_path, extra_prop)
-        };
-        errors.push(Error {
-            path: field_path,
-            code: "ADDITIONAL_PROPERTIES_NOT_ALLOWED".to_string(),
-            message: format!("Property '{}' is not allowed", extra_prop),
-            cause: json!({ "got": [extra_prop.to_string()] }),
-        });
-    }
-    errors
-}
-
-fn handle_enum_error(base_path: &str, want: &[Value]) -> Vec<Error> {
- let message = if want.len() == 1 {
-  format!("Value must be {}", serde_json::to_string(&want[0]).unwrap_or_else(|_| "unknown".to_string()))
- } else {
-  format!("Value must be one of: {}", 
-   want.iter()
-    .map(|v| serde_json::to_string(v).unwrap_or_else(|_| "unknown".to_string()))
-    .collect::<Vec<_>>()
-    .join(", ")
-  )
- };
- 
- vec![Error {
-  path: base_path.to_string(),
-  code: "ENUM_VIOLATED".to_string(),
-  message,
-  cause: json!({ "want": want }),
- }]
-}
-
-fn handle_const_error(base_path: &str, want: &Value) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "CONST_VIOLATED".to_string(),
-  message: format!("Value must be exactly {}", serde_json::to_string(want).unwrap_or_else(|_| "unknown".to_string())),
-  cause: json!({ "want": want }),
- }]
-}
-
-fn handle_min_length_error(base_path: &str, got: usize, want: usize) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "MIN_LENGTH_VIOLATED".to_string(),
-  message: format!("String length must be at least {} characters, but got {}", want, got),
-  cause: json!({ "got": got, "want": want }),
- }]
-}
-
-fn handle_max_length_error(base_path: &str, got: usize, want: usize) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "MAX_LENGTH_VIOLATED".to_string(),
-  message: format!("String length must be at most {} characters, but got {}", want, got),
-  cause: json!({ "got": got, "want": want }),
- }]
-}
-
-fn handle_pattern_error(base_path: &str, got: &Cow<str>, want: &str) -> Vec<Error> {
- let display_value = if got.len() > 50 { 
-  format!("{}...", &got[..50]) 
- } else { 
-  got.to_string() 
- };
- 
- vec![Error {
-  path: base_path.to_string(),
-  code: "PATTERN_VIOLATED".to_string(),
-  message: format!("Value '{}' does not match pattern '{}'", display_value, want),
-  cause: json!({ "got": got.to_string(), "want": want }),
- }]
-}
-
-fn handle_minimum_error(base_path: &str, got: &Cow<Number>, want: &Number) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "MINIMUM_VIOLATED".to_string(),
-  message: format!("Value must be at least {}, but got {}", want, got),
-  cause: json!({ "got": got, "want": want }),
- }]
-}
-
-fn handle_maximum_error(base_path: &str, got: &Cow<Number>, want: &Number) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "MAXIMUM_VIOLATED".to_string(),
-  message: format!("Value must be at most {}, but got {}", want, got),
-  cause: json!({ "got": got, "want": want }),
- }]
-}
-
-fn handle_exclusive_minimum_error(base_path: &str, got: &Cow<Number>, want: &Number) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "EXCLUSIVE_MINIMUM_VIOLATED".to_string(),
-  message: format!("Value must be greater than {}, but got {}", want, got),
-  cause: json!({ "got": got, "want": want }),
- }]
-}
-
-fn handle_exclusive_maximum_error(base_path: &str, got: &Cow<Number>, want: &Number) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "EXCLUSIVE_MAXIMUM_VIOLATED".to_string(),
-  message: format!("Value must be less than {}, but got {}", want, got),
-  cause: json!({ "got": got, "want": want }),
- }]
-}
-
-fn handle_multiple_of_error(base_path: &str, got: &Cow<Number>, want: &Number) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "MULTIPLE_OF_VIOLATED".to_string(),
-  message: format!("{} is not a multiple of {}", got, want),
-  cause: json!({ "got": got, "want": want }),
- }]
-}
-
-fn handle_min_items_error(base_path: &str, got: usize, want: usize) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "MIN_ITEMS_VIOLATED".to_string(),
-  message: format!("Array must have at least {} items, but has {}", want, got),
-  cause: json!({ "got": got, "want": want }),
- }]
-}
-
-fn handle_max_items_error(base_path: &str, got: usize, want: usize) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "MAX_ITEMS_VIOLATED".to_string(),
-  message: format!("Array must have at most {} items, but has {}", want, got),
-  cause: json!({ "got": got, "want": want }),
- }]
-}
-
-fn handle_unique_items_error(base_path: &str, got: &[usize; 2]) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "UNIQUE_ITEMS_VIOLATED".to_string(),
-  message: format!("Array items at positions {} and {} are duplicates", got[0], got[1]),
-  cause: json!({ "got": got }),
- }]
-}
-
-fn handle_min_properties_error(base_path: &str, got: usize, want: usize) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "MIN_PROPERTIES_VIOLATED".to_string(),
-  message: format!("Object must have at least {} properties, but has {}", want, got),
-  cause: json!({ "got": got, "want": want }),
- }]
-}
-
-fn handle_max_properties_error(base_path: &str, got: usize, want: usize) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "MAX_PROPERTIES_VIOLATED".to_string(),
-  message: format!("Object must have at most {} properties, but has {}", want, got),
-  cause: json!({ "got": got, "want": want }),
- }]
-}
-
-fn handle_additional_items_error(base_path: &str, got: usize) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "ADDITIONAL_ITEMS_NOT_ALLOWED".to_string(),
-  message: format!("Last {} array items are not allowed", got),
-  cause: json!({ "got": got }),
- }]
-}
-
-fn handle_format_error(base_path: &str, want: &str, got: &Cow<Value>, err: &Box<dyn std::error::Error>) -> Vec<Error> {
- // If the value is an empty string, skip format validation.
- if let Value::String(s) = got.as_ref() {
-  if s.is_empty() {
-   return vec![];
-  }
- }
-
- vec![Error {
-  path: base_path.to_string(),
-  code: "FORMAT_INVALID".to_string(),
-  message: format!("Value {} is not a valid {} format", 
-   serde_json::to_string(got.as_ref()).unwrap_or_else(|_| "unknown".to_string()),
-   want
-  ),
-  cause: json!({ "got": got, "want": want, "err": err.to_string() }),
- }]
-}
-
-fn handle_property_name_error(base_path: &str, prop: &str) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "INVALID_PROPERTY_NAME".to_string(),
-  message: format!("Property name '{}' is invalid", prop),
-  cause: json!({ "prop": prop }),
- }]
-}
-
-fn handle_contains_error(base_path: &str) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "CONTAINS_FAILED".to_string(),
-  message: "No array items match the required schema".to_string(),
-  cause: json!({}),
- }]
-}
-
-fn handle_min_contains_error(base_path: &str, got: &[usize], want: usize) -> Vec<Error> {
- let message = if got.is_empty() {
-  format!("At least {} array items must match the schema, but none do", want)
- } else {
-  format!("At least {} array items must match the schema, but only {} do (at positions {})", 
-   want, 
-   got.len(), 
-   got.iter().map(|i| i.to_string()).collect::<Vec<_>>().join(", ")
-  )
- };
- 
- vec![Error {
-  path: base_path.to_string(),
-  code: "MIN_CONTAINS_VIOLATED".to_string(),
-  message,
-  cause: json!({ "got": got, "want": want }),
- }]
-}
-
-fn handle_max_contains_error(base_path: &str, got: &[usize], want: usize) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "MAX_CONTAINS_VIOLATED".to_string(),
-  message: format!("At most {} array items can match the schema, but {} do (at positions {})", 
-   want, 
-   got.len(), 
-   got.iter().map(|i| i.to_string()).collect::<Vec<_>>().join(", ")
-  ),
-  cause: json!({ "got": got, "want": want }),
- }]
-}
-
-fn handle_content_encoding_error(base_path: &str, want: &str, err: &Box<dyn std::error::Error>) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "CONTENT_ENCODING_INVALID".to_string(),
-  message: format!("Content is not valid {} encoding: {}", want, err),
-  cause: json!({ "want": want, "err": err.to_string() }),
- }]
-}
-
-fn handle_content_media_type_error(base_path: &str, want: &str, err: &Box<dyn std::error::Error>) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "CONTENT_MEDIA_TYPE_INVALID".to_string(),
-  message: format!("Content is not valid {} media type: {}", want, err),
-  cause: json!({ "want": want, "err": err.to_string() }),
- }]
-}
-
-fn handle_false_schema_error(base_path: &str) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "FALSE_SCHEMA".to_string(),
-  message: "This schema always fails validation".to_string(),
-  cause: json!({}),
- }]
-}
-
-fn handle_not_error(base_path: &str) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "NOT_VIOLATED".to_string(),
-  message: "Value matches a schema that it should not match".to_string(),
-  cause: json!({}),
- }]
-}
-
-fn handle_ref_cycle_error(base_path: &str, url: &str, kw_loc1: &str, kw_loc2: &str) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "REFERENCE_CYCLE".to_string(),
-  message: format!("Reference cycle detected: both '{}' and '{}' resolve to '{}'", kw_loc1, kw_loc2, url),
-  cause: json!({ "url": url, "kw_loc1": kw_loc1, "kw_loc2": kw_loc2 }),
- }]
-}
-
-fn handle_reference_error(base_path: &str, kw: &str, url: &str) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "REFERENCE_FAILED".to_string(),
-  message: format!("{} reference to '{}' failed validation", kw, url),
-  cause: json!({ "kw": kw, "url": url }),
- }]
-}
-
-fn handle_schema_error(base_path: &str, url: &str) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "SCHEMA_FAILED".to_string(),
-  message: format!("Schema '{}' validation failed", url),
-  cause: json!({ "url": url }),
- }]
-}
-
-fn handle_content_schema_error(base_path: &str) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "CONTENT_SCHEMA_FAILED".to_string(),
-  message: "Content schema validation failed".to_string(),
-  cause: json!({}),
- }]
-}
-
-fn handle_group_error(base_path: &str) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "VALIDATION_FAILED".to_string(),
-  message: "Validation failed".to_string(),
-  cause: json!({}),
- }]
-}
-
-fn handle_all_of_error(base_path: &str) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "ALL_OF_VIOLATED".to_string(),
-  message: "Value does not match all required schemas".to_string(),
-  cause: json!({}),
- }]
-}
-
-fn handle_any_of_error(base_path: &str) -> Vec<Error> {
- vec![Error {
-  path: base_path.to_string(),
-  code: "ANY_OF_VIOLATED".to_string(),
-  message: "Value does not match any of the allowed schemas".to_string(),
-  cause: json!({}),
- }]
-}
-
-fn handle_one_of_error(base_path: &str, matched: &Option<(usize, usize)>) -> Vec<Error> {
- let (message, cause) = match matched {
-  None => (
-   "Value must match exactly one schema, but matches none".to_string(),
-   json!({ "matched_indices": null })
-  ),
-  Some((i, j)) => (
-   format!("Value must match exactly one schema, but matches schemas at positions {} and {}", i, j),
-   json!({ "matched_indices": [i, j] })
-  ),
- };
- 
- vec![Error {
-  path: base_path.to_string(),
-  code: "ONE_OF_VIOLATED".to_string(),
-  message,
-  cause,
- }]
-}
-
-// Formats errors according to DropError structure
-fn format_errors(errors: Vec<Error>, instance: &Value, schema_id: &str) -> Vec<Value> {
-    let mut unique_errors: HashMap<String, Value> = HashMap::new();
-    for error in errors {
-        let error_path = error.path.clone();
-        if let Entry::Vacant(entry) = unique_errors.entry(error_path.clone()) {
-            let failing_value = extract_value_at_path(instance, &error.path);
-            entry.insert(json!({
-                "code": error.code,
-                "message": error.message,
-                "details": {
-                    "path": error.path,
-                    "context": failing_value,
-                    "cause": error.cause,
-                    "schema": schema_id
-                }
-            }));
-        }
-    }
-    
-    unique_errors.into_values().collect::<Vec<Value>>()
-}
-
-// Helper function to extract value at a JSON pointer path
-fn extract_value_at_path(instance: &Value, path: &str) -> Value {
- let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
- let mut current = instance;
-
- for part in parts {
-  match current {
-   Value::Object(map) => {
-    if let Some(value) = map.get(part) {
-     current = value;
-    } else {
-     return Value::Null;
-    }
-   }
-   Value::Array(arr) => {
-    if let Ok(index) = part.parse::<usize>() {
-     if let Some(value) = arr.get(index) {
-      current = value;
-     } else {
-      return Value::Null;
-     }
-    } else {
-     return Value::Null;
-    }
-   }
-   _ => return Value::Null,
-  }
- }
-
- current.clone()
+  let drop = validator::Validator::validate(schema_id, &instance.0);
+  JsonB(serde_json::to_value(drop).unwrap())
 }

 #[pg_extern(strict, parallel_safe)]
 fn json_schema_cached(schema_id: &str) -> bool {
- let cache = SCHEMA_CACHE.read().unwrap();
- cache.map.contains_key(schema_id)
+  let registry = REGISTRY.read().unwrap();
+  registry.get(schema_id).is_some()
 }

 #[pg_extern(strict)]
 fn clear_json_schemas() -> JsonB {
- let mut cache = SCHEMA_CACHE.write().unwrap();
- *cache = Cache {
-  schemas: Schemas::new(),
-  map: HashMap::new(),
- };
- JsonB(json!({ "response": "success" }))
+  let mut registry = REGISTRY.write().unwrap();
+  registry.clear();
+  JsonB(json!({ "response": "success" }))
 }

 #[pg_extern(strict, parallel_safe)]
 fn show_json_schemas() -> JsonB {
- let cache = SCHEMA_CACHE.read().unwrap();
- let ids: Vec<String> = cache.map.keys().cloned().collect();
- JsonB(json!({ "response": ids }))
-}
-
-/// This module is required by `cargo pgrx test` invocations.
-/// It must be visible at the root of your extension crate.
-#[cfg(test)]
-pub mod pg_test {
- pub fn setup(_options: Vec<&str>) {
-  // perform one-off initialization when the pg_test framework starts
- }
-
- #[must_use]
- pub fn postgresql_conf_options() -> Vec<&'static str> {
-  // return any postgresql.conf settings that are required for your tests
-  vec![]
- }
-}
-
-#[cfg(any(test, feature = "pg_test"))]
-mod helpers {
- include!("helpers.rs");
-}
-
-#[cfg(any(test, feature = "pg_test"))]
-mod schemas {
- include!("schemas.rs");
+  let registry = REGISTRY.read().unwrap();
+  // Debug summary: reports only how many schemas are currently registered.
+  // Returning the schema IDs (or the whole map) is a possible future extension.
+  JsonB(json!({ "response": "success", "count": registry.len() }))
 }

 #[cfg(any(test, feature = "pg_test"))]
 #[pg_schema]
 mod tests {
- include!("tests.rs");
-}
+  use pgrx::prelude::*;
+  include!("tests.rs");
+}
+
+/// This module is required by `cargo pgrx test` invocations.
+/// It must be visible at the root of the extension crate.
+#[cfg(test)]
+pub mod pg_test {
+  /// One-off setup hook; it currently ignores its options and does nothing.
+  pub fn setup(_options: Vec<&str>) {
+    // perform any initialization common to all tests
+  }
+
+  /// Returns the `postgresql.conf` settings the tests need; currently none.
+  pub fn postgresql_conf_options() -> Vec<&'static str> {
+    // return any postgresql.conf settings that are required for your tests
+    vec![]
+  }
+}
--- a/src/registry.rs
+++ b/src/registry.rs
@ -0,0 +1,41 @@
+use crate::compiler::CompiledSchema; // Changed from crate::schema::Schema
+use lazy_static::lazy_static;
+use std::collections::HashMap;
+use std::sync::RwLock;
+
+lazy_static! {
+  /// Global schema registry: starts out empty (`Registry::new()`) and is guarded by an `RwLock`.
+  pub static ref REGISTRY: RwLock<Registry> = RwLock::new(Registry::new());
+}
+
+use std::sync::Arc;
+
+/// Lookup table from schema ID to its compiled schema.
+pub struct Registry {
+  /// Compiled schemas keyed by the ID they were inserted under; each is held in an `Arc`.
+  pub schemas: HashMap<String, Arc<CompiledSchema>>, // Changed from Schema
+}
+
+impl Registry {
+  /// Creates an empty registry.
+  pub fn new() -> Self {
+    Registry {
+      schemas: HashMap::new(),
+    }
+  }
+
+  /// Stores `compiled` under `id`.
+  ///
+  /// # Panics
+  /// Panics if a schema is already registered under `id`.
+  pub fn insert(&mut self, id: String, compiled: CompiledSchema) {
+    // The entry API checks for a duplicate and inserts with a single hash lookup.
+    match self.schemas.entry(id) {
+      std::collections::hash_map::Entry::Occupied(slot) => {
+        panic!("Duplicate schema ID inserted into registry: '{}'", slot.key());
+      }
+      std::collections::hash_map::Entry::Vacant(slot) => {
+        slot.insert(Arc::new(compiled));
+      }
+    }
+  }
+
+  /// Returns a shared handle to the schema registered under `id`, if any.
+  pub fn get(&self, id: &str) -> Option<Arc<CompiledSchema>> {
+    self.schemas.get(id).cloned()
+  }
+
+  /// Removes every registered schema.
+  pub fn clear(&mut self) {
+    self.schemas.clear();
+  }
+
+  /// Returns the number of registered schemas.
+  pub fn len(&self) -> usize {
+    self.schemas.len()
+  }
+
+  /// Returns `true` when no schema is registered.
+  pub fn is_empty(&self) -> bool {
+    self.schemas.is_empty()
+  }
+}
+
+impl Default for Registry {
+  /// Equivalent to [`Registry::new`].
+  fn default() -> Self {
+    Self::new()
+  }
+}
--- a/src/schema.rs
+++ b/src/schema.rs
@ -0,0 +1,212 @@
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+use std::collections::BTreeMap;
+use std::sync::Arc;
+
+/// The keyword set of a JSON Schema object.
+///
+/// Mirrors the Go Punc Generator's schema struct for consistency.
+/// Map-valued keywords are stored in `BTreeMap`s, so their entries iterate in sorted key order.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct SchemaObject {
+  // Core Schema Keywords
+  #[serde(rename = "$id")]
+  pub id: Option<String>,
+  #[serde(rename = "$ref")]
+  pub ref_string: Option<String>,
+  #[serde(rename = "$anchor")]
+  pub anchor: Option<String>,
+  #[serde(rename = "$dynamicAnchor")]
+  pub dynamic_anchor: Option<String>,
+  #[serde(rename = "$dynamicRef")]
+  pub dynamic_ref: Option<String>,
+  /*
+     Note: The `Ref` field in the Go struct is a pointer populated by the linker.
+     In Rust, we might handle this differently (e.g., separate lookup or Rc/Arc),
+     so we omit the direct recursive `Ref` field for now and rely on `ref_string`.
+  */
+  pub description: Option<String>,
+  pub title: Option<String>,
+  #[serde(default)] // Allow missing type
+  #[serde(rename = "type")]
+  pub type_: Option<SchemaTypeOrArray>, // Handles string or array of strings
+
+  // Object Keywords
+  pub properties: Option<BTreeMap<String, Arc<Schema>>>,
+  #[serde(rename = "patternProperties")]
+  pub pattern_properties: Option<BTreeMap<String, Arc<Schema>>>,
+  pub required: Option<Vec<String>>,
+  // additionalProperties (a schema or a boolean) has no dedicated field in this struct.
+  // NOTE(review): the original note says it is handled by the Schema wrapper; confirm.
+
+  // dependencies can be schema dependencies or property dependencies
+  pub dependencies: Option<BTreeMap<String, Dependency>>,
+
+  // Definitions (for $ref resolution)
+  #[serde(rename = "$defs")]
+  pub defs: Option<BTreeMap<String, Arc<Schema>>>,
+  #[serde(rename = "definitions")]
+  pub definitions: Option<BTreeMap<String, Arc<Schema>>>,
+
+  // Array Keywords
+  #[serde(rename = "items")]
+  pub items: Option<Arc<Schema>>,
+  #[serde(rename = "prefixItems")]
+  pub prefix_items: Option<Vec<Arc<Schema>>>,
+
+  // String Validation
+  #[serde(rename = "minLength")]
+  pub min_length: Option<f64>,
+  #[serde(rename = "maxLength")]
+  pub max_length: Option<f64>,
+  pub pattern: Option<String>,
+
+  // Array Validation
+  #[serde(rename = "minItems")]
+  pub min_items: Option<f64>,
+  #[serde(rename = "maxItems")]
+  pub max_items: Option<f64>,
+  #[serde(rename = "uniqueItems")]
+  pub unique_items: Option<bool>,
+  #[serde(rename = "contains")]
+  pub contains: Option<Arc<Schema>>,
+  #[serde(rename = "minContains")]
+  pub min_contains: Option<f64>,
+  #[serde(rename = "maxContains")]
+  pub max_contains: Option<f64>,
+
+  // Object Validation
+  #[serde(rename = "minProperties")]
+  pub min_properties: Option<f64>,
+  #[serde(rename = "maxProperties")]
+  pub max_properties: Option<f64>,
+  #[serde(rename = "propertyNames")]
+  pub property_names: Option<Arc<Schema>>,
+  #[serde(rename = "dependentRequired")]
+  pub dependent_required: Option<BTreeMap<String, Vec<String>>>,
+  #[serde(rename = "dependentSchemas")]
+  pub dependent_schemas: Option<BTreeMap<String, Arc<Schema>>>,
+
+  // Format, Enum and Const (apply to any instance type)
+  pub format: Option<String>,
+  #[serde(rename = "enum")]
+  pub enum_: Option<Vec<Value>>, // `enum` is a reserved keyword in Rust
+  #[serde(default, rename = "const")]
+  pub const_: Option<Value>,
+
+  // Numeric Validation
+  #[serde(rename = "multipleOf")]
+  pub multiple_of: Option<f64>,
+  pub minimum: Option<f64>,
+  pub maximum: Option<f64>,
+  #[serde(rename = "exclusiveMinimum")]
+  pub exclusive_minimum: Option<f64>,
+  #[serde(rename = "exclusiveMaximum")]
+  pub exclusive_maximum: Option<f64>,
+
+  // Combining Keywords
+  #[serde(rename = "allOf")]
+  pub all_of: Option<Vec<Arc<Schema>>>,
+  #[serde(rename = "anyOf")]
+  pub any_of: Option<Vec<Arc<Schema>>>,
+  #[serde(rename = "oneOf")]
+  pub one_of: Option<Vec<Arc<Schema>>>,
+  #[serde(rename = "not")]
+  pub not: Option<Arc<Schema>>,
+  #[serde(rename = "if")]
+  pub if_: Option<Arc<Schema>>,
+  #[serde(rename = "then")]
+  pub then_: Option<Arc<Schema>>,
+  #[serde(rename = "else")]
+  pub else_: Option<Arc<Schema>>,
+
+  // Custom Vocabularies
+  pub form: Option<Vec<String>>,
+  pub display: Option<Vec<String>>,
+  #[serde(rename = "enumNames")]
+  pub enum_names: Option<Vec<String>>,
+  pub control: Option<String>,
+  pub actions: Option<BTreeMap<String, Action>>,
+  pub computer: Option<String>,
+  #[serde(default)]
+  pub extensible: Option<bool>,
+
+  // Compiled Fields (Hidden from JSON/Serde)
+  // `#[serde(skip)]` keeps these out of (de)serialization; they are `None` after deserializing.
+  #[serde(skip)]
+  pub compiled_format: Option<crate::compiler::CompiledFormat>,
+  #[serde(skip)]
+  pub compiled_pattern: Option<crate::compiler::CompiledRegex>,
+  #[serde(skip)]
+  pub compiled_pattern_properties: Option<Vec<(crate::compiler::CompiledRegex, Arc<Schema>)>>,
+}
+
+/// A schema node: the keyword set in `obj` plus a flag marking the boolean `false` schema.
+///
+/// The derived `Default` is an empty `SchemaObject` with `always_fail` set to `false`.
+#[derive(Debug, Clone, Default, Serialize)]
+pub struct Schema {
+  /// Keyword fields; flattened so they serialize as this struct's own fields.
+  #[serde(flatten)]
+  pub obj: SchemaObject,
+  /// `true` when the schema was given as the literal `false`; never serialized.
+  #[serde(skip)]
+  pub always_fail: bool,
+}
+
+/// Read access to the keyword fields of `obj` directly through a `Schema`.
+impl std::ops::Deref for Schema {
+  type Target = SchemaObject;
+
+  fn deref(&self) -> &SchemaObject {
+    &self.obj
+  }
+}
+
+/// Mutable access to the keyword fields of `obj` directly through a `Schema`.
+impl std::ops::DerefMut for Schema {
+  fn deref_mut(&mut self) -> &mut SchemaObject {
+    &mut self.obj
+  }
+}
+
+impl<'de> Deserialize<'de> for Schema {
+  /// Accepts either a boolean schema or a schema object.
+  ///
+  /// * `true`  -> empty keyword set with `extensible = Some(true)`, `always_fail = false`
+  /// * `false` -> empty keyword set, `always_fail = true`
+  /// * anything else is parsed as a `SchemaObject`; parse failures are reported as a
+  ///   custom deserialization error.
+  fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+  where
+    D: serde::Deserializer<'de>,
+  {
+    // Buffer into a generic JSON value first so the boolean form can be told apart.
+    match Value::deserialize(deserializer)? {
+      Value::Bool(accepts) => Ok(Schema {
+        obj: SchemaObject {
+          extensible: if accepts { Some(true) } else { None },
+          ..SchemaObject::default()
+        },
+        always_fail: !accepts,
+      }),
+      other => serde_json::from_value::<SchemaObject>(other)
+        .map(|obj| Schema {
+          obj,
+          always_fail: false,
+        })
+        .map_err(serde::de::Error::custom),
+    }
+  }
+}
+
+/// Value of the `type` keyword: either one type name or a list of type names.
+/// Untagged, so it (de)serializes as a bare string or a bare array of strings.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum SchemaTypeOrArray {
+  Single(String),
+  Multiple(Vec<String>),
+}
+
+/// Entry type of the custom `actions` map on `SchemaObject`; both fields are optional strings.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Action {
+  pub navigate: Option<String>,
+  pub punc: Option<String>,
+}
+/// Value of one `dependencies` entry: a list of property names or a sub-schema.
+/// Untagged, so variants are tried in declaration order: the string list first, then the schema.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum Dependency {
+  Props(Vec<String>),
+  Schema(Arc<Schema>),
+}
--- a/src/schemas.rs
+++ b/src/schemas.rs
--- a/src/tests.rs
+++ b/src/tests.rs
--- a/src/util.rs
+++ b/src/util.rs
@ -0,0 +1,406 @@
+use serde::Deserialize;
+use std::fs;
+
+/// One group of a JSON test-suite file: the schemas to register plus the test cases to run.
+#[derive(Debug, Deserialize)]
+struct TestSuite {
+  #[allow(dead_code)]
+  description: String,
+  // Optional single schema for the whole group (standard test-suite style)
+  schema: Option<serde_json::Value>,
+  // Support JSPG-style test suites with explicit types/enums/puncs
+  types: Option<serde_json::Value>,
+  enums: Option<serde_json::Value>,
+  puncs: Option<serde_json::Value>,
+  tests: Vec<TestCase>,
+}
+
+/// A single test case: the instance to validate and whether it is expected to be valid.
+#[derive(Debug, Deserialize)]
+struct TestCase {
+  description: String,
+  // Instance document handed to the validator
+  data: serde_json::Value,
+  // Expected outcome: true when validation should report no errors
+  valid: bool,
+  // Support explicit schema ID target for test case
+  schema_id: Option<String>,
+  // Expected output for masking tests
+  #[allow(dead_code)]
+  expected: Option<serde_json::Value>,
+}
+
+use crate::registry::REGISTRY;
+use crate::validator::Validator;
+use serde_json::Value;
+
+/// Deserializes any value and wraps it in `Some`.
+///
+/// Because the value is read as a generic JSON `Value`, an explicit `null` becomes
+/// `Some(Value::Null)` rather than `None`.
+pub fn deserialize_some<'de, D>(deserializer: D) -> Result<Option<Value>, D::Error>
+where
+  D: serde::Deserializer<'de>,
+{
+  Value::deserialize(deserializer).map(Some)
+}
+
+/// Runs the test group at position `index` of the JSON test-suite file at `path`.
+///
+/// The global `REGISTRY` is cleared first, then populated in this order:
+/// 1. one synthesized `<ancestor>.family` schema per ancestor named in any type's
+///    `hierarchy` (a `oneOf` of `$ref`s to the types listing that ancestor),
+/// 2. every schema carrying an `$id` in the `schemas` arrays of `enums`, `types`, `puncs`,
+/// 3. the group's `schema`, under its `$id` or `"root"` when it has none.
+///
+/// Each test is validated against its explicit `schema_id`, else the group schema's ID,
+/// else the `$id` of the first schema of the first punc.
+///
+/// # Errors
+/// Returns the newline-joined descriptions of every test whose outcome differs from its
+/// `valid` flag, and of every test for which no schema ID could be determined.
+///
+/// # Panics
+/// Panics if the file cannot be read or parsed, if `index` is out of bounds, if the
+/// registry lock is poisoned, or if the same schema ID is registered twice.
+pub fn run_test_file_at_index(path: &str, index: usize) -> Result<(), String> {
+  // Clear registry to ensure isolation
+  REGISTRY.write().unwrap().clear();
+
+  let content =
+    fs::read_to_string(path).unwrap_or_else(|_| panic!("Failed to read file: {}", path));
+  let suite: Vec<TestSuite> = serde_json::from_str(&content)
+    .unwrap_or_else(|e| panic!("Failed to parse JSON in {}: {}", path, e));
+
+  let group = suite
+    .get(index)
+    .unwrap_or_else(|| panic!("Index {} out of bounds for file {}", index, path));
+  let mut failures = Vec::<String>::new();
+
+  // Helper to register items with 'schemas'.
+  // Schemas that fail to deserialize or that carry no `$id` are skipped.
+  let register_schemas = |items_val: Option<&Value>| {
+    let schema_vals = items_val
+      .and_then(Value::as_array)
+      .into_iter()
+      .flatten()
+      .filter_map(|item| item.get("schemas").and_then(Value::as_array))
+      .flatten();
+
+    for schema_val in schema_vals {
+      if let Ok(schema) = serde_json::from_value::<crate::schema::Schema>(schema_val.clone()) {
+        if let Some(id) = schema.obj.id.clone() {
+          let mut registry = REGISTRY.write().unwrap();
+          // Utilize the new compile method which handles strictness
+          let compiled = crate::compiler::Compiler::compile(schema, Some(id.clone()));
+          registry.insert(id, compiled);
+        }
+      }
+    }
+  };
+
+  // 1. Register Family Schemas if 'types' is present
+  if let Some(types) = group.types.as_ref().and_then(Value::as_array) {
+    // ancestor name -> names of the types whose hierarchy lists it
+    let mut family_map: std::collections::HashMap<String, std::collections::HashSet<String>> =
+      std::collections::HashMap::new();
+
+    for item in types {
+      let name = item.get("name").and_then(Value::as_str);
+      let hierarchy = item.get("hierarchy").and_then(Value::as_array);
+      if let (Some(name), Some(hierarchy)) = (name, hierarchy) {
+        for ancestor in hierarchy.iter().filter_map(Value::as_str) {
+          family_map
+            .entry(ancestor.to_string())
+            .or_default()
+            .insert(name.to_string());
+        }
+      }
+    }
+
+    for (family_name, members) in family_map {
+      let id = format!("{}.family", family_name);
+      let object_refs: Vec<Value> = members
+        .iter()
+        .map(|s| serde_json::json!({ "$ref": s }))
+        .collect();
+
+      let schema_json = serde_json::json!({
+          "$id": id,
+          "oneOf": object_refs
+      });
+
+      if let Ok(schema) = serde_json::from_value::<crate::schema::Schema>(schema_json) {
+        let mut registry = REGISTRY.write().unwrap();
+        let compiled = crate::compiler::Compiler::compile(schema, Some(id.clone()));
+        registry.insert(id, compiled);
+      }
+    }
+  }
+
+  // 2. Register items directly
+  register_schemas(group.enums.as_ref());
+  register_schemas(group.types.as_ref());
+  register_schemas(group.puncs.as_ref());
+
+  // 3. Register the group-level 'schema' if present (generic test support)
+  if let Some(schema_val) = &group.schema {
+    if let Ok(schema) = serde_json::from_value::<crate::schema::Schema>(schema_val.clone()) {
+      // Fallback ID if none provided in schema
+      let id = schema
+        .obj
+        .id
+        .clone()
+        .unwrap_or_else(|| "root".to_string());
+
+      let mut registry = REGISTRY.write().unwrap();
+      let compiled = crate::compiler::Compiler::compile(schema, Some(id.clone()));
+      registry.insert(id, compiled);
+    }
+  }
+
+  // 4. Run Tests
+  for test in &group.tests {
+    let schema_id = test
+      .schema_id
+      .clone()
+      // No explicit schema_id: use the group schema's `$id`, or "root" when it has none
+      .or_else(|| {
+        group.schema.as_ref().map(|s| {
+          s.get("$id")
+            .and_then(Value::as_str)
+            .unwrap_or("root")
+            .to_string()
+        })
+      })
+      // Default to the first punc if present (for puncs.json style)
+      .or_else(|| {
+        group
+          .puncs
+          .as_ref()
+          .and_then(Value::as_array)
+          .and_then(|puncs| puncs.first())
+          .and_then(|punc| punc.get("schemas"))
+          .and_then(Value::as_array)
+          .and_then(|schemas| schemas.first())
+          .and_then(|schema| schema.get("$id"))
+          .and_then(Value::as_str)
+          .map(str::to_owned)
+      });
+
+    match schema_id {
+      Some(sid) => {
+        let result = Validator::validate(&sid, &test.data);
+        // "Got" reports validity (no errors), so it is directly comparable to "Expected".
+        let passed = result.errors.is_empty();
+
+        if passed != test.valid {
+          failures.push(format!(
+            "[{}] Test '{}' failed. Expected: {}, Got: {}. Errors: {:?}",
+            group.description, test.description, test.valid, passed, result.errors
+          ));
+        }
+      }
+      None => failures.push(format!(
+        "[{}] Test '{}' skipped: No schema ID found.",
+        group.description, test.description
+      )),
+    }
+  }
+
+  if failures.is_empty() {
+    Ok(())
+  } else {
+    Err(failures.join("\n"))
+  }
+}
+/// Runs every test group in the JSON test-suite file at `path` against the validator.
+///
+/// For each group the following schemas are compiled and stored in `REGISTRY`, in order:
+/// 1. a synthetic `<ancestor>.family` schema (a `oneOf` of `$ref`s) for every ancestor
+///    listed in the `hierarchy` array of an entry in `types`,
+/// 2. every schema carrying an id that is listed under the `schemas` key of an entry in
+///    `types`, `enums` or `puncs`,
+/// 3. the group's main `schema`, under the generated id `test:<path>:<group_index>`.
+///
+/// Each test is then validated against its own `schema_id`, falling back to the
+/// generated id when the test does not name one.
+///
+/// # Errors
+///
+/// Returns `Err` with a human-readable message when the file cannot be read or parsed,
+/// when a group's main schema cannot be deserialized, or when at least one test's
+/// outcome differs from its `valid` expectation. Each failing test is also written to
+/// stderr as it is found.
+pub fn run_test_file(path: &str) -> Result<(), String> {
+  // Compiles `schema` under `id` and stores the result in the global registry.
+  fn register(schema: crate::schema::Schema, id: String) {
+    let mut registry = REGISTRY.write().unwrap();
+    let compiled = crate::compiler::Compiler::compile(schema, Some(id.clone()));
+    registry.insert(id, compiled);
+  }
+
+  // Registers every schema that has an id and is listed under the `schemas` key of an
+  // item of the given array. Anything that is not shaped like that is ignored.
+  fn register_schemas(items_val: Option<Value>) {
+    if let Some(Value::Array(items)) = items_val {
+      for item in items {
+        if let Some(Value::Array(schemas)) = item.get("schemas") {
+          for schema_val in schemas {
+            // Entries that do not deserialize into a schema are skipped.
+            if let Ok(schema) =
+              serde_json::from_value::<crate::schema::Schema>(schema_val.clone())
+            {
+              // Clone the id upfront so the schema itself can be moved into `register`.
+              if let Some(id) = schema.obj.id.clone() {
+                register(schema, id);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  let content = fs::read_to_string(path)
+    .map_err(|e| format!("Failed to read file: {}: {}", path, e))?;
+  let suite: Vec<TestSuite> = serde_json::from_str(&content)
+    .map_err(|e| format!("Failed to parse JSON in {}: {}", path, e))?;
+
+  let mut failures = Vec::<String>::new();
+  for (group_index, group) in suite.into_iter().enumerate() {
+    // 1. Register family schemas if 'types' is present.
+    if let Some(Value::Array(type_items)) = &group.types {
+      // Ordered collections keep the generated `oneOf` lists (and the order in which
+      // the families are registered) identical from run to run.
+      let mut family_map: std::collections::BTreeMap<
+        String,
+        std::collections::BTreeSet<String>,
+      > = std::collections::BTreeMap::new();
+
+      for item in type_items {
+        let name = match item.get("name").and_then(|v| v.as_str()) {
+          Some(name) => name,
+          None => continue,
+        };
+        // Only an explicit hierarchy contributes; a type without one joins no family.
+        let hierarchy = match item.get("hierarchy").and_then(|v| v.as_array()) {
+          Some(hierarchy) => hierarchy,
+          None => continue,
+        };
+        for ancestor in hierarchy.iter().filter_map(Value::as_str) {
+          family_map
+            .entry(ancestor.to_string())
+            .or_default()
+            .insert(name.to_string());
+        }
+      }
+
+      for (family_name, members) in family_map {
+        let id = format!("{}.family", family_name);
+        let object_refs: Vec<Value> = members
+          .into_iter()
+          .map(|member| serde_json::json!({ "$ref": member }))
+          .collect();
+
+        let schema_json = serde_json::json!({
+            "$id": id,
+            "oneOf": object_refs
+        });
+
+        if let Ok(schema) = serde_json::from_value::<crate::schema::Schema>(schema_json) {
+          register(schema, id);
+        }
+      }
+    }
+
+    // 2. Register 'types', 'enums', and 'puncs' if present (JSPG style).
+    register_schemas(group.types);
+    register_schemas(group.enums);
+    register_schemas(group.puncs);
+
+    // 3. Register main 'schema' if present (Standard style).
+    // Ensure ID is a valid URI to avoid Url::parse errors in Compiler.
+    let unique_id = format!("test:{}:{}", path, group_index);
+
+    if let Some(schema_val) = &group.schema {
+      // Deserialize before the registry lock is taken: a failure here must not happen
+      // while the write guard is held, otherwise the lock would be poisoned for every
+      // other user of the registry.
+      let schema: crate::schema::Schema =
+        serde_json::from_value(schema_val.clone()).map_err(|e| {
+          format!(
+            "Failed to parse test schema in {} (group {}): {}",
+            path, group_index, e
+          )
+        })?;
+      register(schema, unique_id.clone());
+    }
+
+    for test in group.tests {
+      // Use explicit schema_id from test, or default to unique_id
+      let schema_id = test.schema_id.as_deref().unwrap_or(&unique_id).to_string();
+
+      let outcome = Validator::validate(&schema_id, &test.data);
+      let passed = outcome.errors.is_empty();
+
+      let failure = match (test.valid, passed) {
+        (true, false) => Some(format!(
+          "Test failed (expected valid): {}\nSchema: {:?}\nData: {:?}\nErrors: {:?}",
+          test.description,
+          group.schema, // We might need to find the actual schema used if schema_id is custom
+          test.data,
+          outcome.errors
+        )),
+        (false, true) => Some(format!(
+          "Test failed (expected invalid): {}\nSchema: {:?}\nData: {:?}\nErrors: (Empty)",
+          test.description, group.schema, test.data
+        )),
+        _ => None,
+      };
+
+      if let Some(msg) = failure {
+        // Both kinds of failure are reported on stderr.
+        eprintln!("{}", msg);
+        failures.push(msg);
+      }
+    }
+  }
+
+  if !failures.is_empty() {
+    return Err(format!(
+      "{} tests failed in file {}:\n\n{}",
+      failures.len(),
+      path,
+      failures.join("\n\n")
+    ));
+  }
+  Ok(())
+}
+
+/// Reports whether `v` is a JSON number holding an integral value.
+///
+/// Numbers stored as `i64` or `u64` qualify directly; a float qualifies when its
+/// fractional part is zero (so `1.0` counts). Any non-number value yields `false`.
+pub fn is_integer(v: &Value) -> bool {
+  if let Value::Number(num) = v {
+    if num.is_i64() || num.is_u64() {
+      return true;
+    }
+    return matches!(num.as_f64(), Some(f) if f.fract() == 0.0);
+  }
+  false
+}
+
+/// Deep equality for JSON values that compares numbers by value.
+///
+/// `serde_json` treats `0` and `0.0` as not equal, so `v1 == v2` cannot be used
+/// directly. Numbers are compared as `u64` when both fit, then as `i64`, and finally
+/// as `f64`. The float comparison is exact: an absolute tolerance would make distinct
+/// small values such as `1e-17` and `2e-17` compare equal.
+///
+/// Arrays are equal when they have the same length and equal elements in the same
+/// order; objects are equal when they have the same key set and equal values per key.
+/// Values of different JSON types are never equal.
+pub fn equals(v1: &Value, v2: &Value) -> bool {
+  match (v1, v2) {
+    (Value::Null, Value::Null) => true,
+    (Value::Bool(b1), Value::Bool(b2)) => b1 == b2,
+    (Value::Number(n1), Value::Number(n2)) => {
+      // Prefer the integer representations so large integers are not rounded
+      // through f64 when both sides are integral.
+      if let (Some(a), Some(b)) = (n1.as_u64(), n2.as_u64()) {
+        return a == b;
+      }
+      if let (Some(a), Some(b)) = (n1.as_i64(), n2.as_i64()) {
+        return a == b;
+      }
+      match (n1.as_f64(), n2.as_f64()) {
+        (Some(a), Some(b)) => a == b,
+        _ => false,
+      }
+    }
+    (Value::String(s1), Value::String(s2)) => s1 == s2,
+    (Value::Array(arr1), Value::Array(arr2)) => {
+      arr1.len() == arr2.len() && arr1.iter().zip(arr2).all(|(e1, e2)| equals(e1, e2))
+    }
+    (Value::Object(obj1), Value::Object(obj2)) => {
+      // Equal sizes plus "every key of obj1 is in obj2 with an equal value" implies
+      // the key sets are identical.
+      obj1.len() == obj2.len()
+        && obj1
+          .iter()
+          .all(|(key, left)| obj2.get(key).map_or(false, |right| equals(left, right)))
+    }
+    _ => false,
+  }
+}
--- a/src/validator.rs
+++ b/src/validator.rs