validator refactor progress

2026-03-03 00:13:37 -05:00
parent e14f53e7d9
commit 3898c43742
81 changed files with 6331 additions and 7934 deletions
--- a/src/database/enum.rs
+++ b/src/database/enum.rs
@ -0,0 +1,12 @@
+use crate::database::schema::Schema;
+use serde::{Deserialize, Serialize};
+
+/// An enum definition deserialized from the database description.
+///
+/// `#[serde(default)]` makes every field optional on input: anything missing is
+/// filled with its `Default` value (empty string / empty vector).
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+#[serde(default)]
+pub struct Enum {
+  /// Name of the enum.
+  pub name: String,
+  /// Presumably the module that declares the enum — TODO confirm against the producer.
+  pub module: String,
+  /// Presumably the origin of the definition — TODO confirm against the producer.
+  pub source: String,
+  /// String values carried by the enum (presumably the permitted members).
+  pub values: Vec<String>,
+  /// Schemas attached to this enum definition.
+  pub schemas: Vec<Schema>,
+}
--- a/src/database/formats.rs
+++ b/src/database/formats.rs
@ -0,0 +1,875 @@
+use std::{
+    collections::HashMap,
+    error::Error,
+    net::{Ipv4Addr, Ipv6Addr},
+};
+
+use lazy_static::lazy_static;
+use percent_encoding::percent_decode_str;
+use serde_json::Value;
+use url::Url;
+
+// use crate::ecma; // Assuming ecma is not yet available, stubbing regex for now
+
+/// Defines a validator for one value of the `format` keyword.
+///
+/// The struct is `Copy`: it only holds a static name and a plain function pointer.
+#[derive(Clone, Copy)]
+pub struct Format {
+    /// Name of the format, e.g. `"ipv4"` or `"date-time"`.
+    pub name: &'static str,
+
+    /// Validates the given value: `Ok(())` when it conforms, otherwise an error
+    /// describing the violation.
+    pub func: fn(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>>, // error is `Send + Sync` so it can cross threads
+}
+
+lazy_static! {
+    /// Registry of the built-in format validators, keyed by format name.
+    /// Built lazily on first access.
+    pub(crate) static ref FORMATS: HashMap<&'static str, Format> = {
+        let mut m = HashMap::<&'static str, Format>::new();
+        // Helper to register formats; the name is stored both as the map key and in
+        // the `Format` itself. The `Option` returned by `insert` is ignored.
+        let mut register = |name, func| m.insert(name, Format { name, func });
+        
+        // register("regex", validate_regex); // Stubbed
+        register("ipv4", validate_ipv4);
+        register("ipv6", validate_ipv6);
+        register("hostname", validate_hostname);
+        register("idn-hostname", validate_idn_hostname);
+        register("email", validate_email);
+        register("idn-email", validate_idn_email);
+        register("date", validate_date);
+        register("time", validate_time);
+        register("date-time", validate_date_time);
+        register("duration", validate_duration);
+        register("period", validate_period);
+        register("json-pointer", validate_json_pointer);
+        register("relative-json-pointer", validate_relative_json_pointer);
+        register("uuid", validate_uuid);
+        register("uri", validate_uri);
+        register("iri", validate_iri);
+        register("uri-reference", validate_uri_reference);
+        register("iri-reference", validate_iri_reference);
+        register("uri-template", validate_uri_template);
+        m
+    };
+}
+
+/*
+fn validate_regex(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let Value::String(s) = v else {
+        return Ok(());
+    };
+    // ecma::convert(s).map(|_| ())
+    Ok(())
+}
+*/
+
+/// `ipv4` format: a JSON string must parse as an [`Ipv4Addr`].
+/// Values that are not strings are accepted unchanged.
+fn validate_ipv4(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    match v {
+        Value::String(text) => {
+            text.parse::<Ipv4Addr>()?;
+            Ok(())
+        }
+        _ => Ok(()),
+    }
+}
+
+/// `ipv6` format: a JSON string must parse as an [`Ipv6Addr`].
+/// Values that are not strings are accepted unchanged.
+fn validate_ipv6(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    match v {
+        Value::String(text) => {
+            text.parse::<Ipv6Addr>()?;
+            Ok(())
+        }
+        _ => Ok(()),
+    }
+}
+
+/// `date` format: string values are checked with `check_date`; anything else passes.
+fn validate_date(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    match v {
+        Value::String(text) => check_date(text),
+        _ => Ok(()),
+    }
+}
+
+/// Reports whether the character that starts at byte offset `index` of `s` is `ch`.
+///
+/// Returns `false` when `index` is past the end of `s` or falls inside a
+/// multi-byte character.
+fn matches_char(s: &str, index: usize, ch: char) -> bool {
+    match s.get(index..) {
+        Some(tail) => tail.starts_with(ch),
+        None => false,
+    }
+}
+
+// see https://datatracker.ietf.org/doc/html/rfc3339#section-5.6
+/// Validates an RFC 3339 `full-date` of the form `yyyy-mm-dd`.
+///
+/// The string must be exactly 10 bytes long with hyphens at offsets 4 and 7, every
+/// field must consist of ASCII digits only, the month must be 1..=12 and the day
+/// must exist in that month (leap years are honoured for February).
+///
+/// # Errors
+/// Returns a message describing the first rule that was violated.
+fn check_date(s: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    // yyyy-mm-dd
+    if s.len() != 10 {
+        Err("must be 10 characters long")?;
+    }
+    if !matches_char(s, 4, '-') || !matches_char(s, 7, '-') {
+        Err("missing hyphen in correct place")?;
+    }
+
+    // `str::parse::<usize>` accepts a leading '+', so a field such as "+1" would be
+    // taken for a month; only digit-only fields are allowed to reach the parser.
+    let mut ymd = s
+        .splitn(3, '-')
+        .filter(|t| t.bytes().all(|b| b.is_ascii_digit()))
+        .filter_map(|t| t.parse::<usize>().ok());
+    let (Some(y), Some(m), Some(d)) = (ymd.next(), ymd.next(), ymd.next()) else {
+        Err("non-positive year/month/day")?
+    };
+
+    if !matches!(m, 1..=12) {
+        Err(format!("{m} months in year"))?;
+    }
+    if !matches!(d, 1..=31) {
+        Err(format!("{d} days in month"))?;
+    }
+
+    match m {
+        2 => {
+            // Gregorian leap-year rule
+            let is_leap = y % 4 == 0 && (y % 100 != 0 || y % 400 == 0);
+            let feb_days = if is_leap { 29 } else { 28 };
+            if d > feb_days {
+                Err(format!("february has {feb_days} days only"))?;
+            }
+        }
+        4 | 6 | 9 | 11 => {
+            if d > 30 {
+                Err("month has 30 days only")?;
+            }
+        }
+        _ => {}
+    }
+    Ok(())
+}
+
+/// `time` format: string values are checked with `check_time`; anything else passes.
+fn validate_time(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    if let Value::String(text) = v {
+        check_time(text)?;
+    }
+    Ok(())
+}
+
+/// Validates an RFC 3339 `full-time`: `hh:mm:ss[.frac](Z|z|+hh:mm|-hh:mm)`.
+///
+/// Every numeric field must consist of ASCII digits only. A seconds value of 60
+/// (leap second) is accepted only when the time, converted to UTC with the given
+/// offset, is 23:59.
+///
+/// # Errors
+/// Returns a message describing the first rule that was violated.
+fn check_time(mut str: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    // min: hh:mm:ssZ
+    if str.len() < 9 {
+        Err("less than 9 characters long")?
+    }
+    if !matches_char(str, 2, ':') || !matches_char(str, 5, ':') {
+        Err("missing colon in correct place")?
+    }
+
+    // parse hh:mm:ss
+    if !str.is_char_boundary(8) {
+        Err("contains non-ascii char")?
+    }
+    // `str::parse::<usize>` accepts a leading '+', so "+1:00:00Z" would otherwise
+    // pass; only digit-only fields are allowed to reach the parser.
+    let mut hms = (str[..8])
+        .splitn(3, ':')
+        .filter(|t| t.bytes().all(|b| b.is_ascii_digit()))
+        .filter_map(|t| t.parse::<usize>().ok());
+    let (Some(mut h), Some(mut m), Some(s)) = (hms.next(), hms.next(), hms.next()) else {
+        Err("non-positive hour/min/sec")?
+    };
+    if h > 23 || m > 59 || s > 60 {
+        Err("hour/min/sec out of range")?
+    }
+    str = &str[8..];
+
+    // parse sec-frac if present
+    if let Some(rem) = str.strip_prefix('.') {
+        let n_digits = rem.chars().take_while(char::is_ascii_digit).count();
+        if n_digits == 0 {
+            Err("no digits in second fraction")?;
+        }
+        // ASCII digits are one byte each, so the char count is a valid byte offset
+        str = &rem[n_digits..];
+    }
+
+    if str != "z" && str != "Z" {
+        // parse time-numoffset
+        if str.len() != 6 {
+            Err("offset must be 6 characters long")?;
+        }
+        // converting local time to UTC subtracts a positive offset and adds a negative one
+        let sign: isize = match str.chars().next() {
+            Some('+') => -1,
+            Some('-') => 1,
+            _ => Err("offset must begin with plus/minus")?,
+        };
+        str = &str[1..];
+        if !matches_char(str, 2, ':') {
+            Err("missing colon in offset at correct place")?
+        }
+
+        // same digit-only guard as above: "++1:00" must not parse as +01:00
+        let mut zhm = str
+            .splitn(2, ':')
+            .filter(|t| t.bytes().all(|b| b.is_ascii_digit()))
+            .filter_map(|t| t.parse::<usize>().ok());
+        let (Some(zh), Some(zm)) = (zhm.next(), zhm.next()) else {
+            Err("non-positive hour/min in offset")?
+        };
+        if zh > 23 || zm > 59 {
+            Err("hour/min in offset out of range")?
+        }
+
+        // apply timezone
+        let mut hm = (h * 60 + m) as isize + sign * (zh * 60 + zm) as isize;
+        if hm < 0 {
+            hm += 24 * 60;
+            debug_assert!(hm >= 0);
+        }
+        let hm = hm as usize;
+        (h, m) = (hm / 60, hm % 60);
+    }
+
+    // check leap second
+    if !(s < 60 || (h == 23 && m == 59)) {
+        Err("invalid leap second")?
+    }
+    Ok(())
+}
+
+/// `date-time` format: string values are checked with `check_date_time`;
+/// anything else passes.
+fn validate_date_time(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    match v {
+        Value::String(text) => check_date_time(text),
+        _ => Ok(()),
+    }
+}
+
+/// Validates `<date>T<time>`: the first 10 bytes go to `check_date`, byte 10 must be
+/// `t` or `T`, and everything after it goes to `check_time`.
+fn check_date_time(s: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    // min: yyyy-mm-ddThh:mm:ssZ
+    if s.len() < 20 {
+        return Err("less than 20 characters long".into());
+    }
+    // An ASCII byte always starts a character, so this also proves offsets 10 and 11
+    // are char boundaries.
+    if !matches!(s.as_bytes()[10], b't' | b'T') {
+        return Err("11th character must be t or T".into());
+    }
+    let (date, time) = (&s[..10], &s[11..]);
+    check_date(date).map_err(|e| format!("invalid date element: {e}"))?;
+    check_time(time).map_err(|e| format!("invalid time element: {e}"))?;
+    Ok(())
+}
+
+/// `duration` format: string values are checked with `check_duration`;
+/// anything else passes.
+fn validate_duration(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    match v {
+        Value::String(text) => check_duration(text),
+        _ => Ok(()),
+    }
+}
+
+// see https://datatracker.ietf.org/doc/html/rfc3339#appendix-A
+/// Validates a duration such as `P1Y2M3DT4H5M6S` or `P4W`.
+///
+/// After the leading `P` the string is either digits followed by `W`, or a date
+/// section (units `Y`, `M`, `D`) optionally followed by `T` and a non-empty time
+/// section (units `H`, `M`, `S`). Within a section every unit needs a number in
+/// front of it and units must appear in the listed order.
+fn check_duration(s: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let body = match s.strip_prefix('P') {
+        Some(rest) => rest,
+        None => return Err("must start with P".into()),
+    };
+    if body.is_empty() {
+        return Err("nothing after P".into());
+    }
+
+    // week form: nothing but digits in front of a trailing 'W'
+    if let Some(weeks) = body.strip_suffix('W') {
+        return if weeks.is_empty() {
+            Err("no number in week".into())
+        } else if weeks.bytes().all(|b| b.is_ascii_digit()) {
+            Ok(())
+        } else {
+            Err("invalid week".into())
+        };
+    }
+
+    for (section, mut rest) in body.split('T').enumerate() {
+        if section != 0 && rest.is_empty() {
+            return Err("no time elements".into());
+        }
+        let section_units = match section {
+            0 => "YMD",
+            1 => "HMS",
+            _ => return Err("more than one T".into()),
+        };
+        // units still permitted; shrinks as units are consumed so order is enforced
+        let mut allowed = section_units;
+        while !rest.is_empty() {
+            let after_digits = rest.trim_start_matches(|c: char| c.is_ascii_digit());
+            if after_digits.len() == rest.len() {
+                return Err("missing number".into());
+            }
+            let unit = match after_digits.chars().next() {
+                Some(unit) => unit,
+                None => return Err("missing unit".into()),
+            };
+            match allowed.find(unit) {
+                Some(pos) => {
+                    allowed = &allowed[pos + 1..];
+                    // a unit that matched is ASCII, hence exactly one byte long
+                    rest = &after_digits[1..];
+                }
+                None if section_units.contains(unit) => {
+                    return Err(format!("unit {unit} out of order").into());
+                }
+                None => return Err(format!("invalid unit {unit}").into()),
+            }
+        }
+    }
+
+    Ok(())
+}
+
+// see https://datatracker.ietf.org/doc/html/rfc3339#appendix-A
+/// `period` format: `<start>/<end>` where the parts are date-time/date-time,
+/// date-time/duration or duration/date-time. Non-string values pass.
+fn validate_period(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let text = match v {
+        Value::String(text) => text,
+        _ => return Ok(()),
+    };
+
+    // split at the first slash
+    let Some((start, end)) = text.split_once('/') else {
+        return Err("missing slash".into());
+    };
+
+    let duration = |part: &str, which: &str| {
+        check_duration(part).map_err(|e| format!("invalid {which} duration: {e}"))
+    };
+    let date_time = |part: &str, which: &str| {
+        check_date_time(part).map_err(|e| format!("invalid {which} date-time: {e}"))
+    };
+
+    if start.starts_with('P') {
+        // a leading duration must be followed by a date-time
+        duration(start, "start")?;
+        date_time(end, "end")?;
+    } else {
+        date_time(start, "start")?;
+        if end.starts_with('P') {
+            duration(end, "end")?;
+        } else {
+            date_time(end, "end")?;
+        }
+    }
+    Ok(())
+}
+
+/// `hostname` format: string values are checked with `check_hostname`;
+/// anything else passes.
+fn validate_hostname(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    match v {
+        Value::String(text) => check_hostname(text),
+        _ => Ok(()),
+    }
+}
+
+// see https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
+/// Validates an ASCII hostname.
+///
+/// The whole name may be at most 253 bytes. Each dot-separated label must be 1 to 63
+/// bytes of ASCII letters, digits and hyphens, and must not start or end with a
+/// hyphen. A label with `--` at positions 3-4 must be an `xn--` punycode label that
+/// decodes cleanly and satisfies `check_unicode_idn_constraints`.
+fn check_hostname(s: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    if s.len() > 253 {
+        return Err("more than 253 characters long".into());
+    }
+
+    for label in s.split('.') {
+        if label.is_empty() || label.len() > 63 {
+            return Err("label must be 1 to 63 characters long".into());
+        }
+        if label.starts_with('-') {
+            return Err("label starts with hyphen".into());
+        }
+        if label.ends_with('-') {
+            return Err("label ends with hyphen".into());
+        }
+
+        let offending = label
+            .chars()
+            .find(|c| !(c.is_ascii_alphanumeric() || *c == '-'));
+        if let Some(ch) = offending {
+            return Err(format!("invalid character {ch:?}").into());
+        }
+
+        // The label is pure ASCII from here on, so byte slicing cannot split a char.
+        if label.len() < 4 || &label[2..4] != "--" {
+            continue;
+        }
+        if !label.starts_with("xn--") {
+            return Err("label has -- in 3rd/4th position but does not start with xn--".into());
+        }
+        let (decoded, status) = idna::domain_to_unicode(label);
+        if status.is_err() {
+            return Err("invalid punycode".into());
+        }
+        check_unicode_idn_constraints(&decoded)
+            .map_err(|e| format!("invalid punycode/IDN: {e}"))?;
+    }
+
+    Ok(())
+}
+
+/// `idn-hostname` format: string values are checked with `check_idn_hostname`;
+/// anything else passes.
+fn validate_idn_hostname(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    match v {
+        Value::String(text) => check_idn_hostname(text),
+        _ => Ok(()),
+    }
+}
+
+/// Code points rejected outright by `check_unicode_idn_constraints`.
+static DISALLOWED: [char; 10] = [
+    '\u{0640}', //  ARABIC TATWEEL
+    '\u{07FA}', //  NKO LAJANYALAN
+    '\u{302E}', //  HANGUL SINGLE DOT TONE MARK
+    '\u{302F}', //  HANGUL DOUBLE DOT TONE MARK
+    '\u{3031}', //  VERTICAL KANA REPEAT MARK
+    '\u{3032}', //  VERTICAL KANA REPEAT WITH VOICED SOUND MARK
+    '\u{3033}', //  VERTICAL KANA REPEAT MARK UPPER HALF
+    '\u{3034}', //  VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF
+    '\u{3035}', //  VERTICAL KANA REPEAT MARK LOWER HALF
+    '\u{303B}', //  VERTICAL IDEOGRAPHIC ITERATION MARK
+];
+
+/// Validates an internationalized hostname.
+///
+/// The name is converted to its strict ASCII form, decoded back to Unicode to run
+/// `check_unicode_idn_constraints`, and the ASCII form is finally checked with
+/// `check_hostname`.
+fn check_idn_hostname(s: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let ascii = idna::domain_to_ascii_strict(s).map_err(|e| format!("idna error: {:?}", e))?;
+    let (decoded, status) = idna::domain_to_unicode(&ascii);
+    status.map_err(|e| format!("idna decoding error: {:?}", e))?;
+    check_unicode_idn_constraints(&decoded)?;
+    check_hostname(&ascii)
+}
+
+/// Applies the IDNA2008 rules that are checked on the Unicode form of a name.
+///
+/// Checks, in order: the disallowed code points, `--` in 3rd/4th position, and the
+/// contextual rules of RFC 5892 appendix A for MIDDLE DOT, Greek KERAIA, Hebrew
+/// GERESH / GERSHAYIM, KATAKANA MIDDLE DOT, Arabic-Indic digits and ZERO WIDTH
+/// JOINER.
+///
+/// Contextual rules look at the characters around every occurrence within the whole
+/// string, so the character in front of a second or later occurrence is still seen.
+///
+/// # Errors
+/// Returns a message naming the first rule that was violated.
+fn check_unicode_idn_constraints(unicode: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    // see https://www.rfc-editor.org/rfc/rfc5892#section-2.6
+    if unicode.contains(DISALLOWED) {
+        Err("contains disallowed character")?;
+    }
+
+    // unicode string must not contain "--" in 3rd and 4th position
+    // see https://www.rfc-editor.org/rfc/rfc5891#section-4.2.3.1
+    if unicode.chars().skip(2).take(2).filter(|&c| c == '-').count() == 2 {
+        Err("unicode string must not contain '--' in 3rd and 4th position")?;
+    }
+
+    // MIDDLE DOT is allowed between 'l' characters only
+    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.3
+    {
+        let middle_dot = '\u{00b7}';
+        for (i, _) in unicode.match_indices(middle_dot) {
+            let before = &unicode[..i];
+            let after = &unicode[i + middle_dot.len_utf8()..];
+            // the character right after the dot must be 'l', not the last one of the string
+            if !before.ends_with('l') || !after.starts_with('l') {
+                Err("MIDDLE DOT is allowed between 'l' characters only")?;
+            }
+        }
+    }
+
+    // Greek KERAIA must be followed by Greek character
+    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.4
+    {
+        let keraia = '\u{0375}';
+        let greek = '\u{0370}'..='\u{03FF}';
+        for (i, _) in unicode.match_indices(keraia) {
+            let after = &unicode[i + keraia.len_utf8()..];
+            if !after.starts_with(|c: char| greek.contains(&c)) {
+                Err("Greek KERAIA must be followed by Greek character")?;
+            }
+        }
+    }
+
+    // Hebrew GERESH must be preceded by Hebrew character
+    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.5
+    //
+    // Hebrew GERSHAYIM must be preceded by Hebrew character
+    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.6
+    {
+        let hebrew = '\u{0590}'..='\u{05FF}';
+        // each mark carries its own message so the error names the character found
+        let rules = [
+            ('\u{05F3}', "Hebrew GERESH must be preceded by Hebrew character"),
+            ('\u{05F4}', "Hebrew GERSHAYIM must be preceded by Hebrew character"),
+        ];
+        for (mark, message) in rules {
+            for (i, _) in unicode.match_indices(mark) {
+                if !unicode[..i].ends_with(|c: char| hebrew.contains(&c)) {
+                    Err(message)?;
+                }
+            }
+        }
+    }
+
+    // KATAKANA MIDDLE DOT must be with Hiragana, Katakana, or Han
+    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.7
+    {
+        let katakana_middle_dot = '\u{30FB}';
+        if unicode.contains(katakana_middle_dot) {
+            let hiragana = '\u{3040}'..='\u{309F}';
+            let katakana = '\u{30A0}'..='\u{30FF}';
+            // NOTE(review): only the CJK Unified Ideographs block is treated as Han here;
+            // extension blocks are not covered — confirm whether that is intended.
+            let han = '\u{4E00}'..='\u{9FFF}';
+            // the dot itself lies inside the Katakana block, so it must not count
+            let has_companion = unicode.chars().any(|c| {
+                hiragana.contains(&c)
+                    || (c != katakana_middle_dot && katakana.contains(&c))
+                    || han.contains(&c)
+            });
+            if !has_companion {
+                Err("KATAKANA MIDDLE DOT must be with Hiragana, Katakana, or Han")?;
+            }
+        }
+    }
+
+    // ARABIC-INDIC DIGITS and Extended Arabic-Indic Digits cannot be mixed
+    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.8
+    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.9
+    {
+        let arabic_indic_digits = '\u{0660}'..='\u{0669}';
+        let extended_arabic_indic_digits = '\u{06F0}'..='\u{06F9}';
+        if unicode.contains(|c: char| arabic_indic_digits.contains(&c))
+            && unicode.contains(|c: char| extended_arabic_indic_digits.contains(&c))
+        {
+            Err("ARABIC-INDIC DIGITS and Extended Arabic-Indic Digits cannot be mixed")?;
+        }
+    }
+
+    // ZERO WIDTH JOINER must be preceded by Virama
+    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.2
+    {
+        let zero_width_joiner = '\u{200D}';
+        // https://www.compart.com/en/unicode/combining/9
+        static VIRAMA: [char; 61] = [
+            '\u{094D}', '\u{09CD}', '\u{0A4D}', '\u{0ACD}', '\u{0B4D}', '\u{0BCD}',
+            '\u{0C4D}', '\u{0CCD}', '\u{0D3B}', '\u{0D3C}', '\u{0D4D}', '\u{0DCA}',
+            '\u{0E3A}', '\u{0EBA}', '\u{0F84}', '\u{1039}', '\u{103A}', '\u{1714}',
+            '\u{1734}', '\u{17D2}', '\u{1A60}', '\u{1B44}', '\u{1BAA}', '\u{1BAB}',
+            '\u{1BF2}', '\u{1BF3}', '\u{2D7F}', '\u{A806}', '\u{A82C}', '\u{A8C4}',
+            '\u{A953}', '\u{A9C0}', '\u{AAF6}', '\u{ABED}', '\u{10A3F}', '\u{11046}',
+            '\u{1107F}', '\u{110B9}', '\u{11133}', '\u{11134}', '\u{111C0}', '\u{11235}',
+            '\u{112EA}', '\u{1134D}', '\u{11442}', '\u{114C2}', '\u{115BF}', '\u{1163F}',
+            '\u{116B6}', '\u{1172B}', '\u{11839}', '\u{1193D}', '\u{1193E}', '\u{119E0}',
+            '\u{11A34}', '\u{11A47}', '\u{11A99}', '\u{11C3F}', '\u{11D44}', '\u{11D45}',
+            '\u{11D97}',
+        ];
+        for (i, _) in unicode.match_indices(zero_width_joiner) {
+            if !unicode[..i].ends_with(VIRAMA) {
+                Err("ZERO WIDTH JOINER must be preceded by Virama")?;
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// `email` format: string values are checked with `check_email`; anything else passes.
+fn validate_email(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    match v {
+        Value::String(text) => check_email(text),
+        _ => Ok(()),
+    }
+}
+
+// see https://en.wikipedia.org/wiki/Email_address
+/// Validates an email address of the form `local@domain`.
+///
+/// * The whole address may be at most 254 bytes and the local part at most 64.
+/// * The address is split at the last `@`.
+/// * An unquoted local part must be non-empty, must not start or end with a dot or
+///   contain consecutive dots, and may only use ASCII letters, digits and
+///   ``.!#$%&'*+-/=?^_`{|}~``.
+/// * A quoted local part must not contain a backslash or a quote between the quotes.
+/// * A domain in square brackets must be an IPv4 address or `IPv6:` followed by an
+///   IPv6 address; any other domain must satisfy `check_hostname`.
+///
+/// # Errors
+/// Returns a message describing the first rule that was violated.
+fn check_email(s: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    // entire email address to be no more than 254 characters long
+    if s.len() > 254 {
+        Err("more than 254 characters long")?
+    }
+
+    // email address is generally recognized as having two parts joined with an at-sign
+    let Some(at) = s.rfind('@') else {
+        Err("missing @")?
+    };
+    let (local, domain) = (&s[..at], &s[at + 1..]);
+
+    // local part may be up to 64 characters long
+    if local.len() > 64 {
+        Err("local part more than 64 characters long")?
+    }
+
+    if local.len() > 1 && local.starts_with('"') && local.ends_with('"') {
+        // quoted
+        let local = &local[1..local.len() - 1];
+        if local.contains(['\\', '"']) {
+            Err("backslash and quote not allowed within quoted local part")?
+        }
+    } else {
+        // unquoted
+
+        // none of the checks below fire on an empty string, so "@example.com"
+        // has to be rejected explicitly
+        if local.is_empty() {
+            Err("empty local part")?
+        }
+
+        if local.starts_with('.') {
+            Err("starts with dot")?
+        }
+        if local.ends_with('.') {
+            Err("ends with dot")?
+        }
+
+        // consecutive dots not allowed
+        if local.contains("..") {
+            Err("consecutive dots")?
+        }
+
+        // check allowed chars
+        if let Some(ch) = local
+            .chars()
+            .find(|c| !(c.is_ascii_alphanumeric() || ".!#$%&'*+-/=?^_`{|}~".contains(*c)))
+        {
+            Err(format!("invalid character {ch:?}"))?
+        }
+    }
+
+    // domain if enclosed in brackets, must match an IP address
+    if domain.starts_with('[') && domain.ends_with(']') {
+        let s = &domain[1..domain.len() - 1];
+        if let Some(s) = s.strip_prefix("IPv6:") {
+            if let Err(e) = s.parse::<Ipv6Addr>() {
+                Err(format!("invalid ipv6 address: {e}"))?
+            }
+            return Ok(());
+        }
+        if let Err(e) = s.parse::<Ipv4Addr>() {
+            Err(format!("invalid ipv4 address: {e}"))?
+        }
+        return Ok(());
+    }
+
+    // domain must match the requirements for a hostname
+    if let Err(e) = check_hostname(domain) {
+        Err(format!("invalid domain: {e}"))?
+    }
+
+    Ok(())
+}
+
+/// `idn-email` format. Non-string values pass.
+///
+/// The address is split at the last `@`; both halves are converted with
+/// `idna::domain_to_ascii_strict`, the domain is checked with `check_idn_hostname`,
+/// and the re-assembled ASCII address is checked with `check_email`.
+fn validate_idn_email(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let text = match v {
+        Value::String(text) => text,
+        _ => return Ok(()),
+    };
+
+    let (local, domain) = text.rsplit_once('@').ok_or("missing @")?;
+
+    let to_ascii = |part: &str| {
+        idna::domain_to_ascii_strict(part).map_err(|e| format!("idna error: {:?}", e))
+    };
+    let ascii_local = to_ascii(local)?;
+    let ascii_domain = to_ascii(domain)?;
+
+    check_idn_hostname(&ascii_domain).map_err(|e| format!("invalid domain: {e}"))?;
+    check_email(&format!("{ascii_local}@{ascii_domain}"))
+}
+
+/// `json-pointer` format: string values are checked with `check_json_pointer`;
+/// anything else passes.
+fn validate_json_pointer(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    match v {
+        Value::String(text) => check_json_pointer(text),
+        _ => Ok(()),
+    }
+}
+
+// see https://www.rfc-editor.org/rfc/rfc6901#section-3
+/// Validates a JSON Pointer: either empty, or a sequence of `/`-prefixed reference
+/// tokens in which every `~` is followed by `0` or `1`.
+fn check_json_pointer(s: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    if s.is_empty() {
+        return Ok(());
+    }
+    let Some(tokens) = s.strip_prefix('/') else {
+        return Err("not starting with slash".into());
+    };
+    for token in tokens.split('/') {
+        // A token cannot contain '/', and every other character except '~' is
+        // permitted, so only the escape sequences need inspecting.
+        let mut rest = token;
+        while let Some(tilde) = rest.find('~') {
+            let escaped = &rest[tilde + 1..];
+            if !(escaped.starts_with('0') || escaped.starts_with('1')) {
+                return Err("~ must be followed by 0 or 1".into());
+            }
+            rest = &escaped[1..];
+        }
+    }
+    Ok(())
+}
+
+// see https://tools.ietf.org/html/draft-handrews-relative-json-pointer-01#section-3
+/// `relative-json-pointer` format: a non-negative integer without leading zeros,
+/// followed by either `#` or a JSON Pointer. Non-string values pass.
+fn validate_relative_json_pointer(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let text = match v {
+        Value::String(text) => text,
+        _ => return Ok(()),
+    };
+
+    // leading integer
+    let remainder = text.trim_start_matches(|c: char| c.is_ascii_digit());
+    let digit_count = text.len() - remainder.len();
+    if digit_count == 0 {
+        return Err("must start with non-negative integer".into());
+    }
+    if digit_count > 1 && text.starts_with('0') {
+        return Err("starts with zero".into());
+    }
+
+    // followed by either json-pointer or '#'
+    if remainder == "#" {
+        return Ok(());
+    }
+    check_json_pointer(remainder).map_err(|e| format!("invalid json-pointer element: {e}"))?;
+    Ok(())
+}
+
+// see https://datatracker.ietf.org/doc/html/rfc4122#page-4
+/// `uuid` format: five hyphen-separated groups of 8, 4, 4, 4 and 12 hexadecimal
+/// digits. Non-string values pass.
+fn validate_uuid(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let text = match v {
+        Value::String(text) => text,
+        _ => return Ok(()),
+    };
+
+    const GROUP_LENGTHS: [usize; 5] = [8, 4, 4, 4, 12];
+    let mut seen = 0;
+    for (index, group) in text.split('-').enumerate() {
+        let Some(&expected) = GROUP_LENGTHS.get(index) else {
+            return Err("more than 5 elements".into());
+        };
+        if group.len() != expected {
+            return Err(format!(
+                "element {} must be {} characters long",
+                index + 1,
+                expected
+            )
+            .into());
+        }
+        if let Some(ch) = group.chars().find(|c| !c.is_ascii_hexdigit()) {
+            return Err(format!("non-hex character {ch:?}").into());
+        }
+        seen = index + 1;
+    }
+    if seen != GROUP_LENGTHS.len() {
+        return Err("must have 5 elements".into());
+    }
+    Ok(())
+}
+
+/// `uri` format: the string must parse as a URI reference and carry a scheme.
+/// Non-string values pass.
+fn validate_uri(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let text = match v {
+        Value::String(text) => text,
+        _ => return Ok(()),
+    };
+    let parsed = fluent_uri::UriRef::parse(text.as_str()).map_err(|e| e.to_string())?;
+    match parsed.scheme() {
+        Some(_) => Ok(()),
+        None => Err("relative url".into()),
+    }
+}
+
+/// `iri` format: the string must parse as an absolute URL with [`Url::parse`].
+/// Non-string values pass.
+fn validate_iri(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let text = match v {
+        Value::String(text) => text,
+        _ => return Ok(()),
+    };
+    Url::parse(text)
+        .map(drop)
+        .map_err(|e| -> Box<dyn Error + Send + Sync> {
+            match e {
+                url::ParseError::RelativeUrlWithoutBase => "relative url".into(),
+                other => other.into(),
+            }
+        })
+}
+
+lazy_static! {
+    /// Placeholder base URL that `parse_uri_reference` joins references against.
+    /// The `unwrap` cannot fail at runtime unless this literal stops being a valid URL.
+    static ref TEMP_URL: Url = Url::parse("http://temp.com").unwrap();
+}
+
+/// Parses `s` as a reference relative to `TEMP_URL`.
+/// Any backslash in `s` is rejected before parsing.
+fn parse_uri_reference(s: &str) -> Result<Url, Box<dyn Error + Send + Sync>> {
+    match s.find('\\') {
+        Some(_) => Err("contains \\\\".into()),
+        None => TEMP_URL.join(s).map_err(Into::into),
+    }
+}
+
+/// `uri-reference` format: the string must parse with `fluent_uri::UriRef::parse`.
+/// Non-string values pass.
+fn validate_uri_reference(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    if let Value::String(text) = v {
+        fluent_uri::UriRef::parse(text.as_str()).map_err(|e| e.to_string())?;
+    }
+    Ok(())
+}
+
+/// `iri-reference` format: the string must be accepted by `parse_uri_reference`.
+/// Non-string values pass.
+fn validate_iri_reference(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    if let Value::String(text) = v {
+        parse_uri_reference(text)?;
+    }
+    Ok(())
+}
+
+/// `uri-template` format. Non-string values pass.
+///
+/// The string must be accepted by `parse_uri_reference`; then, in every `/`-separated
+/// segment of the percent-decoded path, curly braces must alternate `{`, `}` and the
+/// last one must be a closing brace.
+fn validate_uri_template(v: &Value) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let text = match v {
+        Value::String(text) => text,
+        _ => return Ok(()),
+    };
+
+    let url = parse_uri_reference(text)?;
+
+    // the parsed path has its curly braces percent-encoded, so decode it first
+    let decoded = percent_decode_str(url.path()).decode_utf8()?;
+
+    for segment in decoded.split('/') {
+        // true while an opening brace is waiting for its closing brace
+        let mut inside = false;
+        for ch in segment.chars() {
+            if ch != '{' && ch != '}' {
+                continue;
+            }
+            let opens = ch == '{';
+            // opening while already open, or closing while nothing is open
+            if opens == inside {
+                return Err("nested curly braces".into());
+            }
+            inside = opens;
+        }
+        if inside {
+            return Err("no matching closing brace".into());
+        }
+    }
+    Ok(())
+}
--- a/src/database/mod.rs
+++ b/src/database/mod.rs
@ -0,0 +1,220 @@
+pub mod r#enum;
+pub mod formats;
+pub mod page;
+pub mod punc;
+pub mod schema;
+pub mod r#type;
+
+use crate::database::r#enum::Enum;
+use crate::database::punc::Punc;
+use crate::database::schema::Schema;
+use crate::database::r#type::Type;
+use std::collections::HashMap;
+
+/// In-memory registry of the definitions loaded from a JSON description.
+pub struct Database {
+  /// Enum definitions keyed by their `name`.
+  pub enums: HashMap<String, Enum>,
+  /// Type definitions keyed by their `name`.
+  pub types: HashMap<String, Type>,
+  /// Punc definitions keyed by their `name`.
+  pub puncs: HashMap<String, Punc>,
+  /// Every known schema keyed by its id.
+  pub schemas: HashMap<String, Schema>,
+  /// For each schema id, the ids of the schemas that reach it through their refs,
+  /// directly or transitively.
+  pub descendants: HashMap<String, Vec<String>>,
+}
+
+impl Database {
+  /// Builds a database from a JSON value with optional `enums`, `types`, `puncs` and
+  /// `schemas` arrays, then compiles it.
+  ///
+  /// Array items that fail to deserialize are skipped silently, and the result of
+  /// `compile` is discarded (it currently always returns `Ok`).
+  pub fn new(val: &serde_json::Value) -> Self {
+    let mut db = Self {
+      enums: HashMap::new(),
+      types: HashMap::new(),
+      puncs: HashMap::new(),
+      schemas: HashMap::new(),
+      descendants: HashMap::new(),
+    };
+
+    // A later definition with the same name replaces an earlier one.
+    if let Some(arr) = val.get("enums").and_then(|v| v.as_array()) {
+      for item in arr {
+        if let Ok(def) = serde_json::from_value::<Enum>(item.clone()) {
+          db.enums.insert(def.name.clone(), def);
+        }
+      }
+    }
+
+    if let Some(arr) = val.get("types").and_then(|v| v.as_array()) {
+      for item in arr {
+        if let Ok(def) = serde_json::from_value::<Type>(item.clone()) {
+          db.types.insert(def.name.clone(), def);
+        }
+      }
+    }
+
+    if let Some(arr) = val.get("puncs").and_then(|v| v.as_array()) {
+      for item in arr {
+        if let Ok(def) = serde_json::from_value::<Punc>(item.clone()) {
+          db.puncs.insert(def.name.clone(), def);
+        }
+      }
+    }
+
+    if let Some(arr) = val.get("schemas").and_then(|v| v.as_array()) {
+      for (i, item) in arr.iter().enumerate() {
+        if let Ok(mut schema) = serde_json::from_value::<Schema>(item.clone()) {
+          // Schemas without an id get a positional one so they can still be keyed.
+          let id = schema
+            .obj
+            .id
+            .clone()
+            .unwrap_or_else(|| format!("schema_{}", i));
+          schema.obj.id = Some(id.clone());
+          db.schemas.insert(id, schema);
+        }
+      }
+    }
+
+    let _ = db.compile();
+    db
+  }
+
+  /// Organizes the graph of the database, compiling regex, format functions, and pointing schema references.
+  ///
+  /// The steps run in a fixed order because each one reads what the previous ones
+  /// stored. No step reports an error, so this currently always returns `Ok(())`.
+  fn compile(&mut self) -> Result<(), String> {
+    self.collect_schemas();
+
+    // 1. Compile regex and formats sequentially
+    for schema in self.schemas.values_mut() {
+      schema.compile();
+    }
+
+    // 2. Compute the Unified Semantic Graph (descendants)
+    self.collect_descendents();
+
+    // 3. For any schema representing a Postgres table, cache its allowed subclasses
+    self.compile_allowed_types();
+
+    // 4. Finally, securely link all string $refs into memory pointers (Arc)
+    self.compile_pointers();
+
+    Ok(())
+  }
+
+  /// Copies every schema that has an id out of the type, punc and enum definitions
+  /// into `self.schemas`.
+  ///
+  /// Schemas without an id are ignored. An id that is already present is
+  /// overwritten; types are inserted first, then puncs, then enums.
+  fn collect_schemas(&mut self) {
+    // Gathered into a buffer first, then inserted into `self.schemas` below.
+    let mut to_insert = Vec::new();
+    for (_, type_def) in &self.types {
+      for schema in &type_def.schemas {
+        if let Some(id) = &schema.obj.id {
+          to_insert.push((id.clone(), schema.clone()));
+        }
+      }
+    }
+    for (_, punc_def) in &self.puncs {
+      for schema in &punc_def.schemas {
+        if let Some(id) = &schema.obj.id {
+          to_insert.push((id.clone(), schema.clone()));
+        }
+      }
+    }
+    for (_, enum_def) in &self.enums {
+      for schema in &enum_def.schemas {
+        if let Some(id) = &schema.obj.id {
+          to_insert.push((id.clone(), schema.clone()));
+        }
+      }
+    }
+    for (id, schema) in to_insert {
+      self.schemas.insert(id, schema);
+    }
+  }
+
+  /// Fills `self.descendants`: for every schema id, the ids of all schemas whose
+  /// `ref_string` chain leads to it.
+  ///
+  /// Every schema gets an entry, possibly empty. The order of ids inside an entry
+  /// follows HashMap iteration and is therefore not stable between runs.
+  fn collect_descendents(&mut self) {
+    let mut direct_children: HashMap<String, Vec<String>> = HashMap::new();
+
+    // First pass: Find all schemas that have a $ref to another schema
+    // (refs pointing at ids that are not in `self.schemas` are ignored)
+    let schema_ids: Vec<String> = self.schemas.keys().cloned().collect();
+    for id in schema_ids {
+      if let Some(ref_str) = self.schemas.get(&id).and_then(|s| s.obj.ref_string.clone()) {
+        if self.schemas.contains_key(&ref_str) {
+          direct_children.entry(ref_str).or_default().push(id.clone());
+        }
+      }
+    }
+
+    // Now compute descendants for all schemas
+    let mut descendants_map: HashMap<String, Vec<String>> = HashMap::new();
+    for key in self.schemas.keys() {
+      let mut descendants = Vec::new();
+      // Used as a stack (`pop` takes the last element): depth-first traversal.
+      let mut queue = Vec::new();
+      if let Some(children) = direct_children.get(key) {
+        queue.extend(children.iter().cloned());
+      }
+
+      // `visited` keeps reference cycles from looping forever.
+      let mut visited = std::collections::HashSet::new();
+      while let Some(child) = queue.pop() {
+        if visited.insert(child.clone()) {
+          descendants.push(child.clone());
+          if let Some(grandchildren) = direct_children.get(&child) {
+            queue.extend(grandchildren.iter().cloned());
+          }
+        }
+      }
+      descendants_map.insert(key.clone(), descendants);
+    }
+    self.descendants = descendants_map;
+  }
+
+  /// Stores in `compiled_allowed_types` of every type-owned schema, and of each of its
+  /// descendants, the set of that base schema's descendant ids.
+  ///
+  /// Must run after `collect_descendents`, since it reads `self.descendants`.
+  fn compile_allowed_types(&mut self) {
+    // 1. Identify which types act as bases (table-backed schemas)
+    let mut entity_bases = HashMap::new();
+    for type_def in self.types.values() {
+      for type_schema in &type_def.schemas {
+        if let Some(id) = &type_schema.obj.id {
+          entity_bases.insert(id.clone(), type_def.name.clone());
+        }
+      }
+    }
+
+    // 2. Compute compiled_allowed_types for all descendants of entity bases
+    // NOTE(review): the set does not contain the base id itself, and a schema that is
+    // both a base and a descendant of another base (or a descendant of several bases)
+    // ends up with whichever set was inserted last — that depends on HashMap
+    // iteration order. Confirm this is intended.
+    let mut allowed_types_map: HashMap<String, std::collections::HashSet<String>> = HashMap::new();
+    for base_id in entity_bases.keys() {
+      allowed_types_map.insert(
+        base_id.clone(),
+        self
+          .descendants
+          .get(base_id)
+          .unwrap_or(&vec![])
+          .iter()
+          .cloned()
+          .collect(),
+      );
+      if let Some(descendants) = self.descendants.get(base_id) {
+        let set: std::collections::HashSet<String> = descendants.iter().cloned().collect();
+        for desc_id in descendants {
+          allowed_types_map.insert(desc_id.clone(), set.clone());
+        }
+      }
+    }
+
+    // 3. Inject types into the schemas
+    let schema_ids: Vec<String> = self.schemas.keys().cloned().collect();
+    for id in schema_ids {
+      if let Some(set) = allowed_types_map.get(&id) {
+        if let Some(schema) = self.schemas.get_mut(&id) {
+          schema.obj.compiled_allowed_types = Some(set.clone());
+        }
+      }
+    }
+  }
+
+  /// Sets `compiled_ref` on every schema: an `Arc` holding a clone of the schema named
+  /// by its `ref_string`, or `None` when there is no ref or the target is unknown.
+  fn compile_pointers(&mut self) {
+    let schema_ids: Vec<String> = self.schemas.keys().cloned().collect();
+    for id in schema_ids {
+      let mut compiled_ref = None;
+
+      // Immutable lookup first; the mutable borrow below needs it to have ended.
+      if let Some(schema) = self.schemas.get(&id) {
+        if let Some(ref_str) = &schema.obj.ref_string {
+          if let Some(target) = self.schemas.get(ref_str) {
+            // NOTE(review): the target is cloned in its current state, so whether the
+            // copy already carries its own `compiled_ref` depends on the order in
+            // which ids are visited — verify that consumers do not rely on it.
+            compiled_ref = Some(std::sync::Arc::new(target.clone()));
+          }
+        }
+      }
+
+      if let Some(schema) = self.schemas.get_mut(&id) {
+        schema.obj.compiled_ref = compiled_ref;
+      }
+    }
+  }
+}
--- a/src/database/page.rs
+++ b/src/database/page.rs
@ -0,0 +1,35 @@
+use indexmap::IndexMap;
+use serde::{Deserialize, Serialize};
+
+/// Page description attached to a punc (see `Punc::page`).
+///
+/// Every field is optional: a missing field deserializes to `None` (struct-level
+/// `#[serde(default)]`), and a `None` field is left out of serialized output.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+#[serde(default)]
+pub struct Page {
+  /// Optional path string. NOTE(review): presumably the route of the page — confirm.
+  #[serde(skip_serializing_if = "Option::is_none")]
+  pub path: Option<String>,
+  /// Optional title string.
+  #[serde(skip_serializing_if = "Option::is_none")]
+  pub title: Option<String>,
+  /// Optional sidebar settings.
+  #[serde(skip_serializing_if = "Option::is_none")]
+  pub sidebar: Option<Sidebar>,
+  /// Optional actions keyed by name; `IndexMap` keeps them in insertion order.
+  #[serde(skip_serializing_if = "Option::is_none")]
+  pub actions: Option<IndexMap<String, Action>>,
+}
+
+/// Sidebar settings for a [`Page`].
+///
+/// Both fields are optional; `None` values are omitted when serializing.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+#[serde(default)]
+pub struct Sidebar {
+  /// Optional category string. NOTE(review): presumably the sidebar group — confirm.
+  #[serde(skip_serializing_if = "Option::is_none")]
+  pub category: Option<String>,
+  /// Optional priority. NOTE(review): ordering direction is not visible here — confirm.
+  #[serde(skip_serializing_if = "Option::is_none")]
+  pub priority: Option<i32>,
+}
+
+/// A single entry of [`Page::actions`].
+///
+/// All three fields are optional strings; `None` values are omitted when serializing.
+/// NOTE(review): whether the fields are mutually exclusive is not enforced by this type.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+#[serde(default)]
+pub struct Action {
+  /// Optional punc identifier string.
+  #[serde(skip_serializing_if = "Option::is_none")]
+  pub punc: Option<String>,
+  /// Optional navigation target string.
+  #[serde(skip_serializing_if = "Option::is_none")]
+  pub navigate: Option<String>,
+  /// Optional presentation target string.
+  #[serde(skip_serializing_if = "Option::is_none")]
+  pub present: Option<String>,
+}
--- a/src/database/punc.rs
+++ b/src/database/punc.rs
@ -0,0 +1,20 @@
+use crate::database::page::Page;
+use crate::database::schema::Schema;
+use serde::{Deserialize, Serialize};
+
+/// Deserialized definition of a punc together with the schemas it declares.
+///
+/// The struct-level `#[serde(default)]` means any field missing from the input falls back to
+/// its `Default` value (empty string, `false`, `None`, empty vector).
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+#[serde(default)]
+pub struct Punc {
+  pub id: String,
+  /// Serialized under the key `type` (raw identifier, since `type` is a Rust keyword).
+  pub r#type: String,
+  pub name: String,
+  pub module: String,
+  pub source: String,
+  pub description: Option<String>,
+  pub public: bool,
+  pub form: bool,
+  pub get: Option<String>,
+  /// Optional page description for this punc.
+  pub page: Option<Page>,
+  /// Schemas declared by this punc; the field-level default is already implied by the
+  /// struct-level `#[serde(default)]`.
+  #[serde(default)]
+  pub schemas: Vec<Schema>,
+}
--- a/src/database/relation.rs
+++ b/src/database/relation.rs
@ -0,0 +1,15 @@
+use serde::{Deserialize, Serialize};
+
+/// Deserialized description of a relation between a source type and a destination type.
+///
+/// NOTE(review): the field names suggest a foreign-key-style database constraint — confirm.
+/// Missing fields fall back to their `Default` value via the struct-level `#[serde(default)]`.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+#[serde(default)]
+pub struct Relation {
+  pub id: String,
+  pub constraint_name: String,
+  /// Name of the type on the source side of the relation.
+  pub source_type: String,
+  /// Column names on the source side.
+  #[serde(default)]
+  pub source_columns: Vec<String>,
+  /// Name of the type on the destination side of the relation.
+  pub destination_type: String,
+  /// Column names on the destination side.
+  #[serde(default)]
+  pub destination_columns: Vec<String>,
+  /// Optional prefix string. NOTE(review): its use is not visible in this file — confirm.
+  pub prefix: Option<String>,
+}
--- a/src/database/schema.rs
+++ b/src/database/schema.rs
@ -0,0 +1,356 @@
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+use std::collections::BTreeMap;
+use std::sync::Arc;
+
+/// `SchemaObject` mirrors the Go Punc Generator's schema struct for consistency.
+///
+/// It is the object form of a JSON Schema. Keyed collections (`properties`,
+/// `patternProperties`, `dependencies`, `actions`) are stored in `BTreeMap`s, so they iterate
+/// in sorted-key order rather than in the order the keys appeared in the source document.
+///
+/// Fields marked `#[serde(skip)]` at the bottom are derived caches: they are never read from
+/// or written to JSON and start out as `None`.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct SchemaObject {
+  // Core Schema Keywords
+  #[serde(rename = "$id")]
+  pub id: Option<String>,
+  #[serde(rename = "$ref")]
+  pub ref_string: Option<String>,
+  /*
+     Note: The `Ref` field in the Go struct is a pointer populated by the linker.
+     Here `ref_string` keeps the raw `$ref` text, and the resolved target lives in the
+     serde-skipped `compiled_ref` field declared with the other compiled fields below.
+  */
+  pub description: Option<String>,
+  pub title: Option<String>,
+  #[serde(default)] // Allow missing type
+  #[serde(rename = "type")]
+  pub type_: Option<SchemaTypeOrArray>, // Handles string or array of strings
+
+  // Object Keywords
+  pub properties: Option<BTreeMap<String, Arc<Schema>>>,
+  /// Keys are regular-expression source strings; values are the schemas for matching names.
+  #[serde(rename = "patternProperties")]
+  pub pattern_properties: Option<BTreeMap<String, Arc<Schema>>>,
+  #[serde(rename = "additionalProperties")]
+  pub additional_properties: Option<Arc<Schema>>,
+  /// Custom `$family` keyword. NOTE(review): semantics are defined by the validator — confirm.
+  #[serde(rename = "$family")]
+  pub family: Option<String>,
+
+  pub required: Option<Vec<String>>,
+
+  // dependencies can be schema dependencies or property dependencies
+  pub dependencies: Option<BTreeMap<String, Dependency>>,
+
+  // Array Keywords
+  #[serde(rename = "items")]
+  pub items: Option<Arc<Schema>>,
+  #[serde(rename = "prefixItems")]
+  pub prefix_items: Option<Vec<Arc<Schema>>>,
+
+  // String Validation
+  // Note: length/count limits throughout this struct are stored as `f64`, not integers.
+  #[serde(rename = "minLength")]
+  pub min_length: Option<f64>,
+  #[serde(rename = "maxLength")]
+  pub max_length: Option<f64>,
+  pub pattern: Option<String>,
+
+  // Array Validation
+  #[serde(rename = "minItems")]
+  pub min_items: Option<f64>,
+  #[serde(rename = "maxItems")]
+  pub max_items: Option<f64>,
+  #[serde(rename = "uniqueItems")]
+  pub unique_items: Option<bool>,
+  #[serde(rename = "contains")]
+  pub contains: Option<Arc<Schema>>,
+  #[serde(rename = "minContains")]
+  pub min_contains: Option<f64>,
+  #[serde(rename = "maxContains")]
+  pub max_contains: Option<f64>,
+
+  // Object Validation
+  #[serde(rename = "minProperties")]
+  pub min_properties: Option<f64>,
+  #[serde(rename = "maxProperties")]
+  pub max_properties: Option<f64>,
+  #[serde(rename = "propertyNames")]
+  pub property_names: Option<Arc<Schema>>,
+
+  // Format and Generic Value Validation
+  pub format: Option<String>,
+  #[serde(rename = "enum")]
+  pub enum_: Option<Vec<Value>>, // `enum` is a reserved keyword in Rust
+  // `const` goes through a custom deserializer.
+  // NOTE(review): presumably so that an explicit `"const": null` becomes `Some(Value::Null)`
+  // instead of `None` — confirm against `crate::validator::util::deserialize_some`.
+  #[serde(
+    default,
+    rename = "const",
+    deserialize_with = "crate::validator::util::deserialize_some"
+  )]
+  pub const_: Option<Value>,
+
+  // Numeric Validation
+  #[serde(rename = "multipleOf")]
+  pub multiple_of: Option<f64>,
+  pub minimum: Option<f64>,
+  pub maximum: Option<f64>,
+  #[serde(rename = "exclusiveMinimum")]
+  pub exclusive_minimum: Option<f64>,
+  #[serde(rename = "exclusiveMaximum")]
+  pub exclusive_maximum: Option<f64>,
+
+  // Combining Keywords
+  #[serde(rename = "allOf")]
+  pub all_of: Option<Vec<Arc<Schema>>>,
+  #[serde(rename = "anyOf")]
+  pub any_of: Option<Vec<Arc<Schema>>>,
+  #[serde(rename = "oneOf")]
+  pub one_of: Option<Vec<Arc<Schema>>>,
+  #[serde(rename = "not")]
+  pub not: Option<Arc<Schema>>,
+  #[serde(rename = "if")]
+  pub if_: Option<Arc<Schema>>,
+  #[serde(rename = "then")]
+  pub then_: Option<Arc<Schema>>,
+  #[serde(rename = "else")]
+  pub else_: Option<Arc<Schema>>,
+
+  // Custom Vocabularies
+  pub form: Option<Vec<String>>,
+  pub display: Option<Vec<String>>,
+  #[serde(rename = "enumNames")]
+  pub enum_names: Option<Vec<String>>,
+  pub control: Option<String>,
+  pub actions: Option<BTreeMap<String, Action>>,
+  pub computer: Option<String>,
+  /// Also set to `Some(true)` when a schema is deserialized from the literal `true`.
+  #[serde(default)]
+  pub extensible: Option<bool>,
+
+  // Compiled Fields (Hidden from JSON/Serde)
+  /// Resolved `$ref` target, filled in after loading.
+  #[serde(skip)]
+  pub compiled_ref: Option<Arc<Schema>>,
+  /// Set of type names accepted for this schema, filled in after loading.
+  #[serde(skip)]
+  pub compiled_allowed_types: Option<std::collections::HashSet<String>>,
+  /// Validator looked up from `format` by `Schema::compile`.
+  #[serde(skip)]
+  pub compiled_format: Option<CompiledFormat>,
+  /// Regex built from `pattern` by `Schema::compile`.
+  #[serde(skip)]
+  pub compiled_pattern: Option<CompiledRegex>,
+  /// Regex/schema pairs built from `pattern_properties` by `Schema::compile`.
+  #[serde(skip)]
+  pub compiled_pattern_properties: Option<Vec<(CompiledRegex, Arc<Schema>)>>,
+}
+
+/// Borrowed result of resolving a schema reference.
+///
+/// NOTE(review): nothing in this file constructs or matches on this enum, so the exact meaning
+/// of the variants — in particular which role each of the two schemas in `Global` plays — must
+/// be confirmed against the validator code.
+pub enum ResolvedRef<'a> {
+  /// Carries a single borrowed schema.
+  Local(&'a Schema),
+  /// Carries two borrowed schemas.
+  Global(&'a Schema, &'a Schema),
+}
+
+/// Represents a compiled format validator
+///
+/// `Debug` is implemented by hand for this type rather than derived.
+#[derive(Clone)]
+pub enum CompiledFormat {
+  /// A validation function; returns `Ok(())` when the value is accepted and a boxed error
+  /// otherwise. `Schema::compile` fills this from the `FORMATS` registry.
+  Func(fn(&serde_json::Value) -> Result<(), Box<dyn std::error::Error + Send + Sync>>),
+  /// A format expressed as a regular expression.
+  Regex(regex::Regex),
+}
+
+/// Hand-written `Debug`: the function variant prints a fixed placeholder, the regex variant
+/// prints the regex's own `Debug` form.
+impl std::fmt::Debug for CompiledFormat {
+  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+    match self {
+      Self::Regex(regex) => write!(f, "CompiledFormat::Regex({:?})", regex),
+      // The function pointer itself is never printed, only this placeholder.
+      Self::Func(_) => f.write_str("CompiledFormat::Func(...)"),
+    }
+  }
+}
+
+/// A wrapper for compiled regex patterns
+///
+/// Newtype around `regex::Regex`; the inner regex is public and reachable as `.0`.
+#[derive(Debug, Clone)]
+pub struct CompiledRegex(pub regex::Regex);
+
+/// A JSON Schema node: either an object schema or the boolean form `true` / `false`.
+///
+/// `Deserialize` is implemented by hand so that boolean schemas are accepted. When
+/// serializing, `obj` is flattened into the output and `always_fail` is skipped, so a schema
+/// that was read from a boolean is written back as an object.
+#[derive(Debug, Clone, Serialize)]
+pub struct Schema {
+  /// The schema keywords; flattened into this node when serializing.
+  #[serde(flatten)]
+  pub obj: SchemaObject,
+  /// `true` only when the schema was deserialized from the literal `false`.
+  #[serde(skip)]
+  pub always_fail: bool,
+}
+
+/// The default schema is an empty object schema that is not marked as always failing.
+impl Default for Schema {
+  fn default() -> Self {
+    let obj = SchemaObject::default();
+    Self {
+      obj,
+      always_fail: false,
+    }
+  }
+}
+
+/// Lets the keywords of a `Schema` be read directly, e.g. `schema.format` for
+/// `schema.obj.format`.
+impl std::ops::Deref for Schema {
+  type Target = SchemaObject;
+  fn deref(&self) -> &SchemaObject {
+    let Schema { obj, .. } = self;
+    obj
+  }
+}
+/// Mutable counterpart of the `Deref` impl: exposes the inner `SchemaObject` for writing.
+impl std::ops::DerefMut for Schema {
+  fn deref_mut(&mut self) -> &mut SchemaObject {
+    let Schema { obj, .. } = self;
+    obj
+  }
+}
+
+impl Schema {
+  /// Vestigial local-reference lookup; ignores its argument and always returns `None`.
+  pub fn resolve_ref(&self, _ref_string: &str) -> Option<&Arc<Schema>> {
+    // This is vestigial for now. References are global pointers. We will remove this shortly.
+    None
+  }
+
+  /// Compiles a single child schema behind an `Arc`.
+  ///
+  /// `Arc::make_mut` mutates in place when this is the only handle and clones the child
+  /// first when it is shared, so other holders of the old `Arc` are never affected.
+  fn compile_child(child: &mut Arc<Schema>) {
+    Arc::make_mut(child).compile();
+  }
+
+  /// Fills the serde-skipped `compiled_*` caches of this schema and of every nested
+  /// sub-schema.
+  ///
+  /// * `format` is looked up in the `FORMATS` registry; unknown names leave
+  ///   `compiled_format` untouched.
+  /// * `pattern` and the keys of `patternProperties` are compiled with `regex::Regex`.
+  ///   Patterns the `regex` crate rejects are skipped silently (best effort), which means
+  ///   no compiled entry exists for them.
+  /// * All sub-schemas are compiled recursively: `properties`, `patternProperties`,
+  ///   `dependencies` (schema form), `prefixItems`, `allOf`, `anyOf`, `oneOf`,
+  ///   `additionalProperties`, `items`, `contains`, `propertyNames`, `not`, `if`, `then`
+  ///   and `else`.
+  pub fn compile(&mut self) {
+    if let Some(format_str) = &self.obj.format {
+      if let Some(fmt) = crate::database::formats::FORMATS.get(format_str.as_str()) {
+        self.obj.compiled_format = Some(crate::database::schema::CompiledFormat::Func(fmt.func));
+      }
+    }
+
+    if let Some(pattern_str) = &self.obj.pattern {
+      if let Ok(re) = regex::Regex::new(pattern_str) {
+        self.obj.compiled_pattern = Some(crate::database::schema::CompiledRegex(re));
+      }
+    }
+
+    if let Some(pattern_props) = &mut self.obj.pattern_properties {
+      let mut compiled = Vec::new();
+      for (key, child) in pattern_props.iter_mut() {
+        // Compile the child before sharing it, so the compiled list and the keyword map
+        // both point at the same, fully compiled sub-schema.
+        Self::compile_child(child);
+        if let Ok(re) = regex::Regex::new(key) {
+          compiled.push((crate::database::schema::CompiledRegex(re), Arc::clone(child)));
+        }
+      }
+      if !compiled.is_empty() {
+        self.obj.compiled_pattern_properties = Some(compiled);
+      }
+    }
+
+    // Crawl children recursively to compile their internals
+    if let Some(props) = &mut self.obj.properties {
+      props.values_mut().for_each(Self::compile_child);
+    }
+
+    if let Some(deps) = &mut self.obj.dependencies {
+      for dep in deps.values_mut() {
+        // Property-list dependencies carry no schema, only the schema form needs compiling.
+        if let crate::database::schema::Dependency::Schema(child) = dep {
+          Self::compile_child(child);
+        }
+      }
+    }
+
+    // Keywords holding a list of sub-schemas.
+    for list in [
+      &mut self.obj.prefix_items,
+      &mut self.obj.all_of,
+      &mut self.obj.any_of,
+      &mut self.obj.one_of,
+    ] {
+      if let Some(children) = list {
+        children.iter_mut().for_each(Self::compile_child);
+      }
+    }
+
+    // Keywords holding a single sub-schema.
+    for slot in [
+      &mut self.obj.additional_properties,
+      &mut self.obj.items,
+      &mut self.obj.contains,
+      &mut self.obj.property_names,
+      &mut self.obj.not,
+      &mut self.obj.if_,
+      &mut self.obj.then_,
+      &mut self.obj.else_,
+    ] {
+      if let Some(child) = slot {
+        Self::compile_child(child);
+      }
+    }
+  }
+}
+
+/// Deserializes a schema node that is either a boolean or an object.
+///
+/// * `true` yields an empty schema with `extensible = Some(true)` and `always_fail = false`.
+/// * `false` yields an empty schema with `always_fail = true`.
+/// * Any other value is parsed as a [`SchemaObject`]; a parse failure is reported as a custom
+///   error of the calling deserializer.
+impl<'de> Deserialize<'de> for Schema {
+  fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+  where
+    D: serde::Deserializer<'de>,
+  {
+    // Buffer the input as a `Value` so the boolean form can be recognised first.
+    match Value::deserialize(deserializer)? {
+      Value::Bool(allow) => {
+        let mut obj = SchemaObject::default();
+        if allow {
+          obj.extensible = Some(true);
+        }
+        Ok(Schema {
+          obj,
+          always_fail: !allow,
+        })
+      }
+      other => {
+        // The buffered value is moved, not cloned: nested schemas re-enter this function, so
+        // a clone here would copy every subtree once per nesting level.
+        let obj: SchemaObject = serde_json::from_value(other).map_err(serde::de::Error::custom)?;
+        Ok(Schema {
+          obj,
+          always_fail: false,
+        })
+      }
+    }
+  }
+}
+
+/// Value of the `type` keyword: either one type name or a list of type names.
+///
+/// The enum is untagged, so a JSON string maps to `Single` and a JSON array of strings maps
+/// to `Multiple`, with no wrapper key in either direction.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum SchemaTypeOrArray {
+  /// A single type name, e.g. `"string"`.
+  Single(String),
+  /// Several type names, e.g. `["string", "null"]`.
+  Multiple(Vec<String>),
+}
+
+/// An entry of the custom `actions` keyword on a schema (see `SchemaObject::actions`).
+///
+/// Both fields are optional strings. Unlike the page-level `Action`, `None` values are not
+/// skipped when serializing.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Action {
+  /// Optional navigation target string.
+  pub navigate: Option<String>,
+  /// Optional punc identifier string.
+  pub punc: Option<String>,
+}
+/// One entry of the `dependencies` keyword.
+///
+/// The enum is untagged and variants are tried in declaration order: a JSON array of strings
+/// becomes `Props`, anything else is attempted as a `Schema`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum Dependency {
+  /// Property dependency: a list of property names.
+  Props(Vec<String>),
+  /// Schema dependency: a sub-schema.
+  Schema(Arc<Schema>),
+}
--- a/src/database/type.rs
+++ b/src/database/type.rs
@ -0,0 +1,35 @@
+use crate::database::schema::Schema;
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+
+/// Deserialized definition of a type together with the schemas it declares.
+///
+/// The struct-level `#[serde(default)]` makes every field optional in the input; missing
+/// fields take their `Default` value. The additional field-level `#[serde(default)]`
+/// attributes are therefore redundant but harmless.
+/// NOTE(review): the meaning of the individual flags and field lists is defined by the code
+/// that consumes this struct and is not visible here — confirm before relying on it.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+#[serde(default)]
+pub struct Type {
+  pub id: String,
+  /// Serialized under the key `type` (raw identifier, since `type` is a Rust keyword).
+  pub r#type: String,
+  pub name: String,
+  pub module: String,
+  pub source: String,
+  #[serde(default)]
+  pub historical: bool,
+  #[serde(default)]
+  pub sensitive: bool,
+  #[serde(default)]
+  pub ownable: bool,
+  pub longevity: Option<i32>,
+  /// List of type names. NOTE(review): presumably the ancestry chain of this type — confirm.
+  #[serde(default)]
+  pub hierarchy: Vec<String>,
+  pub relationship: Option<bool>,
+  #[serde(default)]
+  pub fields: Vec<String>,
+  /// Free-form JSON; its shape is not constrained by this struct.
+  pub grouped_fields: Option<Value>,
+  #[serde(default)]
+  pub lookup_fields: Vec<String>,
+  #[serde(default)]
+  pub null_fields: Vec<String>,
+  #[serde(default)]
+  pub default_fields: Vec<String>,
+  /// Free-form JSON; its shape is not constrained by this struct.
+  pub field_types: Option<Value>,
+  /// Schemas declared by this type.
+  #[serde(default)]
+  pub schemas: Vec<Schema>,
+}