use serde::{Deserialize, Serialize}; use serde_json::Value; use std::collections::BTreeMap; use std::sync::Arc; use std::sync::OnceLock; pub fn serialize_once_lock( lock: &OnceLock, serializer: S, ) -> Result { if let Some(val) = lock.get() { val.serialize(serializer) } else { serializer.serialize_none() } } pub fn is_once_lock_map_empty(lock: &OnceLock>) -> bool { lock.get().map_or(true, |m| m.is_empty()) } pub fn is_once_lock_vec_empty(lock: &OnceLock>) -> bool { lock.get().map_or(true, |v| v.is_empty()) } // Schema mirrors the Go Punc Generator's schema struct for consistency. // It is an order-preserving representation of a JSON Schema. pub fn deserialize_some<'de, D>(deserializer: D) -> Result, D::Error> where D: serde::Deserializer<'de>, { let v = Value::deserialize(deserializer)?; Ok(Some(v)) } #[derive(Debug, Clone, Serialize, Deserialize, Default)] pub struct SchemaObject { // Core Schema Keywords #[serde(rename = "$id")] #[serde(skip_serializing_if = "Option::is_none")] pub id: Option, #[serde(rename = "$ref")] #[serde(skip_serializing_if = "Option::is_none")] pub r#ref: Option, #[serde(skip_serializing_if = "Option::is_none")] pub description: Option, #[serde(skip_serializing_if = "Option::is_none")] pub title: Option, #[serde(default)] // Allow missing type #[serde(rename = "type")] #[serde(skip_serializing_if = "Option::is_none")] pub type_: Option, // Handles string or array of strings // Object Keywords #[serde(skip_serializing_if = "Option::is_none")] pub properties: Option>>, #[serde(rename = "patternProperties")] #[serde(skip_serializing_if = "Option::is_none")] pub pattern_properties: Option>>, #[serde(rename = "additionalProperties")] #[serde(skip_serializing_if = "Option::is_none")] pub additional_properties: Option>, #[serde(rename = "$family")] #[serde(skip_serializing_if = "Option::is_none")] pub family: Option, #[serde(skip_serializing_if = "Option::is_none")] pub required: Option>, // dependencies can be schema dependencies or property dependencies #[serde(skip_serializing_if = "Option::is_none")] pub dependencies: Option>, // Array Keywords #[serde(rename = "items")] #[serde(skip_serializing_if = "Option::is_none")] pub items: Option>, #[serde(rename = "prefixItems")] #[serde(skip_serializing_if = "Option::is_none")] pub prefix_items: Option>>, // String Validation #[serde(rename = "minLength")] #[serde(skip_serializing_if = "Option::is_none")] pub min_length: Option, #[serde(rename = "maxLength")] #[serde(skip_serializing_if = "Option::is_none")] pub max_length: Option, #[serde(skip_serializing_if = "Option::is_none")] pub pattern: Option, // Array Validation #[serde(rename = "minItems")] #[serde(skip_serializing_if = "Option::is_none")] pub min_items: Option, #[serde(rename = "maxItems")] #[serde(skip_serializing_if = "Option::is_none")] pub max_items: Option, #[serde(rename = "uniqueItems")] #[serde(skip_serializing_if = "Option::is_none")] pub unique_items: Option, #[serde(rename = "contains")] #[serde(skip_serializing_if = "Option::is_none")] pub contains: Option>, #[serde(rename = "minContains")] #[serde(skip_serializing_if = "Option::is_none")] pub min_contains: Option, #[serde(rename = "maxContains")] #[serde(skip_serializing_if = "Option::is_none")] pub max_contains: Option, // Object Validation #[serde(rename = "minProperties")] #[serde(skip_serializing_if = "Option::is_none")] pub min_properties: Option, #[serde(rename = "maxProperties")] #[serde(skip_serializing_if = "Option::is_none")] pub max_properties: Option, #[serde(rename = "propertyNames")] #[serde(skip_serializing_if = "Option::is_none")] pub property_names: Option>, // Numeric Validation #[serde(skip_serializing_if = "Option::is_none")] pub format: Option, #[serde(rename = "enum")] #[serde(skip_serializing_if = "Option::is_none")] pub enum_: Option>, // `enum` is a reserved keyword in Rust #[serde( default, rename = "const", deserialize_with = "crate::database::schema::deserialize_some" )] #[serde(skip_serializing_if = "Option::is_none")] pub const_: Option, // Numeric Validation #[serde(rename = "multipleOf")] #[serde(skip_serializing_if = "Option::is_none")] pub multiple_of: Option, #[serde(skip_serializing_if = "Option::is_none")] pub minimum: Option, #[serde(skip_serializing_if = "Option::is_none")] pub maximum: Option, #[serde(rename = "exclusiveMinimum")] #[serde(skip_serializing_if = "Option::is_none")] pub exclusive_minimum: Option, #[serde(rename = "exclusiveMaximum")] #[serde(skip_serializing_if = "Option::is_none")] pub exclusive_maximum: Option, // Combining Keywords #[serde(rename = "allOf")] #[serde(skip_serializing_if = "Option::is_none")] pub all_of: Option>>, #[serde(rename = "oneOf")] #[serde(skip_serializing_if = "Option::is_none")] pub one_of: Option>>, #[serde(rename = "not")] #[serde(skip_serializing_if = "Option::is_none")] pub not: Option>, #[serde(rename = "if")] #[serde(skip_serializing_if = "Option::is_none")] pub if_: Option>, #[serde(rename = "then")] #[serde(skip_serializing_if = "Option::is_none")] pub then_: Option>, #[serde(rename = "else")] #[serde(skip_serializing_if = "Option::is_none")] pub else_: Option>, // Custom Vocabularies #[serde(skip_serializing_if = "Option::is_none")] pub form: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub display: Option>, #[serde(rename = "enumNames")] #[serde(skip_serializing_if = "Option::is_none")] pub enum_names: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub control: Option, #[serde(skip_serializing_if = "Option::is_none")] pub actions: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub computer: Option, #[serde(default)] #[serde(skip_serializing_if = "Option::is_none")] pub extensible: Option, #[serde(rename = "compiledProperties")] #[serde(skip_deserializing)] #[serde(skip_serializing_if = "crate::database::schema::is_once_lock_vec_empty")] #[serde(serialize_with = "crate::database::schema::serialize_once_lock")] pub compiled_property_names: OnceLock>, #[serde(skip)] pub compiled_properties: OnceLock>>, #[serde(rename = "compiledEdges")] #[serde(skip_deserializing)] #[serde(skip_serializing_if = "crate::database::schema::is_once_lock_map_empty")] #[serde(serialize_with = "crate::database::schema::serialize_once_lock")] pub compiled_edges: OnceLock>, #[serde(skip)] pub compiled_format: OnceLock, #[serde(skip)] pub compiled_pattern: OnceLock, #[serde(skip)] pub compiled_pattern_properties: OnceLock)>>, } /// Represents a compiled format validator #[derive(Clone)] pub enum CompiledFormat { Func(fn(&serde_json::Value) -> Result<(), Box>), Regex(regex::Regex), } impl std::fmt::Debug for CompiledFormat { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { CompiledFormat::Func(_) => write!(f, "CompiledFormat::Func(...)"), CompiledFormat::Regex(r) => write!(f, "CompiledFormat::Regex({:?})", r), } } } /// A wrapper for compiled regex patterns #[derive(Debug, Clone)] pub struct CompiledRegex(pub regex::Regex); #[derive(Debug, Clone, Serialize, Default)] pub struct Schema { #[serde(flatten)] pub obj: SchemaObject, #[serde(skip)] pub always_fail: bool, } impl std::ops::Deref for Schema { type Target = SchemaObject; fn deref(&self) -> &Self::Target { &self.obj } } impl std::ops::DerefMut for Schema { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.obj } } impl Schema { pub fn compile( &self, db: &crate::database::Database, visited: &mut std::collections::HashSet, ) { if self.obj.compiled_properties.get().is_some() { return; } if let Some(id) = &self.obj.id { if !visited.insert(id.clone()) { return; // Break cyclical resolution } } if let Some(format_str) = &self.obj.format { if let Some(fmt) = crate::database::formats::FORMATS.get(format_str.as_str()) { let _ = self .obj .compiled_format .set(crate::database::schema::CompiledFormat::Func(fmt.func)); } } if let Some(pattern_str) = &self.obj.pattern { if let Ok(re) = regex::Regex::new(pattern_str) { let _ = self .obj .compiled_pattern .set(crate::database::schema::CompiledRegex(re)); } } if let Some(pattern_props) = &self.obj.pattern_properties { let mut compiled = Vec::new(); for (k, v) in pattern_props { if let Ok(re) = regex::Regex::new(k) { compiled.push((crate::database::schema::CompiledRegex(re), v.clone())); } } if !compiled.is_empty() { let _ = self.obj.compiled_pattern_properties.set(compiled); } } let mut props = std::collections::BTreeMap::new(); // 1. Resolve INHERITANCE dependencies first if let Some(ref_id) = &self.obj.r#ref { if let Some(parent) = db.schemas.get(ref_id) { parent.compile(db, visited); if let Some(p_props) = parent.obj.compiled_properties.get() { props.extend(p_props.clone()); } } } if let Some(all_of) = &self.obj.all_of { for ao in all_of { ao.compile(db, visited); if let Some(ao_props) = ao.obj.compiled_properties.get() { props.extend(ao_props.clone()); } } } if let Some(then_schema) = &self.obj.then_ { then_schema.compile(db, visited); if let Some(t_props) = then_schema.obj.compiled_properties.get() { props.extend(t_props.clone()); } } if let Some(else_schema) = &self.obj.else_ { else_schema.compile(db, visited); if let Some(e_props) = else_schema.obj.compiled_properties.get() { props.extend(e_props.clone()); } } // 2. Add local properties if let Some(local_props) = &self.obj.properties { for (k, v) in local_props { props.insert(k.clone(), v.clone()); } } // 3. Set the OnceLock! let _ = self.obj.compiled_properties.set(props.clone()); let mut names: Vec = props.keys().cloned().collect(); names.sort(); let _ = self.obj.compiled_property_names.set(names); // 4. Compute Edges natively let schema_edges = self.compile_edges(db, visited, &props); let _ = self.obj.compiled_edges.set(schema_edges); // 5. Build our inline children properties recursively NOW! (Depth-first search) if let Some(local_props) = &self.obj.properties { for child in local_props.values() { child.compile(db, visited); } } if let Some(items) = &self.obj.items { items.compile(db, visited); } if let Some(pattern_props) = &self.obj.pattern_properties { for child in pattern_props.values() { child.compile(db, visited); } } if let Some(additional_props) = &self.obj.additional_properties { additional_props.compile(db, visited); } if let Some(one_of) = &self.obj.one_of { for child in one_of { child.compile(db, visited); } } if let Some(arr) = &self.obj.prefix_items { for child in arr { child.compile(db, visited); } } if let Some(child) = &self.obj.not { child.compile(db, visited); } if let Some(child) = &self.obj.contains { child.compile(db, visited); } if let Some(child) = &self.obj.property_names { child.compile(db, visited); } if let Some(child) = &self.obj.if_ { child.compile(db, visited); } if let Some(id) = &self.obj.id { visited.remove(id); } } #[allow(unused_variables)] fn validate_identifier(id: &str, field_name: &str) -> Result<(), String> { #[cfg(not(test))] for c in id.chars() { if !c.is_ascii_lowercase() && !c.is_ascii_digit() && c != '_' && c != '.' { return Err(format!("Invalid character '{}' in JSON Schema '{}' property: '{}'. Identifiers must exclusively contain [a-z0-9_.]", c, field_name, id)); } } Ok(()) } pub fn collect_schemas( &mut self, tracking_path: Option, to_insert: &mut Vec<(String, Schema)>, ) -> Result<(), String> { if let Some(id) = &self.obj.id { Self::validate_identifier(id, "$id")?; to_insert.push((id.clone(), self.clone())); } if let Some(r#ref) = &self.obj.r#ref { Self::validate_identifier(r#ref, "$ref")?; } if let Some(family) = &self.obj.family { Self::validate_identifier(family, "$family")?; } // Is this schema an inline ad-hoc composition? // Meaning it has a tracking context, lacks an explicit $id, but extends an Entity ref with explicit properties! if self.obj.id.is_none() && self.obj.r#ref.is_some() && self.obj.properties.is_some() { if let Some(ref path) = tracking_path { to_insert.push((path.clone(), self.clone())); } } // Provide the path origin to children natively, prioritizing the explicit `$id` boundary if one exists let origin_path = self.obj.id.clone().or(tracking_path); self.collect_child_schemas(origin_path, to_insert)?; Ok(()) } pub fn collect_child_schemas( &mut self, origin_path: Option, to_insert: &mut Vec<(String, Schema)>, ) -> Result<(), String> { if let Some(props) = &mut self.obj.properties { for (k, v) in props.iter_mut() { let mut inner = (**v).clone(); let next_path = origin_path.as_ref().map(|o| format!("{}/{}", o, k)); inner.collect_schemas(next_path, to_insert)?; *v = Arc::new(inner); } } if let Some(pattern_props) = &mut self.obj.pattern_properties { for (k, v) in pattern_props.iter_mut() { let mut inner = (**v).clone(); let next_path = origin_path.as_ref().map(|o| format!("{}/{}", o, k)); inner.collect_schemas(next_path, to_insert)?; *v = Arc::new(inner); } } let mut map_arr = |arr: &mut Vec>| -> Result<(), String> { for v in arr.iter_mut() { let mut inner = (**v).clone(); inner.collect_schemas(origin_path.clone(), to_insert)?; *v = Arc::new(inner); } Ok(()) }; if let Some(arr) = &mut self.obj.prefix_items { map_arr(arr)?; } if let Some(arr) = &mut self.obj.all_of { map_arr(arr)?; } if let Some(arr) = &mut self.obj.one_of { map_arr(arr)?; } let mut map_opt = |opt: &mut Option>, pass_path: bool| -> Result<(), String> { if let Some(v) = opt { let mut inner = (**v).clone(); let next = if pass_path { origin_path.clone() } else { None }; inner.collect_schemas(next, to_insert)?; *v = Arc::new(inner); } Ok(()) }; map_opt(&mut self.obj.additional_properties, false)?; // `items` absolutely must inherit the EXACT property path assigned to the Array wrapper! // This allows nested Arrays enclosing bare Entity structs to correctly register as the boundary mapping. map_opt(&mut self.obj.items, true)?; map_opt(&mut self.obj.not, false)?; map_opt(&mut self.obj.contains, false)?; map_opt(&mut self.obj.property_names, false)?; map_opt(&mut self.obj.if_, false)?; map_opt(&mut self.obj.then_, false)?; map_opt(&mut self.obj.else_, false)?; Ok(()) } pub fn compile_edges( &self, db: &crate::database::Database, visited: &mut std::collections::HashSet, props: &std::collections::BTreeMap>, ) -> std::collections::BTreeMap { let mut schema_edges = std::collections::BTreeMap::new(); let mut parent_type_name = None; if let Some(family) = &self.obj.family { parent_type_name = Some(family.split('.').next_back().unwrap_or(family).to_string()); } else if let Some(id) = &self.obj.id { parent_type_name = Some(id.split('.').next_back().unwrap_or("").to_string()); } else if let Some(ref_id) = &self.obj.r#ref { parent_type_name = Some(ref_id.split('.').next_back().unwrap_or("").to_string()); } if let Some(p_type) = parent_type_name { if db.types.contains_key(&p_type) { for (prop_name, prop_schema) in props { let mut child_type_name = None; let mut target_schema = prop_schema.clone(); if let Some(crate::database::schema::SchemaTypeOrArray::Single(t)) = &prop_schema.obj.type_ { if t == "array" { if let Some(items) = &prop_schema.obj.items { target_schema = items.clone(); } } } if let Some(family) = &target_schema.obj.family { child_type_name = Some(family.split('.').next_back().unwrap_or(family).to_string()); } else if let Some(ref_id) = target_schema.obj.r#ref.as_ref() { child_type_name = Some(ref_id.split('.').next_back().unwrap_or("").to_string()); } else if let Some(arr) = &target_schema.obj.one_of { if let Some(first) = arr.first() { if let Some(ref_id) = first.obj.id.as_ref().or(first.obj.r#ref.as_ref()) { child_type_name = Some(ref_id.split('.').next_back().unwrap_or("").to_string()); } } } if let Some(c_type) = child_type_name { if db.types.contains_key(&c_type) { target_schema.compile(db, visited); if let Some(compiled_target_props) = target_schema.obj.compiled_properties.get() { let keys_for_ambiguity: Vec = compiled_target_props.keys().cloned().collect(); if let Some((relation, is_forward)) = resolve_relation(db, &p_type, &c_type, prop_name, Some(&keys_for_ambiguity)) { schema_edges.insert( prop_name.clone(), crate::database::edge::Edge { constraint: relation.constraint.clone(), forward: is_forward, }, ); } } } } } } } schema_edges } } pub(crate) fn resolve_relation<'a>( db: &'a crate::database::Database, parent_type: &str, child_type: &str, prop_name: &str, relative_keys: Option<&Vec>, ) -> Option<(&'a crate::database::relation::Relation, bool)> { if parent_type == "entity" && child_type == "entity" { return None; } let p_def = db.types.get(parent_type)?; let c_def = db.types.get(child_type)?; let mut matching_rels = Vec::new(); let mut directions = Vec::new(); for rel in db.relations.values() { let is_forward = p_def.hierarchy.contains(&rel.source_type) && c_def.hierarchy.contains(&rel.destination_type); let is_reverse = p_def.hierarchy.contains(&rel.destination_type) && c_def.hierarchy.contains(&rel.source_type); if is_forward { matching_rels.push(rel); directions.push(true); } else if is_reverse { matching_rels.push(rel); directions.push(false); } } if matching_rels.is_empty() { return None; } if matching_rels.len() == 1 { return Some((matching_rels[0], directions[0])); } let mut chosen_idx = 0; let mut resolved = false; for (i, rel) in matching_rels.iter().enumerate() { if let Some(prefix) = &rel.prefix { if prop_name.starts_with(prefix) || prefix.starts_with(prop_name) || prefix.replace("_", "") == prop_name.replace("_", "") { chosen_idx = i; resolved = true; break; } } } if !resolved && relative_keys.is_some() { let keys = relative_keys.unwrap(); let mut missing_prefix_ids = Vec::new(); for (i, rel) in matching_rels.iter().enumerate() { if let Some(prefix) = &rel.prefix { if !keys.contains(prefix) { missing_prefix_ids.push(i); } } } if missing_prefix_ids.len() == 1 { chosen_idx = missing_prefix_ids[0]; } } Some((matching_rels[chosen_idx], directions[chosen_idx])) } impl<'de> Deserialize<'de> for Schema { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { let v: Value = Deserialize::deserialize(deserializer)?; if let Some(b) = v.as_bool() { let mut obj = SchemaObject::default(); if b { obj.extensible = Some(true); } return Ok(Schema { obj, always_fail: !b, }); } let mut obj: SchemaObject = serde_json::from_value(v.clone()).map_err(serde::de::Error::custom)?; // If a schema is effectively empty (except for potentially carrying an ID), // it functions as a boolean `true` schema in Draft2020 which means it should not // restrict additional properties natively let is_empty = obj.type_.is_none() && obj.properties.is_none() && obj.pattern_properties.is_none() && obj.additional_properties.is_none() && obj.required.is_none() && obj.dependencies.is_none() && obj.items.is_none() && obj.prefix_items.is_none() && obj.contains.is_none() && obj.format.is_none() && obj.enum_.is_none() && obj.const_.is_none() && obj.all_of.is_none() && obj.one_of.is_none() && obj.not.is_none() && obj.if_.is_none() && obj.then_.is_none() && obj.else_.is_none() && obj.r#ref.is_none() && obj.family.is_none(); if is_empty && obj.extensible.is_none() { obj.extensible = Some(true); } Ok(Schema { obj, always_fail: false, }) } } #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(untagged)] pub enum SchemaTypeOrArray { Single(String), Multiple(Vec), } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Action { #[serde(skip_serializing_if = "Option::is_none")] pub navigate: Option, #[serde(skip_serializing_if = "Option::is_none")] pub punc: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(untagged)] pub enum Dependency { Props(Vec), Schema(Arc), }