Merger now requires a schema id; queryer and merger now use pre-compiled edges for O(1) relation lookups

This commit is contained in:
2026-03-21 20:33:28 -04:00
parent 9bdb767685
commit 882bdc6271
13 changed files with 1370 additions and 307 deletions

View File

@ -2,6 +2,26 @@ use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::BTreeMap;
use std::sync::Arc;
use std::sync::OnceLock;
/// Serde `serialize_with` helper for fields stored in a [`OnceLock`].
///
/// When the lock has been initialized, the inner value is serialized as-is;
/// when it is still unset, `serialize_none` is emitted instead.
pub fn serialize_once_lock<T: serde::Serialize, S: serde::Serializer>(
    lock: &OnceLock<T>,
    serializer: S,
) -> Result<S::Ok, S::Error> {
    match lock.get() {
        Some(inner) => inner.serialize(serializer),
        None => serializer.serialize_none(),
    }
}
/// Serde `skip_serializing_if` predicate for a `OnceLock`-wrapped map.
///
/// Returns `true` when the lock is unset or when the map it holds has no entries.
pub fn is_once_lock_map_empty<K, V>(lock: &OnceLock<std::collections::BTreeMap<K, V>>) -> bool {
    match lock.get() {
        Some(map) => map.is_empty(),
        // An uninitialized lock counts as empty.
        None => true,
    }
}
/// Serde `skip_serializing_if` predicate for a `OnceLock`-wrapped vector.
///
/// Returns `true` when the lock is unset or when the vector it holds has no elements.
pub fn is_once_lock_vec_empty<T>(lock: &OnceLock<Vec<T>>) -> bool {
    match lock.get() {
        Some(items) => items.is_empty(),
        // An uninitialized lock counts as empty.
        None => true,
    }
}
// Schema mirrors the Go Punc Generator's schema struct for consistency.
// It is an order-preserving representation of a JSON Schema.
@ -168,18 +188,26 @@ pub struct SchemaObject {
pub extensible: Option<bool>,
#[serde(rename = "compiledProperties")]
#[serde(skip_serializing_if = "Option::is_none")]
pub compiled_property_names: Option<Vec<String>>,
#[serde(skip_deserializing)]
#[serde(skip_serializing_if = "crate::database::schema::is_once_lock_vec_empty")]
#[serde(serialize_with = "crate::database::schema::serialize_once_lock")]
pub compiled_property_names: OnceLock<Vec<String>>,
#[serde(skip)]
pub compiled_properties: Option<BTreeMap<String, Arc<Schema>>>,
pub compiled_properties: OnceLock<BTreeMap<String, Arc<Schema>>>,
#[serde(rename = "compiledEdges")]
#[serde(skip_deserializing)]
#[serde(skip_serializing_if = "crate::database::schema::is_once_lock_map_empty")]
#[serde(serialize_with = "crate::database::schema::serialize_once_lock")]
pub compiled_edges: OnceLock<BTreeMap<String, crate::database::edge::Edge>>,
#[serde(skip)]
pub compiled_format: Option<CompiledFormat>,
pub compiled_format: OnceLock<CompiledFormat>,
#[serde(skip)]
pub compiled_pattern: Option<CompiledRegex>,
pub compiled_pattern: OnceLock<CompiledRegex>,
#[serde(skip)]
pub compiled_pattern_properties: Option<Vec<(CompiledRegex, Arc<Schema>)>>,
pub compiled_pattern_properties: OnceLock<Vec<(CompiledRegex, Arc<Schema>)>>,
}
/// Represents a compiled format validator
@ -223,19 +251,37 @@ impl std::ops::DerefMut for Schema {
}
impl Schema {
pub fn compile_internals(&mut self) {
self.map_children(|child| child.compile_internals());
if let Some(format_str) = &self.obj.format
&& let Some(fmt) = crate::database::formats::FORMATS.get(format_str.as_str())
{
self.obj.compiled_format = Some(crate::database::schema::CompiledFormat::Func(fmt.func));
pub fn compile(
&self,
db: &crate::database::Database,
visited: &mut std::collections::HashSet<String>,
) {
if self.obj.compiled_properties.get().is_some() {
return;
}
if let Some(pattern_str) = &self.obj.pattern
&& let Ok(re) = regex::Regex::new(pattern_str)
{
self.obj.compiled_pattern = Some(crate::database::schema::CompiledRegex(re));
if let Some(id) = &self.obj.id {
if !visited.insert(id.clone()) {
return; // Break cyclical resolution
}
}
if let Some(format_str) = &self.obj.format {
if let Some(fmt) = crate::database::formats::FORMATS.get(format_str.as_str()) {
let _ = self
.obj
.compiled_format
.set(crate::database::schema::CompiledFormat::Func(fmt.func));
}
}
if let Some(pattern_str) = &self.obj.pattern {
if let Ok(re) = regex::Regex::new(pattern_str) {
let _ = self
.obj
.compiled_pattern
.set(crate::database::schema::CompiledRegex(re));
}
}
if let Some(pattern_props) = &self.obj.pattern_properties {
@ -246,19 +292,115 @@ impl Schema {
}
}
if !compiled.is_empty() {
self.obj.compiled_pattern_properties = Some(compiled);
let _ = self.obj.compiled_pattern_properties.set(compiled);
}
}
let mut props = std::collections::BTreeMap::new();
// 1. Resolve INHERITANCE dependencies first
if let Some(ref_id) = &self.obj.r#ref {
if let Some(parent) = db.schemas.get(ref_id) {
parent.compile(db, visited);
if let Some(p_props) = parent.obj.compiled_properties.get() {
props.extend(p_props.clone());
}
}
}
if let Some(all_of) = &self.obj.all_of {
for ao in all_of {
ao.compile(db, visited);
if let Some(ao_props) = ao.obj.compiled_properties.get() {
props.extend(ao_props.clone());
}
}
}
if let Some(then_schema) = &self.obj.then_ {
then_schema.compile(db, visited);
if let Some(t_props) = then_schema.obj.compiled_properties.get() {
props.extend(t_props.clone());
}
}
if let Some(else_schema) = &self.obj.else_ {
else_schema.compile(db, visited);
if let Some(e_props) = else_schema.obj.compiled_properties.get() {
props.extend(e_props.clone());
}
}
// 2. Add local properties
if let Some(local_props) = &self.obj.properties {
for (k, v) in local_props {
props.insert(k.clone(), v.clone());
}
}
// 3. Set the OnceLock!
let _ = self.obj.compiled_properties.set(props.clone());
let mut names: Vec<String> = props.keys().cloned().collect();
names.sort();
let _ = self.obj.compiled_property_names.set(names);
// 4. Compute Edges natively
let schema_edges = self.compile_edges(db, visited, &props);
let _ = self.obj.compiled_edges.set(schema_edges);
// 5. Build our inline children properties recursively NOW! (Depth-first search)
if let Some(local_props) = &self.obj.properties {
for child in local_props.values() {
child.compile(db, visited);
}
}
if let Some(items) = &self.obj.items {
items.compile(db, visited);
}
if let Some(pattern_props) = &self.obj.pattern_properties {
for child in pattern_props.values() {
child.compile(db, visited);
}
}
if let Some(additional_props) = &self.obj.additional_properties {
additional_props.compile(db, visited);
}
if let Some(one_of) = &self.obj.one_of {
for child in one_of {
child.compile(db, visited);
}
}
if let Some(arr) = &self.obj.prefix_items {
for child in arr {
child.compile(db, visited);
}
}
if let Some(child) = &self.obj.not {
child.compile(db, visited);
}
if let Some(child) = &self.obj.contains {
child.compile(db, visited);
}
if let Some(child) = &self.obj.property_names {
child.compile(db, visited);
}
if let Some(child) = &self.obj.if_ {
child.compile(db, visited);
}
if let Some(id) = &self.obj.id {
visited.remove(id);
}
}
pub fn harvest(&mut self, to_insert: &mut Vec<(String, Schema)>) {
if let Some(id) = &self.obj.id {
to_insert.push((id.clone(), self.clone()));
}
self.map_children(|child| child.harvest(to_insert));
self.harvest_children(|child| child.harvest(to_insert));
}
pub fn map_children<F>(&mut self, mut f: F)
pub fn harvest_children<F>(&mut self, mut f: F)
where
F: FnMut(&mut Schema),
{
@ -313,6 +455,76 @@ impl Schema {
map_opt(&mut self.obj.then_);
map_opt(&mut self.obj.else_);
}
/// Computes the edge map for this schema from its already-merged properties.
///
/// The owning type name is the last dot-separated segment of `family`, else
/// of `id`, else of `$ref`. If no name can be derived, or the name is not a
/// key in `db.types`, the returned map is empty.
///
/// For every entry in `props`, the target schema is the property itself, or
/// its `items` schema when the property's type is the single string
/// `"array"`. The target's type name is derived from its `family`, else its
/// `$ref`, else the `id`/`$ref` of the first `oneOf` variant. When that name
/// is a key in `db.types`, the target is compiled and `db.resolve_relation`
/// is consulted; a successful resolution yields an `Edge` keyed by the
/// property name.
///
/// * `db` - database providing `types` and `resolve_relation`.
/// * `visited` - schema-id set forwarded to `Schema::compile`.
/// * `props` - merged property map to inspect.
pub fn compile_edges(
    &self,
    db: &crate::database::Database,
    visited: &mut std::collections::HashSet<String>,
    props: &std::collections::BTreeMap<String, std::sync::Arc<Schema>>,
) -> std::collections::BTreeMap<String, crate::database::edge::Edge> {
    /// Last dot-separated segment of `path`. `split` always yields at least
    /// one piece, so the fallback is never actually taken.
    fn tail_segment(path: &str) -> String {
        path.split('.').next_back().unwrap_or(path).to_string()
    }

    let mut edges = std::collections::BTreeMap::new();

    // Name of the type owning these properties: family > id > $ref.
    let owner_type = if let Some(family) = &self.obj.family {
        Some(tail_segment(family))
    } else if let Some(id) = &self.obj.id {
        Some(tail_segment(id))
    } else if let Some(ref_id) = &self.obj.r#ref {
        Some(tail_segment(ref_id))
    } else {
        None
    };

    let Some(owner_type) = owner_type else {
        return edges;
    };
    if !db.types.contains_key(&owner_type) {
        return edges;
    }

    for (prop_name, prop_schema) in props {
        // For a property typed as the single string "array", look at its item schema.
        let is_array = matches!(
            &prop_schema.obj.type_,
            Some(crate::database::schema::SchemaTypeOrArray::Single(t)) if t == "array"
        );
        let target_schema = match (&prop_schema.obj.items, is_array) {
            (Some(items), true) => items.clone(),
            _ => prop_schema.clone(),
        };

        // Name of the type on the other end: family > $ref > first oneOf variant's id/$ref.
        let child_type = if let Some(family) = &target_schema.obj.family {
            Some(tail_segment(family))
        } else if let Some(ref_id) = &target_schema.obj.r#ref {
            Some(tail_segment(ref_id))
        } else {
            target_schema
                .obj
                .one_of
                .as_ref()
                .and_then(|variants| variants.first())
                .and_then(|first| first.obj.id.as_ref().or(first.obj.r#ref.as_ref()))
                .map(|ref_id| tail_segment(ref_id))
        };

        let Some(child_type) = child_type else {
            continue;
        };
        if !db.types.contains_key(&child_type) {
            continue;
        }

        // The target's compiled property names are handed to `resolve_relation`.
        target_schema.compile(db, visited);
        let Some(target_props) = target_schema.obj.compiled_properties.get() else {
            continue;
        };
        let candidate_keys: Vec<String> = target_props.keys().cloned().collect();

        if let Some((relation, is_forward)) =
            db.resolve_relation(&owner_type, &child_type, prop_name, Some(&candidate_keys))
        {
            edges.insert(
                prop_name.clone(),
                crate::database::edge::Edge {
                    constraint: relation.constraint.clone(),
                    forward: is_forward,
                },
            );
        }
    }

    edges
}
}
impl<'de> Deserialize<'de> for Schema {