significantly simplified the validator and work continues on query

This commit is contained in:
2026-03-03 17:58:31 -05:00
parent 3898c43742
commit e7f20e2cb6
58 changed files with 5446 additions and 5693 deletions

View File

@ -16,7 +16,6 @@ pub struct Database {
pub types: HashMap<String, Type>,
pub puncs: HashMap<String, Punc>,
pub schemas: HashMap<String, Schema>,
pub descendants: HashMap<String, Vec<String>>,
}
impl Database {
@ -26,7 +25,6 @@ impl Database {
types: HashMap::new(),
puncs: HashMap::new(),
schemas: HashMap::new(),
descendants: HashMap::new(),
};
if let Some(arr) = val.get("enums").and_then(|v| v.as_array()) {
@ -75,146 +73,137 @@ impl Database {
/// Runs the schema compilation pipeline over `self.schemas` and returns `Ok(())` once
/// every step has run; no statement shown here produces an `Err`.
///
/// NOTE(review): the body below appears to interleave statements from two revisions of
/// this method: one that calls `collect_descendents` / `compile_allowed_types` /
/// `compile_pointers`, and one that expands `$family` and then calls
/// `compile_internals` / `link_refs` on each schema. That is why the step numbers in
/// the comments repeat. Confirm which statements are live before editing.
fn compile(&mut self) -> Result<(), String> {
// Populate `self.schemas` first; everything below iterates over it.
self.collect_schemas();
// 1. Compile regex and formats sequentially
for schema in self.schemas.values_mut() {
schema.compile();
// 1. Build a structural descendant graph for $family macro expansion
// `direct_refs` maps a referenced schema id to the ids of the schemas whose `$ref`
// (`obj.ref_string`) names it. The target id is not checked against `self.schemas`.
let mut direct_refs: std::collections::HashMap<String, Vec<String>> =
std::collections::HashMap::new();
for (id, schema) in &self.schemas {
if let Some(ref_str) = &schema.obj.ref_string {
direct_refs
.entry(ref_str.clone())
.or_default()
.push(id.clone());
}
}
// 2. Compute the Unified Semantic Graph (descendants)
self.collect_descendents();
// Copy the ids out so the loops below can call `get_mut` on `self.schemas`
// without holding a borrow from iterating the map itself.
let schema_ids: Vec<String> = self.schemas.keys().cloned().collect();
// 3. For any schema representing a Postgres table, cache its allowed subclasses
self.compile_allowed_types();
// 2. Expand $family macros into anyOf blocks (`expand_family` assigns `obj.any_of`)
for id in &schema_ids {
if let Some(schema) = self.schemas.get_mut(id) {
// presumably `map_children` visits the nested child schemas, so `$family` is
// expanded on them as well as on the schema itself — TODO confirm
schema.map_children(|mut child| {
Self::expand_family(&mut child, &direct_refs);
});
Self::expand_family(schema, &direct_refs);
}
}
// 4. Finally, securely link all string $refs into memory pointers (Arc)
self.compile_pointers();
// Cloned copy of the whole map, handed to `link_refs` while `self.schemas` itself
// is mutably borrowed through `get_mut`.
let schemas_snap = self.schemas.clone();
// 3. Compile internals and link memory pointers
for id in schema_ids {
if let Some(schema) = self.schemas.get_mut(&id) {
schema.compile_internals();
schema.link_refs(&schemas_snap);
}
}
Ok(())
}
/// Gathers schemas from `self.types`, `self.puncs` and `self.enums` and inserts them
/// into `self.schemas`, keyed by schema id.
///
/// NOTE(review): two collection strategies appear interleaved in this body: pushing
/// `(id, schema)` pairs directly when `schema.obj.id` is set, and delegating to
/// `schema.harvest(&mut to_insert)` (after stamping variations on type schemas).
/// Confirm which statements are live before editing.
fn collect_schemas(&mut self) {
// Collected `(id, schema)` pairs; applied to `self.schemas` in the final loop.
let mut to_insert = Vec::new();
for (_, type_def) in &self.types {
for schema in &type_def.schemas {
if let Some(id) = &schema.obj.id {
to_insert.push((id.clone(), schema.clone()));
}
// Pass A: Entities - Compute Variations from hierarchies
// `hierarchy` is an array of ancestors. E.g. `person` -> `['entity', 'user', 'person']`.
// We map this backward so that `user`'s allowed variations = `['user', 'person']`.
let mut variations_by_entity = std::collections::HashMap::new();
for type_def in self.types.values() {
for ancestor in &type_def.hierarchy {
variations_by_entity
.entry(ancestor.clone())
.or_insert_with(std::collections::HashSet::new)
.insert(type_def.name.clone());
}
}
// Now stamp all exported entity schemas with their precise physical variations
for (_, type_def) in &self.types {
// Falls back to an empty set when this type's name never appears in any `hierarchy`.
let allowed_strings = variations_by_entity
.get(&type_def.name)
.cloned()
.unwrap_or_default();
// Works on clones, so the schemas stored on `type_def` are not modified here.
for mut schema in type_def.schemas.clone() {
schema.stamp_variations(Some(allowed_strings.clone()));
// presumably appends `(id, schema)` pairs to `to_insert` — TODO confirm `harvest`
schema.harvest(&mut to_insert);
}
}
// Pass B: APIs and Enums (No initial variations stamped)
for (_, punc_def) in &self.puncs {
for schema in &punc_def.schemas {
if let Some(id) = &schema.obj.id {
to_insert.push((id.clone(), schema.clone()));
}
for mut schema in punc_def.schemas.clone() {
schema.harvest(&mut to_insert);
}
}
for (_, enum_def) in &self.enums {
for schema in &enum_def.schemas {
if let Some(id) = &schema.obj.id {
to_insert.push((id.clone(), schema.clone()));
}
for mut schema in enum_def.schemas.clone() {
schema.harvest(&mut to_insert);
}
}
// `HashMap::insert` replaces an existing entry, so when two collected schemas share
// an id the one pushed later wins.
for (id, schema) in to_insert {
self.schemas.insert(id, schema);
}
}
// NOTE(review): the lines from here to the end of this span appear to interleave three
// functions: `collect_descendents` and `compile_allowed_types` (which write and read
// `self.descendants`) and the associated function `expand_family`. Confirm which
// statements belong to which function before editing.
/// Fills `self.descendants`: for each schema id, the ids of every schema that reaches it
/// through one or more `$ref` hops. Only `$ref`s whose target exists in `self.schemas`
/// are counted.
fn collect_descendents(&mut self) {
let mut direct_children: HashMap<String, Vec<String>> = HashMap::new();
/// Expands a `$family` marker on `schema` in place.
///
/// When `schema.obj.family` is set, `schema.obj.any_of` is overwritten with a list of
/// `$ref`-only schemas (the family target first, then each descendant in sorted id
/// order) and `family` is reset to `None`.
///
/// `direct_refs` maps a schema id to the ids of the schemas that `$ref` it directly.
fn expand_family(
schema: &mut crate::database::schema::Schema,
direct_refs: &std::collections::HashMap<String, Vec<String>>,
) {
if let Some(family_target) = &schema.obj.family {
let mut descendants = std::collections::HashSet::new();
Self::collect_descendants(family_target, direct_refs, &mut descendants);
// First pass: Find all schemas that have a $ref to another schema
let schema_ids: Vec<String> = self.schemas.keys().cloned().collect();
for id in schema_ids {
if let Some(ref_str) = self.schemas.get(&id).and_then(|s| s.obj.ref_string.clone()) {
if self.schemas.contains_key(&ref_str) {
direct_children.entry(ref_str).or_default().push(id.clone());
}
}
}
// the "$family" macro is logically replaced by an anyOf of its descendants + itself
let mut derived_any_of = Vec::new();
// Now compute descendants for all schemas
let mut descendants_map: HashMap<String, Vec<String>> = HashMap::new();
for key in self.schemas.keys() {
let mut descendants = Vec::new();
// Work list of ids still to visit; `pop` takes from the end of the `Vec`.
let mut queue = Vec::new();
if let Some(children) = direct_children.get(key) {
queue.extend(children.iter().cloned());
// Include the target base itself if valid (which it always is structurally)
let mut base_ref = crate::database::schema::SchemaObject::default();
base_ref.ref_string = Some(family_target.clone());
derived_any_of.push(std::sync::Arc::new(crate::database::schema::Schema {
obj: base_ref,
always_fail: false,
}));
// Sort descendants for determinism during testing
let mut desc_vec: Vec<String> = descendants.into_iter().collect();
desc_vec.sort();
for child_id in desc_vec {
let mut child_ref = crate::database::schema::SchemaObject::default();
child_ref.ref_string = Some(child_id);
derived_any_of.push(std::sync::Arc::new(crate::database::schema::Schema {
obj: child_ref,
always_fail: false,
}));
}
// `visited` ensures each id is recorded and expanded once, so `$ref` cycles terminate.
let mut visited = std::collections::HashSet::new();
while let Some(child) = queue.pop() {
if visited.insert(child.clone()) {
descendants.push(child.clone());
if let Some(grandchildren) = direct_children.get(&child) {
queue.extend(grandchildren.iter().cloned());
}
}
}
descendants_map.insert(key.clone(), descendants);
}
self.descendants = descendants_map;
}
/// Sets `obj.compiled_allowed_types` on every schema that is declared by a type in
/// `self.types`, or that appears in `self.descendants` for such a schema.
fn compile_allowed_types(&mut self) {
// 1. Identify which types act as bases (table-backed schemas)
let mut entity_bases = HashMap::new();
for type_def in self.types.values() {
for type_schema in &type_def.schemas {
if let Some(id) = &type_schema.obj.id {
entity_bases.insert(id.clone(), type_def.name.clone());
}
}
}
// 2. Compute compiled_allowed_types for all descendants of entity bases
let mut allowed_types_map: HashMap<String, std::collections::HashSet<String>> = HashMap::new();
for base_id in entity_bases.keys() {
// The base maps to the set of its descendants' ids (empty when it has none recorded).
allowed_types_map.insert(
base_id.clone(),
self
.descendants
.get(base_id)
.unwrap_or(&vec![])
.iter()
.cloned()
.collect(),
);
// Each descendant is given the same set as its base.
if let Some(descendants) = self.descendants.get(base_id) {
let set: std::collections::HashSet<String> = descendants.iter().cloned().collect();
for desc_id in descendants {
allowed_types_map.insert(desc_id.clone(), set.clone());
}
}
}
// 3. Inject types into the schemas
let schema_ids: Vec<String> = self.schemas.keys().cloned().collect();
for id in schema_ids {
if let Some(set) = allowed_types_map.get(&id) {
if let Some(schema) = self.schemas.get_mut(&id) {
schema.obj.compiled_allowed_types = Some(set.clone());
}
}
// Replaces whatever `any_of` the schema previously carried.
schema.obj.any_of = Some(derived_any_of);
// Remove family so it doesn't cause conflicts or fail the simple validation
schema.obj.family = None;
}
}
// NOTE(review): this span appears to interleave two functions, `compile_pointers` and
// the associated function `collect_descendants`. Confirm which statements belong to
// which function before editing.
/// For every schema in `self.schemas`, sets `obj.compiled_ref` to an `Arc` holding a clone
/// of the schema named by its `ref_string`, or to `None` when there is no `ref_string` or
/// the target id is not in `self.schemas`.
fn compile_pointers(&mut self) {
let schema_ids: Vec<String> = self.schemas.keys().cloned().collect();
for id in schema_ids {
// Resolved under a shared borrow first, then written back under `get_mut` below.
let mut compiled_ref = None;
if let Some(schema) = self.schemas.get(&id) {
if let Some(ref_str) = &schema.obj.ref_string {
if let Some(target) = self.schemas.get(ref_str) {
compiled_ref = Some(std::sync::Arc::new(target.clone()));
}
/// Recursively adds to `descendants` every id reachable from `target` by following
/// `direct_refs` (id -> ids of the schemas that `$ref` it directly).
///
/// A child's own children are visited only the first time that child is inserted, so
/// cycles in `direct_refs` terminate.
///
/// NOTE(review): `target` itself is never inserted by this call unless a cycle leads back
/// to it. In that case `expand_family` would list the base twice, since it also pushes
/// the target unconditionally — confirm whether cyclic `$ref`s can occur.
fn collect_descendants(
target: &str,
direct_refs: &std::collections::HashMap<String, Vec<String>>,
descendants: &mut std::collections::HashSet<String>,
) {
if let Some(children) = direct_refs.get(target) {
for child in children {
// `HashSet::insert` returns `true` only for ids not seen before.
if descendants.insert(child.clone()) {
Self::collect_descendants(child, direct_refs, descendants);
}
}
if let Some(schema) = self.schemas.get_mut(&id) {
schema.obj.compiled_ref = compiled_ref;
}
}
}
}