Significantly simplified the validator; work continues on query.
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@ -16,7 +16,6 @@ pub struct Database {
|
||||
pub types: HashMap<String, Type>,
|
||||
pub puncs: HashMap<String, Punc>,
|
||||
pub schemas: HashMap<String, Schema>,
|
||||
pub descendants: HashMap<String, Vec<String>>,
|
||||
}
|
||||
|
||||
impl Database {
|
||||
@ -26,7 +25,6 @@ impl Database {
|
||||
types: HashMap::new(),
|
||||
puncs: HashMap::new(),
|
||||
schemas: HashMap::new(),
|
||||
descendants: HashMap::new(),
|
||||
};
|
||||
|
||||
if let Some(arr) = val.get("enums").and_then(|v| v.as_array()) {
|
||||
@ -75,146 +73,137 @@ impl Database {
|
||||
fn compile(&mut self) -> Result<(), String> {
|
||||
self.collect_schemas();
|
||||
|
||||
// 1. Compile regex and formats sequentially
|
||||
for schema in self.schemas.values_mut() {
|
||||
schema.compile();
|
||||
// 1. Build a structural descendant graph for $family macro expansion
|
||||
let mut direct_refs: std::collections::HashMap<String, Vec<String>> =
|
||||
std::collections::HashMap::new();
|
||||
for (id, schema) in &self.schemas {
|
||||
if let Some(ref_str) = &schema.obj.ref_string {
|
||||
direct_refs
|
||||
.entry(ref_str.clone())
|
||||
.or_default()
|
||||
.push(id.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Compute the Unified Semantic Graph (descendants)
|
||||
self.collect_descendents();
|
||||
let schema_ids: Vec<String> = self.schemas.keys().cloned().collect();
|
||||
|
||||
// 3. For any schema representing a Postgres table, cache its allowed subclasses
|
||||
self.compile_allowed_types();
|
||||
// 2. Expand $family macros into oneOf blocks
|
||||
for id in &schema_ids {
|
||||
if let Some(schema) = self.schemas.get_mut(id) {
|
||||
schema.map_children(|mut child| {
|
||||
Self::expand_family(&mut child, &direct_refs);
|
||||
});
|
||||
Self::expand_family(schema, &direct_refs);
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Finally, securely link all string $refs into memory pointers (Arc)
|
||||
self.compile_pointers();
|
||||
let schemas_snap = self.schemas.clone();
|
||||
|
||||
// 3. Compile internals and link memory pointers
|
||||
for id in schema_ids {
|
||||
if let Some(schema) = self.schemas.get_mut(&id) {
|
||||
schema.compile_internals();
|
||||
schema.link_refs(&schemas_snap);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn collect_schemas(&mut self) {
|
||||
let mut to_insert = Vec::new();
|
||||
for (_, type_def) in &self.types {
|
||||
for schema in &type_def.schemas {
|
||||
if let Some(id) = &schema.obj.id {
|
||||
to_insert.push((id.clone(), schema.clone()));
|
||||
}
|
||||
|
||||
// Pass A: Entities - Compute Variations from hierarchies
|
||||
// `hierarchy` is an array of ancestors. E.g. `person` -> `['entity', 'user', 'person']`.
|
||||
// We map this backward so that `user`'s allowed variations = `['user', 'person']`.
|
||||
let mut variations_by_entity = std::collections::HashMap::new();
|
||||
for type_def in self.types.values() {
|
||||
for ancestor in &type_def.hierarchy {
|
||||
variations_by_entity
|
||||
.entry(ancestor.clone())
|
||||
.or_insert_with(std::collections::HashSet::new)
|
||||
.insert(type_def.name.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// Now stamp all exported entity schemas with their precise physical variations
|
||||
for (_, type_def) in &self.types {
|
||||
let allowed_strings = variations_by_entity
|
||||
.get(&type_def.name)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
for mut schema in type_def.schemas.clone() {
|
||||
schema.stamp_variations(Some(allowed_strings.clone()));
|
||||
schema.harvest(&mut to_insert);
|
||||
}
|
||||
}
|
||||
|
||||
// Pass B: APIs and Enums (No initial variations stamped)
|
||||
for (_, punc_def) in &self.puncs {
|
||||
for schema in &punc_def.schemas {
|
||||
if let Some(id) = &schema.obj.id {
|
||||
to_insert.push((id.clone(), schema.clone()));
|
||||
}
|
||||
for mut schema in punc_def.schemas.clone() {
|
||||
schema.harvest(&mut to_insert);
|
||||
}
|
||||
}
|
||||
for (_, enum_def) in &self.enums {
|
||||
for schema in &enum_def.schemas {
|
||||
if let Some(id) = &schema.obj.id {
|
||||
to_insert.push((id.clone(), schema.clone()));
|
||||
}
|
||||
for mut schema in enum_def.schemas.clone() {
|
||||
schema.harvest(&mut to_insert);
|
||||
}
|
||||
}
|
||||
|
||||
for (id, schema) in to_insert {
|
||||
self.schemas.insert(id, schema);
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_descendents(&mut self) {
|
||||
let mut direct_children: HashMap<String, Vec<String>> = HashMap::new();
|
||||
fn expand_family(
|
||||
schema: &mut crate::database::schema::Schema,
|
||||
direct_refs: &std::collections::HashMap<String, Vec<String>>,
|
||||
) {
|
||||
if let Some(family_target) = &schema.obj.family {
|
||||
let mut descendants = std::collections::HashSet::new();
|
||||
Self::collect_descendants(family_target, direct_refs, &mut descendants);
|
||||
|
||||
// First pass: Find all schemas that have a $ref to another schema
|
||||
let schema_ids: Vec<String> = self.schemas.keys().cloned().collect();
|
||||
for id in schema_ids {
|
||||
if let Some(ref_str) = self.schemas.get(&id).and_then(|s| s.obj.ref_string.clone()) {
|
||||
if self.schemas.contains_key(&ref_str) {
|
||||
direct_children.entry(ref_str).or_default().push(id.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
// the "$family" macro is logically replaced by an anyOf of its descendants + itself
|
||||
let mut derived_any_of = Vec::new();
|
||||
|
||||
// Now compute descendants for all schemas
|
||||
let mut descendants_map: HashMap<String, Vec<String>> = HashMap::new();
|
||||
for key in self.schemas.keys() {
|
||||
let mut descendants = Vec::new();
|
||||
let mut queue = Vec::new();
|
||||
if let Some(children) = direct_children.get(key) {
|
||||
queue.extend(children.iter().cloned());
|
||||
// Include the target base itself if valid (which it always is structurally)
|
||||
let mut base_ref = crate::database::schema::SchemaObject::default();
|
||||
base_ref.ref_string = Some(family_target.clone());
|
||||
derived_any_of.push(std::sync::Arc::new(crate::database::schema::Schema {
|
||||
obj: base_ref,
|
||||
always_fail: false,
|
||||
}));
|
||||
|
||||
// Sort descendants for determinism during testing
|
||||
let mut desc_vec: Vec<String> = descendants.into_iter().collect();
|
||||
desc_vec.sort();
|
||||
|
||||
for child_id in desc_vec {
|
||||
let mut child_ref = crate::database::schema::SchemaObject::default();
|
||||
child_ref.ref_string = Some(child_id);
|
||||
derived_any_of.push(std::sync::Arc::new(crate::database::schema::Schema {
|
||||
obj: child_ref,
|
||||
always_fail: false,
|
||||
}));
|
||||
}
|
||||
|
||||
let mut visited = std::collections::HashSet::new();
|
||||
while let Some(child) = queue.pop() {
|
||||
if visited.insert(child.clone()) {
|
||||
descendants.push(child.clone());
|
||||
if let Some(grandchildren) = direct_children.get(&child) {
|
||||
queue.extend(grandchildren.iter().cloned());
|
||||
}
|
||||
}
|
||||
}
|
||||
descendants_map.insert(key.clone(), descendants);
|
||||
}
|
||||
self.descendants = descendants_map;
|
||||
}
|
||||
|
||||
fn compile_allowed_types(&mut self) {
|
||||
// 1. Identify which types act as bases (table-backed schemas)
|
||||
let mut entity_bases = HashMap::new();
|
||||
for type_def in self.types.values() {
|
||||
for type_schema in &type_def.schemas {
|
||||
if let Some(id) = &type_schema.obj.id {
|
||||
entity_bases.insert(id.clone(), type_def.name.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Compute compiled_allowed_types for all descendants of entity bases
|
||||
let mut allowed_types_map: HashMap<String, std::collections::HashSet<String>> = HashMap::new();
|
||||
for base_id in entity_bases.keys() {
|
||||
allowed_types_map.insert(
|
||||
base_id.clone(),
|
||||
self
|
||||
.descendants
|
||||
.get(base_id)
|
||||
.unwrap_or(&vec![])
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect(),
|
||||
);
|
||||
if let Some(descendants) = self.descendants.get(base_id) {
|
||||
let set: std::collections::HashSet<String> = descendants.iter().cloned().collect();
|
||||
for desc_id in descendants {
|
||||
allowed_types_map.insert(desc_id.clone(), set.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Inject types into the schemas
|
||||
let schema_ids: Vec<String> = self.schemas.keys().cloned().collect();
|
||||
for id in schema_ids {
|
||||
if let Some(set) = allowed_types_map.get(&id) {
|
||||
if let Some(schema) = self.schemas.get_mut(&id) {
|
||||
schema.obj.compiled_allowed_types = Some(set.clone());
|
||||
}
|
||||
}
|
||||
schema.obj.any_of = Some(derived_any_of);
|
||||
// Remove family so it doesn't cause conflicts or fail the simple validation
|
||||
schema.obj.family = None;
|
||||
}
|
||||
}
|
||||
|
||||
fn compile_pointers(&mut self) {
|
||||
let schema_ids: Vec<String> = self.schemas.keys().cloned().collect();
|
||||
for id in schema_ids {
|
||||
let mut compiled_ref = None;
|
||||
|
||||
if let Some(schema) = self.schemas.get(&id) {
|
||||
if let Some(ref_str) = &schema.obj.ref_string {
|
||||
if let Some(target) = self.schemas.get(ref_str) {
|
||||
compiled_ref = Some(std::sync::Arc::new(target.clone()));
|
||||
}
|
||||
fn collect_descendants(
|
||||
target: &str,
|
||||
direct_refs: &std::collections::HashMap<String, Vec<String>>,
|
||||
descendants: &mut std::collections::HashSet<String>,
|
||||
) {
|
||||
if let Some(children) = direct_refs.get(target) {
|
||||
for child in children {
|
||||
if descendants.insert(child.clone()) {
|
||||
Self::collect_descendants(child, direct_refs, descendants);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(schema) = self.schemas.get_mut(&id) {
|
||||
schema.obj.compiled_ref = compiled_ref;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -124,7 +124,7 @@ pub struct SchemaObject {
|
||||
#[serde(skip)]
|
||||
pub compiled_ref: Option<Arc<Schema>>,
|
||||
#[serde(skip)]
|
||||
pub compiled_allowed_types: Option<std::collections::HashSet<String>>,
|
||||
pub compiled_variations: Option<std::collections::HashSet<String>>,
|
||||
#[serde(skip)]
|
||||
pub compiled_format: Option<CompiledFormat>,
|
||||
#[serde(skip)]
|
||||
@ -133,11 +133,6 @@ pub struct SchemaObject {
|
||||
pub compiled_pattern_properties: Option<Vec<(CompiledRegex, Arc<Schema>)>>,
|
||||
}
|
||||
|
||||
pub enum ResolvedRef<'a> {
|
||||
Local(&'a Schema),
|
||||
Global(&'a Schema, &'a Schema),
|
||||
}
|
||||
|
||||
/// Represents a compiled format validator
|
||||
#[derive(Clone)]
|
||||
pub enum CompiledFormat {
|
||||
@ -188,12 +183,9 @@ impl std::ops::DerefMut for Schema {
|
||||
}
|
||||
|
||||
impl Schema {
|
||||
/// Vestigial lookup hook that never resolves anything.
///
/// The argument is ignored and the result is always `None`. According to the
/// original inline note, references are global pointers and this method is
/// slated for removal; it is kept only so existing callers keep compiling.
pub fn resolve_ref(&self, _ref_string: &str) -> Option<&Arc<Schema>> {
    // This is vestigial for now. References are global pointers. We will remove this shortly.
    None
}
|
||||
pub fn compile_internals(&mut self) {
|
||||
self.map_children(|child| child.compile_internals());
|
||||
|
||||
pub fn compile(&mut self) {
|
||||
if let Some(format_str) = &self.obj.format {
|
||||
if let Some(fmt) = crate::database::formats::FORMATS.get(format_str.as_str()) {
|
||||
self.obj.compiled_format = Some(crate::database::schema::CompiledFormat::Func(fmt.func));
|
||||
@ -217,96 +209,112 @@ impl Schema {
|
||||
self.obj.compiled_pattern_properties = Some(compiled);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Crawl children recursively to compile their internals
|
||||
pub fn link_refs(&mut self, schemas: &std::collections::HashMap<String, Schema>) {
|
||||
if let Some(ref_str) = &self.obj.ref_string {
|
||||
if let Some(target) = schemas.get(ref_str) {
|
||||
self.obj.compiled_ref = Some(Arc::new(target.clone()));
|
||||
|
||||
// Viral Infection: Inherit physical entity boundaries across the $ref pointer recursively
|
||||
if self.obj.compiled_variations.is_none() {
|
||||
let mut visited = std::collections::HashSet::new();
|
||||
self.obj.compiled_variations = Self::resolve_variations(ref_str, schemas, &mut visited);
|
||||
}
|
||||
}
|
||||
}
|
||||
self.map_children(|child| child.link_refs(schemas));
|
||||
}
|
||||
|
||||
fn resolve_variations(
|
||||
ref_str: &str,
|
||||
schemas: &std::collections::HashMap<String, Schema>,
|
||||
visited: &mut std::collections::HashSet<String>,
|
||||
) -> Option<std::collections::HashSet<String>> {
|
||||
if !visited.insert(ref_str.to_string()) {
|
||||
return None; // Cycle detected
|
||||
}
|
||||
|
||||
if let Some(target) = schemas.get(ref_str) {
|
||||
if let Some(vars) = &target.obj.compiled_variations {
|
||||
return Some(vars.clone());
|
||||
}
|
||||
if let Some(next_ref) = &target.obj.ref_string {
|
||||
return Self::resolve_variations(next_ref, schemas, visited);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub fn stamp_variations(&mut self, variations: Option<std::collections::HashSet<String>>) {
|
||||
self.obj.compiled_variations = variations.clone();
|
||||
self.map_children(|child| child.stamp_variations(variations.clone()));
|
||||
}
|
||||
|
||||
/// Collects every schema in this subtree that carries an `id`.
///
/// For each such schema an `(id, clone of the schema)` pair is appended to
/// `to_insert`. A schema is pushed before any of its children (pre-order), so
/// when ids repeat the later entries come from deeper in the tree.
///
/// Takes `&mut self` because the traversal goes through `map_children`.
pub fn harvest(&mut self, to_insert: &mut Vec<(String, Schema)>) {
    if let Some(id) = self.obj.id.clone() {
        to_insert.push((id, self.clone()));
    }

    self.map_children(|child| child.harvest(to_insert));
}
|
||||
|
||||
pub fn map_children<F>(&mut self, mut f: F)
|
||||
where
|
||||
F: FnMut(&mut Schema),
|
||||
{
|
||||
if let Some(props) = &mut self.obj.properties {
|
||||
for (_, v) in props {
|
||||
// Safe deep mutation workaround without unsafe Arc unwrap
|
||||
let mut inner = (**v).clone();
|
||||
inner.compile();
|
||||
f(&mut inner);
|
||||
*v = Arc::new(inner);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(pattern_props) = &mut self.obj.pattern_properties {
|
||||
for (_, v) in pattern_props {
|
||||
let mut inner = (**v).clone();
|
||||
f(&mut inner);
|
||||
*v = Arc::new(inner);
|
||||
}
|
||||
}
|
||||
|
||||
let mut map_arr = |arr: &mut Vec<Arc<Schema>>| {
|
||||
for v in arr.iter_mut() {
|
||||
let mut inner = (**v).clone();
|
||||
f(&mut inner);
|
||||
*v = Arc::new(inner);
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(arr) = &mut self.obj.prefix_items {
|
||||
for v in arr.iter_mut() {
|
||||
let mut inner = (**v).clone();
|
||||
inner.compile();
|
||||
*v = Arc::new(inner);
|
||||
}
|
||||
map_arr(arr);
|
||||
}
|
||||
|
||||
if let Some(arr) = &mut self.obj.all_of {
|
||||
for v in arr.iter_mut() {
|
||||
let mut inner = (**v).clone();
|
||||
inner.compile();
|
||||
*v = Arc::new(inner);
|
||||
}
|
||||
map_arr(arr);
|
||||
}
|
||||
|
||||
if let Some(arr) = &mut self.obj.any_of {
|
||||
for v in arr.iter_mut() {
|
||||
let mut inner = (**v).clone();
|
||||
inner.compile();
|
||||
*v = Arc::new(inner);
|
||||
}
|
||||
map_arr(arr);
|
||||
}
|
||||
|
||||
if let Some(arr) = &mut self.obj.one_of {
|
||||
for v in arr.iter_mut() {
|
||||
map_arr(arr);
|
||||
}
|
||||
|
||||
let mut map_opt = |opt: &mut Option<Arc<Schema>>| {
|
||||
if let Some(v) = opt {
|
||||
let mut inner = (**v).clone();
|
||||
inner.compile();
|
||||
f(&mut inner);
|
||||
*v = Arc::new(inner);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(v) = &mut self.obj.additional_properties {
|
||||
let mut inner = (**v).clone();
|
||||
inner.compile();
|
||||
*v = Arc::new(inner);
|
||||
}
|
||||
|
||||
if let Some(v) = &mut self.obj.items {
|
||||
let mut inner = (**v).clone();
|
||||
inner.compile();
|
||||
*v = Arc::new(inner);
|
||||
}
|
||||
|
||||
if let Some(v) = &mut self.obj.contains {
|
||||
let mut inner = (**v).clone();
|
||||
inner.compile();
|
||||
*v = Arc::new(inner);
|
||||
}
|
||||
|
||||
if let Some(v) = &mut self.obj.property_names {
|
||||
let mut inner = (**v).clone();
|
||||
inner.compile();
|
||||
*v = Arc::new(inner);
|
||||
}
|
||||
|
||||
if let Some(v) = &mut self.obj.not {
|
||||
let mut inner = (**v).clone();
|
||||
inner.compile();
|
||||
*v = Arc::new(inner);
|
||||
}
|
||||
|
||||
if let Some(v) = &mut self.obj.if_ {
|
||||
let mut inner = (**v).clone();
|
||||
inner.compile();
|
||||
*v = Arc::new(inner);
|
||||
}
|
||||
|
||||
if let Some(v) = &mut self.obj.then_ {
|
||||
let mut inner = (**v).clone();
|
||||
inner.compile();
|
||||
*v = Arc::new(inner);
|
||||
}
|
||||
|
||||
if let Some(v) = &mut self.obj.else_ {
|
||||
let mut inner = (**v).clone();
|
||||
inner.compile();
|
||||
*v = Arc::new(inner);
|
||||
}
|
||||
map_opt(&mut self.obj.additional_properties);
|
||||
map_opt(&mut self.obj.items);
|
||||
map_opt(&mut self.obj.contains);
|
||||
map_opt(&mut self.obj.property_names);
|
||||
map_opt(&mut self.obj.not);
|
||||
map_opt(&mut self.obj.if_);
|
||||
map_opt(&mut self.obj.then_);
|
||||
map_opt(&mut self.obj.else_);
|
||||
}
|
||||
}
|
||||
|
||||
@ -327,7 +335,37 @@ impl<'de> Deserialize<'de> for Schema {
|
||||
always_fail: !b,
|
||||
});
|
||||
}
|
||||
let obj: SchemaObject = serde_json::from_value(v.clone()).map_err(serde::de::Error::custom)?;
|
||||
let mut obj: SchemaObject =
|
||||
serde_json::from_value(v.clone()).map_err(serde::de::Error::custom)?;
|
||||
|
||||
// If a schema is effectively empty (except for potentially carrying an ID),
|
||||
// it functions as a boolean `true` schema in Draft2020 which means it should not
|
||||
// restrict additional properties natively
|
||||
let is_empty = obj.type_.is_none()
|
||||
&& obj.properties.is_none()
|
||||
&& obj.pattern_properties.is_none()
|
||||
&& obj.additional_properties.is_none()
|
||||
&& obj.required.is_none()
|
||||
&& obj.dependencies.is_none()
|
||||
&& obj.items.is_none()
|
||||
&& obj.prefix_items.is_none()
|
||||
&& obj.contains.is_none()
|
||||
&& obj.format.is_none()
|
||||
&& obj.enum_.is_none()
|
||||
&& obj.const_.is_none()
|
||||
&& obj.all_of.is_none()
|
||||
&& obj.any_of.is_none()
|
||||
&& obj.one_of.is_none()
|
||||
&& obj.not.is_none()
|
||||
&& obj.if_.is_none()
|
||||
&& obj.then_.is_none()
|
||||
&& obj.else_.is_none()
|
||||
&& obj.ref_string.is_none()
|
||||
&& obj.family.is_none();
|
||||
|
||||
if is_empty && obj.extensible.is_none() {
|
||||
obj.extensible = Some(true);
|
||||
}
|
||||
|
||||
Ok(Schema {
|
||||
obj,
|
||||
|
||||
Reference in New Issue
Block a user