significantly simplified the validator and work continues on query

This commit is contained in:
2026-03-03 17:58:31 -05:00
parent 3898c43742
commit e7f20e2cb6
58 changed files with 5446 additions and 5693 deletions

View File

@ -124,7 +124,7 @@ pub struct SchemaObject {
#[serde(skip)]
pub compiled_ref: Option<Arc<Schema>>,
#[serde(skip)]
pub compiled_allowed_types: Option<std::collections::HashSet<String>>,
pub compiled_variations: Option<std::collections::HashSet<String>>,
#[serde(skip)]
pub compiled_format: Option<CompiledFormat>,
#[serde(skip)]
@ -133,11 +133,6 @@ pub struct SchemaObject {
pub compiled_pattern_properties: Option<Vec<(CompiledRegex, Arc<Schema>)>>,
}
pub enum ResolvedRef<'a> {
Local(&'a Schema),
Global(&'a Schema, &'a Schema),
}
/// Represents a compiled format validator
#[derive(Clone)]
pub enum CompiledFormat {
@ -188,12 +183,9 @@ impl std::ops::DerefMut for Schema {
}
impl Schema {
pub fn resolve_ref(&self, _ref_string: &str) -> Option<&Arc<Schema>> {
// This is vestigial for now. References are global pointers. We will remove this shortly.
None
}
pub fn compile_internals(&mut self) {
self.map_children(|child| child.compile_internals());
pub fn compile(&mut self) {
if let Some(format_str) = &self.obj.format {
if let Some(fmt) = crate::database::formats::FORMATS.get(format_str.as_str()) {
self.obj.compiled_format = Some(crate::database::schema::CompiledFormat::Func(fmt.func));
@ -217,96 +209,112 @@ impl Schema {
self.obj.compiled_pattern_properties = Some(compiled);
}
}
}
// Crawl children recursively to compile their internals
/// Resolves this schema's `ref_string` against `schemas` and then does the same for
/// every child reached through `map_children`.
///
/// When the reference key is present in `schemas`, a clone of the target is stored in
/// `compiled_ref`. If this schema has no `compiled_variations` of its own yet, they are
/// looked up by following the reference chain with `resolve_variations`.
/// A missing key leaves this schema unchanged; children are still visited.
pub fn link_refs(&mut self, schemas: &std::collections::HashMap<String, Schema>) {
    let resolved = self
        .obj
        .ref_string
        .as_ref()
        .and_then(|key| schemas.get(key).map(|target| (key, target)));

    if let Some((key, target)) = resolved {
        self.obj.compiled_ref = Some(Arc::new(target.clone()));

        // Only fill in variations when none were set on this schema already.
        if self.obj.compiled_variations.is_none() {
            let mut seen = std::collections::HashSet::new();
            self.obj.compiled_variations = Self::resolve_variations(key, schemas, &mut seen);
        }
    }

    self.map_children(|child| child.link_refs(schemas));
}
fn resolve_variations(
ref_str: &str,
schemas: &std::collections::HashMap<String, Schema>,
visited: &mut std::collections::HashSet<String>,
) -> Option<std::collections::HashSet<String>> {
if !visited.insert(ref_str.to_string()) {
return None; // Cycle detected
}
if let Some(target) = schemas.get(ref_str) {
if let Some(vars) = &target.obj.compiled_variations {
return Some(vars.clone());
}
if let Some(next_ref) = &target.obj.ref_string {
return Self::resolve_variations(next_ref, schemas, visited);
}
}
None
}
/// Sets `compiled_variations` to `variations` on this schema and on every descendant
/// reached through `map_children`.
///
/// Each child receives its own clone of the set. The children are stamped first so
/// that the owned `variations` value can then be moved into this node, saving one
/// `HashSet` clone per visited schema compared with cloning before recursing. The
/// child pass does not read this node's `compiled_variations`, so the resulting tree
/// is the same either way.
pub fn stamp_variations(&mut self, variations: Option<std::collections::HashSet<String>>) {
    self.map_children(|child| child.stamp_variations(variations.clone()));
    self.obj.compiled_variations = variations;
}
/// Collects every schema in this subtree that carries an `id`.
///
/// If this schema has an `id`, an `(id, clone of self)` pair is appended to
/// `to_insert` before the children are visited; the children are then harvested in
/// the order `map_children` yields them.
pub fn harvest(&mut self, to_insert: &mut Vec<(String, Schema)>) {
    let own_entry = self
        .obj
        .id
        .as_ref()
        .map(|id| (id.clone(), self.clone()));
    // `extend` over an `Option` pushes zero or one element.
    to_insert.extend(own_entry);

    self.map_children(|child| child.harvest(to_insert));
}
pub fn map_children<F>(&mut self, mut f: F)
where
F: FnMut(&mut Schema),
{
if let Some(props) = &mut self.obj.properties {
for (_, v) in props {
// Safe deep mutation workaround without unsafe Arc unwrap
let mut inner = (**v).clone();
inner.compile();
f(&mut inner);
*v = Arc::new(inner);
}
}
if let Some(pattern_props) = &mut self.obj.pattern_properties {
for (_, v) in pattern_props {
let mut inner = (**v).clone();
f(&mut inner);
*v = Arc::new(inner);
}
}
let mut map_arr = |arr: &mut Vec<Arc<Schema>>| {
for v in arr.iter_mut() {
let mut inner = (**v).clone();
f(&mut inner);
*v = Arc::new(inner);
}
};
if let Some(arr) = &mut self.obj.prefix_items {
for v in arr.iter_mut() {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
map_arr(arr);
}
if let Some(arr) = &mut self.obj.all_of {
for v in arr.iter_mut() {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
map_arr(arr);
}
if let Some(arr) = &mut self.obj.any_of {
for v in arr.iter_mut() {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
map_arr(arr);
}
if let Some(arr) = &mut self.obj.one_of {
for v in arr.iter_mut() {
map_arr(arr);
}
let mut map_opt = |opt: &mut Option<Arc<Schema>>| {
if let Some(v) = opt {
let mut inner = (**v).clone();
inner.compile();
f(&mut inner);
*v = Arc::new(inner);
}
}
};
if let Some(v) = &mut self.obj.additional_properties {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
if let Some(v) = &mut self.obj.items {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
if let Some(v) = &mut self.obj.contains {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
if let Some(v) = &mut self.obj.property_names {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
if let Some(v) = &mut self.obj.not {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
if let Some(v) = &mut self.obj.if_ {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
if let Some(v) = &mut self.obj.then_ {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
if let Some(v) = &mut self.obj.else_ {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
map_opt(&mut self.obj.additional_properties);
map_opt(&mut self.obj.items);
map_opt(&mut self.obj.contains);
map_opt(&mut self.obj.property_names);
map_opt(&mut self.obj.not);
map_opt(&mut self.obj.if_);
map_opt(&mut self.obj.then_);
map_opt(&mut self.obj.else_);
}
}
@ -327,7 +335,37 @@ impl<'de> Deserialize<'de> for Schema {
always_fail: !b,
});
}
let obj: SchemaObject = serde_json::from_value(v.clone()).map_err(serde::de::Error::custom)?;
let mut obj: SchemaObject =
serde_json::from_value(v.clone()).map_err(serde::de::Error::custom)?;
// If a schema is effectively empty (except for potentially carrying an ID),
// it functions as a boolean `true` schema in Draft2020 which means it should not
// restrict additional properties natively
let is_empty = obj.type_.is_none()
&& obj.properties.is_none()
&& obj.pattern_properties.is_none()
&& obj.additional_properties.is_none()
&& obj.required.is_none()
&& obj.dependencies.is_none()
&& obj.items.is_none()
&& obj.prefix_items.is_none()
&& obj.contains.is_none()
&& obj.format.is_none()
&& obj.enum_.is_none()
&& obj.const_.is_none()
&& obj.all_of.is_none()
&& obj.any_of.is_none()
&& obj.one_of.is_none()
&& obj.not.is_none()
&& obj.if_.is_none()
&& obj.then_.is_none()
&& obj.else_.is_none()
&& obj.ref_string.is_none()
&& obj.family.is_none();
if is_empty && obj.extensible.is_none() {
obj.extensible = Some(true);
}
Ok(Schema {
obj,