validator refactor progress

This commit is contained in:
2026-03-03 00:13:37 -05:00
parent e14f53e7d9
commit 3898c43742
81 changed files with 6331 additions and 7934 deletions

356
src/database/schema.rs Normal file
View File

@ -0,0 +1,356 @@
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::BTreeMap;
use std::sync::Arc;
// Schema mirrors the Go Punc Generator's schema struct for consistency.
// It is an order-preserving representation of a JSON Schema.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SchemaObject {
// Core Schema Keywords
#[serde(rename = "$id")]
pub id: Option<String>,
#[serde(rename = "$ref")]
pub ref_string: Option<String>,
/*
Note: The `Ref` field in the Go struct is a pointer populated by the linker.
In Rust, we might handle this differently (e.g., separate lookup or Rc/Arc),
so we omit the direct recursive `Ref` field for now and rely on `ref_string`.
*/
pub description: Option<String>,
pub title: Option<String>,
#[serde(default)] // Allow missing type
#[serde(rename = "type")]
pub type_: Option<SchemaTypeOrArray>, // Handles string or array of strings
// Object Keywords
pub properties: Option<BTreeMap<String, Arc<Schema>>>,
#[serde(rename = "patternProperties")]
pub pattern_properties: Option<BTreeMap<String, Arc<Schema>>>,
#[serde(rename = "additionalProperties")]
pub additional_properties: Option<Arc<Schema>>,
#[serde(rename = "$family")]
pub family: Option<String>,
pub required: Option<Vec<String>>,
// dependencies can be schema dependencies or property dependencies
pub dependencies: Option<BTreeMap<String, Dependency>>,
// Array Keywords
#[serde(rename = "items")]
pub items: Option<Arc<Schema>>,
#[serde(rename = "prefixItems")]
pub prefix_items: Option<Vec<Arc<Schema>>>,
// String Validation
#[serde(rename = "minLength")]
pub min_length: Option<f64>,
#[serde(rename = "maxLength")]
pub max_length: Option<f64>,
pub pattern: Option<String>,
// Array Validation
#[serde(rename = "minItems")]
pub min_items: Option<f64>,
#[serde(rename = "maxItems")]
pub max_items: Option<f64>,
#[serde(rename = "uniqueItems")]
pub unique_items: Option<bool>,
#[serde(rename = "contains")]
pub contains: Option<Arc<Schema>>,
#[serde(rename = "minContains")]
pub min_contains: Option<f64>,
#[serde(rename = "maxContains")]
pub max_contains: Option<f64>,
// Object Validation
#[serde(rename = "minProperties")]
pub min_properties: Option<f64>,
#[serde(rename = "maxProperties")]
pub max_properties: Option<f64>,
#[serde(rename = "propertyNames")]
pub property_names: Option<Arc<Schema>>,
// Numeric Validation
pub format: Option<String>,
#[serde(rename = "enum")]
pub enum_: Option<Vec<Value>>, // `enum` is a reserved keyword in Rust
#[serde(
default,
rename = "const",
deserialize_with = "crate::validator::util::deserialize_some"
)]
pub const_: Option<Value>,
// Numeric Validation
#[serde(rename = "multipleOf")]
pub multiple_of: Option<f64>,
pub minimum: Option<f64>,
pub maximum: Option<f64>,
#[serde(rename = "exclusiveMinimum")]
pub exclusive_minimum: Option<f64>,
#[serde(rename = "exclusiveMaximum")]
pub exclusive_maximum: Option<f64>,
// Combining Keywords
#[serde(rename = "allOf")]
pub all_of: Option<Vec<Arc<Schema>>>,
#[serde(rename = "anyOf")]
pub any_of: Option<Vec<Arc<Schema>>>,
#[serde(rename = "oneOf")]
pub one_of: Option<Vec<Arc<Schema>>>,
#[serde(rename = "not")]
pub not: Option<Arc<Schema>>,
#[serde(rename = "if")]
pub if_: Option<Arc<Schema>>,
#[serde(rename = "then")]
pub then_: Option<Arc<Schema>>,
#[serde(rename = "else")]
pub else_: Option<Arc<Schema>>,
// Custom Vocabularies
pub form: Option<Vec<String>>,
pub display: Option<Vec<String>>,
#[serde(rename = "enumNames")]
pub enum_names: Option<Vec<String>>,
pub control: Option<String>,
pub actions: Option<BTreeMap<String, Action>>,
pub computer: Option<String>,
#[serde(default)]
pub extensible: Option<bool>,
// Compiled Fields (Hidden from JSON/Serde)
#[serde(skip)]
pub compiled_ref: Option<Arc<Schema>>,
#[serde(skip)]
pub compiled_allowed_types: Option<std::collections::HashSet<String>>,
#[serde(skip)]
pub compiled_format: Option<CompiledFormat>,
#[serde(skip)]
pub compiled_pattern: Option<CompiledRegex>,
#[serde(skip)]
pub compiled_pattern_properties: Option<Vec<(CompiledRegex, Arc<Schema>)>>,
}
pub enum ResolvedRef<'a> {
Local(&'a Schema),
Global(&'a Schema, &'a Schema),
}
/// Represents a compiled format validator
#[derive(Clone)]
pub enum CompiledFormat {
Func(fn(&serde_json::Value) -> Result<(), Box<dyn std::error::Error + Send + Sync>>),
Regex(regex::Regex),
}
impl std::fmt::Debug for CompiledFormat {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
CompiledFormat::Func(_) => write!(f, "CompiledFormat::Func(...)"),
CompiledFormat::Regex(r) => write!(f, "CompiledFormat::Regex({:?})", r),
}
}
}
/// A wrapper for compiled regex patterns
#[derive(Debug, Clone)]
pub struct CompiledRegex(pub regex::Regex);
#[derive(Debug, Clone, Serialize)]
pub struct Schema {
#[serde(flatten)]
pub obj: SchemaObject,
#[serde(skip)]
pub always_fail: bool,
}
impl Default for Schema {
fn default() -> Self {
Schema {
obj: SchemaObject::default(),
always_fail: false,
}
}
}
impl std::ops::Deref for Schema {
type Target = SchemaObject;
fn deref(&self) -> &Self::Target {
&self.obj
}
}
impl std::ops::DerefMut for Schema {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.obj
}
}
impl Schema {
pub fn resolve_ref(&self, _ref_string: &str) -> Option<&Arc<Schema>> {
// This is vestigial for now. References are global pointers. We will remove this shortly.
None
}
pub fn compile(&mut self) {
if let Some(format_str) = &self.obj.format {
if let Some(fmt) = crate::database::formats::FORMATS.get(format_str.as_str()) {
self.obj.compiled_format = Some(crate::database::schema::CompiledFormat::Func(fmt.func));
}
}
if let Some(pattern_str) = &self.obj.pattern {
if let Ok(re) = regex::Regex::new(pattern_str) {
self.obj.compiled_pattern = Some(crate::database::schema::CompiledRegex(re));
}
}
if let Some(pattern_props) = &self.obj.pattern_properties {
let mut compiled = Vec::new();
for (k, v) in pattern_props {
if let Ok(re) = regex::Regex::new(k) {
compiled.push((crate::database::schema::CompiledRegex(re), v.clone()));
}
}
if !compiled.is_empty() {
self.obj.compiled_pattern_properties = Some(compiled);
}
}
// Crawl children recursively to compile their internals
if let Some(props) = &mut self.obj.properties {
for (_, v) in props {
// Safe deep mutation workaround without unsafe Arc unwrap
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
}
if let Some(arr) = &mut self.obj.prefix_items {
for v in arr.iter_mut() {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
}
if let Some(arr) = &mut self.obj.all_of {
for v in arr.iter_mut() {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
}
if let Some(arr) = &mut self.obj.any_of {
for v in arr.iter_mut() {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
}
if let Some(arr) = &mut self.obj.one_of {
for v in arr.iter_mut() {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
}
if let Some(v) = &mut self.obj.additional_properties {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
if let Some(v) = &mut self.obj.items {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
if let Some(v) = &mut self.obj.contains {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
if let Some(v) = &mut self.obj.property_names {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
if let Some(v) = &mut self.obj.not {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
if let Some(v) = &mut self.obj.if_ {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
if let Some(v) = &mut self.obj.then_ {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
if let Some(v) = &mut self.obj.else_ {
let mut inner = (**v).clone();
inner.compile();
*v = Arc::new(inner);
}
}
}
impl<'de> Deserialize<'de> for Schema {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let v: Value = Deserialize::deserialize(deserializer)?;
if let Some(b) = v.as_bool() {
let mut obj = SchemaObject::default();
if b {
obj.extensible = Some(true);
}
return Ok(Schema {
obj,
always_fail: !b,
});
}
let obj: SchemaObject = serde_json::from_value(v.clone()).map_err(serde::de::Error::custom)?;
Ok(Schema {
obj,
always_fail: false,
})
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum SchemaTypeOrArray {
Single(String),
Multiple(Vec<String>),
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Action {
pub navigate: Option<String>,
pub punc: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Dependency {
Props(Vec<String>),
Schema(Arc<Schema>),
}