Files
jspg/src/database/schema.rs

720 lines
23 KiB
Rust

use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::BTreeMap;
use std::sync::Arc;
use std::sync::OnceLock;
pub fn serialize_once_lock<T: serde::Serialize, S: serde::Serializer>(
lock: &OnceLock<T>,
serializer: S,
) -> Result<S::Ok, S::Error> {
if let Some(val) = lock.get() {
val.serialize(serializer)
} else {
serializer.serialize_none()
}
}
pub fn is_once_lock_map_empty<K, V>(lock: &OnceLock<std::collections::BTreeMap<K, V>>) -> bool {
lock.get().map_or(true, |m| m.is_empty())
}
pub fn is_once_lock_vec_empty<T>(lock: &OnceLock<Vec<T>>) -> bool {
lock.get().map_or(true, |v| v.is_empty())
}
// Schema mirrors the Go Punc Generator's schema struct for consistency.
// It is an order-preserving representation of a JSON Schema.
pub fn deserialize_some<'de, D>(deserializer: D) -> Result<Option<Value>, D::Error>
where
D: serde::Deserializer<'de>,
{
let v = Value::deserialize(deserializer)?;
Ok(Some(v))
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SchemaObject {
// Core Schema Keywords
#[serde(rename = "$id")]
#[serde(skip_serializing_if = "Option::is_none")]
pub id: Option<String>,
#[serde(rename = "$ref")]
#[serde(skip_serializing_if = "Option::is_none")]
pub r#ref: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub title: Option<String>,
#[serde(default)] // Allow missing type
#[serde(rename = "type")]
#[serde(skip_serializing_if = "Option::is_none")]
pub type_: Option<SchemaTypeOrArray>, // Handles string or array of strings
// Object Keywords
#[serde(skip_serializing_if = "Option::is_none")]
pub properties: Option<BTreeMap<String, Arc<Schema>>>,
#[serde(rename = "patternProperties")]
#[serde(skip_serializing_if = "Option::is_none")]
pub pattern_properties: Option<BTreeMap<String, Arc<Schema>>>,
#[serde(rename = "additionalProperties")]
#[serde(skip_serializing_if = "Option::is_none")]
pub additional_properties: Option<Arc<Schema>>,
#[serde(rename = "$family")]
#[serde(skip_serializing_if = "Option::is_none")]
pub family: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub required: Option<Vec<String>>,
// dependencies can be schema dependencies or property dependencies
#[serde(skip_serializing_if = "Option::is_none")]
pub dependencies: Option<BTreeMap<String, Dependency>>,
// Array Keywords
#[serde(rename = "items")]
#[serde(skip_serializing_if = "Option::is_none")]
pub items: Option<Arc<Schema>>,
#[serde(rename = "prefixItems")]
#[serde(skip_serializing_if = "Option::is_none")]
pub prefix_items: Option<Vec<Arc<Schema>>>,
// String Validation
#[serde(rename = "minLength")]
#[serde(skip_serializing_if = "Option::is_none")]
pub min_length: Option<f64>,
#[serde(rename = "maxLength")]
#[serde(skip_serializing_if = "Option::is_none")]
pub max_length: Option<f64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub pattern: Option<String>,
// Array Validation
#[serde(rename = "minItems")]
#[serde(skip_serializing_if = "Option::is_none")]
pub min_items: Option<f64>,
#[serde(rename = "maxItems")]
#[serde(skip_serializing_if = "Option::is_none")]
pub max_items: Option<f64>,
#[serde(rename = "uniqueItems")]
#[serde(skip_serializing_if = "Option::is_none")]
pub unique_items: Option<bool>,
#[serde(rename = "contains")]
#[serde(skip_serializing_if = "Option::is_none")]
pub contains: Option<Arc<Schema>>,
#[serde(rename = "minContains")]
#[serde(skip_serializing_if = "Option::is_none")]
pub min_contains: Option<f64>,
#[serde(rename = "maxContains")]
#[serde(skip_serializing_if = "Option::is_none")]
pub max_contains: Option<f64>,
// Object Validation
#[serde(rename = "minProperties")]
#[serde(skip_serializing_if = "Option::is_none")]
pub min_properties: Option<f64>,
#[serde(rename = "maxProperties")]
#[serde(skip_serializing_if = "Option::is_none")]
pub max_properties: Option<f64>,
#[serde(rename = "propertyNames")]
#[serde(skip_serializing_if = "Option::is_none")]
pub property_names: Option<Arc<Schema>>,
// Numeric Validation
#[serde(skip_serializing_if = "Option::is_none")]
pub format: Option<String>,
#[serde(rename = "enum")]
#[serde(skip_serializing_if = "Option::is_none")]
pub enum_: Option<Vec<Value>>, // `enum` is a reserved keyword in Rust
#[serde(
default,
rename = "const",
deserialize_with = "crate::database::schema::deserialize_some"
)]
#[serde(skip_serializing_if = "Option::is_none")]
pub const_: Option<Value>,
// Numeric Validation
#[serde(rename = "multipleOf")]
#[serde(skip_serializing_if = "Option::is_none")]
pub multiple_of: Option<f64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub minimum: Option<f64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub maximum: Option<f64>,
#[serde(rename = "exclusiveMinimum")]
#[serde(skip_serializing_if = "Option::is_none")]
pub exclusive_minimum: Option<f64>,
#[serde(rename = "exclusiveMaximum")]
#[serde(skip_serializing_if = "Option::is_none")]
pub exclusive_maximum: Option<f64>,
// Combining Keywords
#[serde(rename = "allOf")]
#[serde(skip_serializing_if = "Option::is_none")]
pub all_of: Option<Vec<Arc<Schema>>>,
#[serde(rename = "oneOf")]
#[serde(skip_serializing_if = "Option::is_none")]
pub one_of: Option<Vec<Arc<Schema>>>,
#[serde(rename = "not")]
#[serde(skip_serializing_if = "Option::is_none")]
pub not: Option<Arc<Schema>>,
#[serde(rename = "if")]
#[serde(skip_serializing_if = "Option::is_none")]
pub if_: Option<Arc<Schema>>,
#[serde(rename = "then")]
#[serde(skip_serializing_if = "Option::is_none")]
pub then_: Option<Arc<Schema>>,
#[serde(rename = "else")]
#[serde(skip_serializing_if = "Option::is_none")]
pub else_: Option<Arc<Schema>>,
// Custom Vocabularies
#[serde(skip_serializing_if = "Option::is_none")]
pub form: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub display: Option<Vec<String>>,
#[serde(rename = "enumNames")]
#[serde(skip_serializing_if = "Option::is_none")]
pub enum_names: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub control: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub actions: Option<BTreeMap<String, Action>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub computer: Option<String>,
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub extensible: Option<bool>,
#[serde(rename = "compiledProperties")]
#[serde(skip_deserializing)]
#[serde(skip_serializing_if = "crate::database::schema::is_once_lock_vec_empty")]
#[serde(serialize_with = "crate::database::schema::serialize_once_lock")]
pub compiled_property_names: OnceLock<Vec<String>>,
#[serde(skip)]
pub compiled_properties: OnceLock<BTreeMap<String, Arc<Schema>>>,
#[serde(rename = "compiledEdges")]
#[serde(skip_deserializing)]
#[serde(skip_serializing_if = "crate::database::schema::is_once_lock_map_empty")]
#[serde(serialize_with = "crate::database::schema::serialize_once_lock")]
pub compiled_edges: OnceLock<BTreeMap<String, crate::database::edge::Edge>>,
#[serde(skip)]
pub compiled_format: OnceLock<CompiledFormat>,
#[serde(skip)]
pub compiled_pattern: OnceLock<CompiledRegex>,
#[serde(skip)]
pub compiled_pattern_properties: OnceLock<Vec<(CompiledRegex, Arc<Schema>)>>,
}
/// Represents a compiled format validator
#[derive(Clone)]
pub enum CompiledFormat {
Func(fn(&serde_json::Value) -> Result<(), Box<dyn std::error::Error + Send + Sync>>),
Regex(regex::Regex),
}
impl std::fmt::Debug for CompiledFormat {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
CompiledFormat::Func(_) => write!(f, "CompiledFormat::Func(...)"),
CompiledFormat::Regex(r) => write!(f, "CompiledFormat::Regex({:?})", r),
}
}
}
/// A wrapper for compiled regex patterns
#[derive(Debug, Clone)]
pub struct CompiledRegex(pub regex::Regex);
#[derive(Debug, Clone, Serialize, Default)]
pub struct Schema {
#[serde(flatten)]
pub obj: SchemaObject,
#[serde(skip)]
pub always_fail: bool,
}
impl std::ops::Deref for Schema {
type Target = SchemaObject;
fn deref(&self) -> &Self::Target {
&self.obj
}
}
impl std::ops::DerefMut for Schema {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.obj
}
}
impl Schema {
pub fn compile(
&self,
db: &crate::database::Database,
visited: &mut std::collections::HashSet<String>,
) {
if self.obj.compiled_properties.get().is_some() {
return;
}
if let Some(id) = &self.obj.id {
if !visited.insert(id.clone()) {
return; // Break cyclical resolution
}
}
if let Some(format_str) = &self.obj.format {
if let Some(fmt) = crate::database::formats::FORMATS.get(format_str.as_str()) {
let _ = self
.obj
.compiled_format
.set(crate::database::schema::CompiledFormat::Func(fmt.func));
}
}
if let Some(pattern_str) = &self.obj.pattern {
if let Ok(re) = regex::Regex::new(pattern_str) {
let _ = self
.obj
.compiled_pattern
.set(crate::database::schema::CompiledRegex(re));
}
}
if let Some(pattern_props) = &self.obj.pattern_properties {
let mut compiled = Vec::new();
for (k, v) in pattern_props {
if let Ok(re) = regex::Regex::new(k) {
compiled.push((crate::database::schema::CompiledRegex(re), v.clone()));
}
}
if !compiled.is_empty() {
let _ = self.obj.compiled_pattern_properties.set(compiled);
}
}
let mut props = std::collections::BTreeMap::new();
// 1. Resolve INHERITANCE dependencies first
if let Some(ref_id) = &self.obj.r#ref {
if let Some(parent) = db.schemas.get(ref_id) {
parent.compile(db, visited);
if let Some(p_props) = parent.obj.compiled_properties.get() {
props.extend(p_props.clone());
}
}
}
if let Some(all_of) = &self.obj.all_of {
for ao in all_of {
ao.compile(db, visited);
if let Some(ao_props) = ao.obj.compiled_properties.get() {
props.extend(ao_props.clone());
}
}
}
if let Some(then_schema) = &self.obj.then_ {
then_schema.compile(db, visited);
if let Some(t_props) = then_schema.obj.compiled_properties.get() {
props.extend(t_props.clone());
}
}
if let Some(else_schema) = &self.obj.else_ {
else_schema.compile(db, visited);
if let Some(e_props) = else_schema.obj.compiled_properties.get() {
props.extend(e_props.clone());
}
}
// 2. Add local properties
if let Some(local_props) = &self.obj.properties {
for (k, v) in local_props {
props.insert(k.clone(), v.clone());
}
}
// 3. Set the OnceLock!
let _ = self.obj.compiled_properties.set(props.clone());
let mut names: Vec<String> = props.keys().cloned().collect();
names.sort();
let _ = self.obj.compiled_property_names.set(names);
// 4. Compute Edges natively
let schema_edges = self.compile_edges(db, visited, &props);
let _ = self.obj.compiled_edges.set(schema_edges);
// 5. Build our inline children properties recursively NOW! (Depth-first search)
if let Some(local_props) = &self.obj.properties {
for child in local_props.values() {
child.compile(db, visited);
}
}
if let Some(items) = &self.obj.items {
items.compile(db, visited);
}
if let Some(pattern_props) = &self.obj.pattern_properties {
for child in pattern_props.values() {
child.compile(db, visited);
}
}
if let Some(additional_props) = &self.obj.additional_properties {
additional_props.compile(db, visited);
}
if let Some(one_of) = &self.obj.one_of {
for child in one_of {
child.compile(db, visited);
}
}
if let Some(arr) = &self.obj.prefix_items {
for child in arr {
child.compile(db, visited);
}
}
if let Some(child) = &self.obj.not {
child.compile(db, visited);
}
if let Some(child) = &self.obj.contains {
child.compile(db, visited);
}
if let Some(child) = &self.obj.property_names {
child.compile(db, visited);
}
if let Some(child) = &self.obj.if_ {
child.compile(db, visited);
}
if let Some(id) = &self.obj.id {
visited.remove(id);
}
}
#[allow(unused_variables)]
fn validate_identifier(id: &str, field_name: &str) -> Result<(), String> {
#[cfg(not(test))]
for c in id.chars() {
if !c.is_ascii_lowercase() && !c.is_ascii_digit() && c != '_' && c != '.' {
return Err(format!("Invalid character '{}' in JSON Schema '{}' property: '{}'. Identifiers must exclusively contain [a-z0-9_.]", c, field_name, id));
}
}
Ok(())
}
pub fn collect_schemas(
&mut self,
tracking_path: Option<String>,
to_insert: &mut Vec<(String, Schema)>,
) -> Result<(), String> {
if let Some(id) = &self.obj.id {
Self::validate_identifier(id, "$id")?;
to_insert.push((id.clone(), self.clone()));
}
if let Some(r#ref) = &self.obj.r#ref {
Self::validate_identifier(r#ref, "$ref")?;
}
if let Some(family) = &self.obj.family {
Self::validate_identifier(family, "$family")?;
}
// Is this schema an inline ad-hoc composition?
// Meaning it has a tracking context, lacks an explicit $id, but extends an Entity ref with explicit properties!
if self.obj.id.is_none() && self.obj.r#ref.is_some() && self.obj.properties.is_some() {
if let Some(ref path) = tracking_path {
to_insert.push((path.clone(), self.clone()));
}
}
// Provide the path origin to children natively, prioritizing the explicit `$id` boundary if one exists
let origin_path = self.obj.id.clone().or(tracking_path);
self.collect_child_schemas(origin_path, to_insert)?;
Ok(())
}
pub fn collect_child_schemas(
&mut self,
origin_path: Option<String>,
to_insert: &mut Vec<(String, Schema)>,
) -> Result<(), String> {
if let Some(props) = &mut self.obj.properties {
for (k, v) in props.iter_mut() {
let mut inner = (**v).clone();
let next_path = origin_path.as_ref().map(|o| format!("{}/{}", o, k));
inner.collect_schemas(next_path, to_insert)?;
*v = Arc::new(inner);
}
}
if let Some(pattern_props) = &mut self.obj.pattern_properties {
for (k, v) in pattern_props.iter_mut() {
let mut inner = (**v).clone();
let next_path = origin_path.as_ref().map(|o| format!("{}/{}", o, k));
inner.collect_schemas(next_path, to_insert)?;
*v = Arc::new(inner);
}
}
let mut map_arr = |arr: &mut Vec<Arc<Schema>>| -> Result<(), String> {
for v in arr.iter_mut() {
let mut inner = (**v).clone();
inner.collect_schemas(origin_path.clone(), to_insert)?;
*v = Arc::new(inner);
}
Ok(())
};
if let Some(arr) = &mut self.obj.prefix_items { map_arr(arr)?; }
if let Some(arr) = &mut self.obj.all_of { map_arr(arr)?; }
if let Some(arr) = &mut self.obj.one_of { map_arr(arr)?; }
let mut map_opt = |opt: &mut Option<Arc<Schema>>, pass_path: bool| -> Result<(), String> {
if let Some(v) = opt {
let mut inner = (**v).clone();
let next = if pass_path { origin_path.clone() } else { None };
inner.collect_schemas(next, to_insert)?;
*v = Arc::new(inner);
}
Ok(())
};
map_opt(&mut self.obj.additional_properties, false)?;
// `items` absolutely must inherit the EXACT property path assigned to the Array wrapper!
// This allows nested Arrays enclosing bare Entity structs to correctly register as the boundary mapping.
map_opt(&mut self.obj.items, true)?;
map_opt(&mut self.obj.not, false)?;
map_opt(&mut self.obj.contains, false)?;
map_opt(&mut self.obj.property_names, false)?;
map_opt(&mut self.obj.if_, false)?;
map_opt(&mut self.obj.then_, false)?;
map_opt(&mut self.obj.else_, false)?;
Ok(())
}
pub fn compile_edges(
&self,
db: &crate::database::Database,
visited: &mut std::collections::HashSet<String>,
props: &std::collections::BTreeMap<String, std::sync::Arc<Schema>>,
) -> std::collections::BTreeMap<String, crate::database::edge::Edge> {
let mut schema_edges = std::collections::BTreeMap::new();
let mut parent_type_name = None;
if let Some(family) = &self.obj.family {
parent_type_name = Some(family.split('.').next_back().unwrap_or(family).to_string());
} else if let Some(id) = &self.obj.id {
parent_type_name = Some(id.split('.').next_back().unwrap_or("").to_string());
} else if let Some(ref_id) = &self.obj.r#ref {
parent_type_name = Some(ref_id.split('.').next_back().unwrap_or("").to_string());
}
if let Some(p_type) = parent_type_name {
if db.types.contains_key(&p_type) {
for (prop_name, prop_schema) in props {
let mut child_type_name = None;
let mut target_schema = prop_schema.clone();
if let Some(crate::database::schema::SchemaTypeOrArray::Single(t)) =
&prop_schema.obj.type_
{
if t == "array" {
if let Some(items) = &prop_schema.obj.items {
target_schema = items.clone();
}
}
}
if let Some(family) = &target_schema.obj.family {
child_type_name = Some(family.split('.').next_back().unwrap_or(family).to_string());
} else if let Some(ref_id) = target_schema.obj.r#ref.as_ref() {
child_type_name = Some(ref_id.split('.').next_back().unwrap_or("").to_string());
} else if let Some(arr) = &target_schema.obj.one_of {
if let Some(first) = arr.first() {
if let Some(ref_id) = first.obj.id.as_ref().or(first.obj.r#ref.as_ref()) {
child_type_name = Some(ref_id.split('.').next_back().unwrap_or("").to_string());
}
}
}
if let Some(c_type) = child_type_name {
if db.types.contains_key(&c_type) {
target_schema.compile(db, visited);
if let Some(compiled_target_props) = target_schema.obj.compiled_properties.get() {
let keys_for_ambiguity: Vec<String> =
compiled_target_props.keys().cloned().collect();
if let Some((relation, is_forward)) =
resolve_relation(db, &p_type, &c_type, prop_name, Some(&keys_for_ambiguity))
{
schema_edges.insert(
prop_name.clone(),
crate::database::edge::Edge {
constraint: relation.constraint.clone(),
forward: is_forward,
},
);
}
}
}
}
}
}
}
schema_edges
}
}
pub(crate) fn resolve_relation<'a>(
db: &'a crate::database::Database,
parent_type: &str,
child_type: &str,
prop_name: &str,
relative_keys: Option<&Vec<String>>,
) -> Option<(&'a crate::database::relation::Relation, bool)> {
if parent_type == "entity" && child_type == "entity" {
return None;
}
let p_def = db.types.get(parent_type)?;
let c_def = db.types.get(child_type)?;
let mut matching_rels = Vec::new();
let mut directions = Vec::new();
for rel in db.relations.values() {
let is_forward = p_def.hierarchy.contains(&rel.source_type)
&& c_def.hierarchy.contains(&rel.destination_type);
let is_reverse = p_def.hierarchy.contains(&rel.destination_type)
&& c_def.hierarchy.contains(&rel.source_type);
if is_forward {
matching_rels.push(rel);
directions.push(true);
} else if is_reverse {
matching_rels.push(rel);
directions.push(false);
}
}
if matching_rels.is_empty() {
return None;
}
if matching_rels.len() == 1 {
return Some((matching_rels[0], directions[0]));
}
let mut chosen_idx = 0;
let mut resolved = false;
for (i, rel) in matching_rels.iter().enumerate() {
if let Some(prefix) = &rel.prefix {
if prop_name.starts_with(prefix)
|| prefix.starts_with(prop_name)
|| prefix.replace("_", "") == prop_name.replace("_", "")
{
chosen_idx = i;
resolved = true;
break;
}
}
}
if !resolved && relative_keys.is_some() {
let keys = relative_keys.unwrap();
let mut missing_prefix_ids = Vec::new();
for (i, rel) in matching_rels.iter().enumerate() {
if let Some(prefix) = &rel.prefix {
if !keys.contains(prefix) {
missing_prefix_ids.push(i);
}
}
}
if missing_prefix_ids.len() == 1 {
chosen_idx = missing_prefix_ids[0];
}
}
Some((matching_rels[chosen_idx], directions[chosen_idx]))
}
impl<'de> Deserialize<'de> for Schema {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let v: Value = Deserialize::deserialize(deserializer)?;
if let Some(b) = v.as_bool() {
let mut obj = SchemaObject::default();
if b {
obj.extensible = Some(true);
}
return Ok(Schema {
obj,
always_fail: !b,
});
}
let mut obj: SchemaObject =
serde_json::from_value(v.clone()).map_err(serde::de::Error::custom)?;
// If a schema is effectively empty (except for potentially carrying an ID),
// it functions as a boolean `true` schema in Draft2020 which means it should not
// restrict additional properties natively
let is_empty = obj.type_.is_none()
&& obj.properties.is_none()
&& obj.pattern_properties.is_none()
&& obj.additional_properties.is_none()
&& obj.required.is_none()
&& obj.dependencies.is_none()
&& obj.items.is_none()
&& obj.prefix_items.is_none()
&& obj.contains.is_none()
&& obj.format.is_none()
&& obj.enum_.is_none()
&& obj.const_.is_none()
&& obj.all_of.is_none()
&& obj.one_of.is_none()
&& obj.not.is_none()
&& obj.if_.is_none()
&& obj.then_.is_none()
&& obj.else_.is_none()
&& obj.r#ref.is_none()
&& obj.family.is_none();
if is_empty && obj.extensible.is_none() {
obj.extensible = Some(true);
}
Ok(Schema {
obj,
always_fail: false,
})
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum SchemaTypeOrArray {
Single(String),
Multiple(Vec<String>),
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Action {
#[serde(skip_serializing_if = "Option::is_none")]
pub navigate: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub punc: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Dependency {
Props(Vec<String>),
Schema(Arc<Schema>),
}