jspg progress
This commit is contained in:
395
src/compiler.rs
Normal file
395
src/compiler.rs
Normal file
@ -0,0 +1,395 @@
|
||||
use crate::schema::Schema;
|
||||
use regex::Regex;
|
||||
use serde_json::Value;
|
||||
use std::collections::HashMap;
|
||||
use std::error::Error;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Represents a compiled format validator
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum CompiledFormat {
|
||||
/// A simple function pointer validator
|
||||
Func(fn(&Value) -> Result<(), Box<dyn Error + Send + Sync>>),
|
||||
/// A regex-based validator
|
||||
Regex(Regex),
|
||||
}
|
||||
|
||||
/// A fully compiled schema with a root node and a pre-calculated index map.
|
||||
/// This allows O(1) lookup of any anchor or $id within the schema tree.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CompiledSchema {
|
||||
pub root: Arc<Schema>,
|
||||
pub index: HashMap<String, Arc<Schema>>,
|
||||
}
|
||||
|
||||
/// A wrapper for compiled regex patterns
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CompiledRegex(pub Regex);
|
||||
|
||||
/// The Compiler is responsible for pre-calculating high-cost schema operations
|
||||
pub struct Compiler;
|
||||
|
||||
impl Compiler {
|
||||
/// Internal: Compiles formats and regexes in-place
|
||||
fn compile_formats_and_regexes(schema: &mut Schema) {
|
||||
// 1. Compile Format
|
||||
if let Some(format_str) = &schema.format {
|
||||
if let Some(fmt) = crate::formats::FORMATS.get(format_str.as_str()) {
|
||||
schema.compiled_format = Some(CompiledFormat::Func(fmt.func));
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Compile Pattern (regex)
|
||||
if let Some(pattern_str) = &schema.pattern {
|
||||
if let Ok(re) = Regex::new(pattern_str) {
|
||||
schema.compiled_pattern = Some(CompiledRegex(re));
|
||||
}
|
||||
}
|
||||
|
||||
// 2.5 Compile Pattern Properties
|
||||
if let Some(pp) = &schema.pattern_properties {
|
||||
let mut compiled_pp = Vec::new();
|
||||
for (pattern, sub_schema) in pp {
|
||||
if let Ok(re) = Regex::new(pattern) {
|
||||
compiled_pp.push((CompiledRegex(re), sub_schema.clone()));
|
||||
} else {
|
||||
eprintln!(
|
||||
"Invalid patternProperty regex in schema (compile time): {}",
|
||||
pattern
|
||||
);
|
||||
}
|
||||
}
|
||||
if !compiled_pp.is_empty() {
|
||||
schema.compiled_pattern_properties = Some(compiled_pp);
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Recurse
|
||||
Self::compile_recursive(schema);
|
||||
}
|
||||
|
||||
fn normalize_dependencies(schema: &mut Schema) {
|
||||
if let Some(deps) = schema.dependencies.take() {
|
||||
for (key, dep) in deps {
|
||||
match dep {
|
||||
crate::schema::Dependency::Props(props) => {
|
||||
schema
|
||||
.dependent_required
|
||||
.get_or_insert_with(std::collections::BTreeMap::new)
|
||||
.insert(key, props);
|
||||
}
|
||||
crate::schema::Dependency::Schema(sub_schema) => {
|
||||
schema
|
||||
.dependent_schemas
|
||||
.get_or_insert_with(std::collections::BTreeMap::new)
|
||||
.insert(key, sub_schema);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn compile_recursive(schema: &mut Schema) {
|
||||
Self::normalize_dependencies(schema);
|
||||
|
||||
// Compile self
|
||||
if let Some(format_str) = &schema.format {
|
||||
if let Some(fmt) = crate::formats::FORMATS.get(format_str.as_str()) {
|
||||
schema.compiled_format = Some(CompiledFormat::Func(fmt.func));
|
||||
}
|
||||
}
|
||||
if let Some(pattern_str) = &schema.pattern {
|
||||
if let Ok(re) = Regex::new(pattern_str) {
|
||||
schema.compiled_pattern = Some(CompiledRegex(re));
|
||||
}
|
||||
}
|
||||
|
||||
// Recurse
|
||||
|
||||
if let Some(defs) = &mut schema.definitions {
|
||||
for s in defs.values_mut() {
|
||||
Self::compile_recursive(Arc::make_mut(s));
|
||||
}
|
||||
}
|
||||
if let Some(defs) = &mut schema.defs {
|
||||
for s in defs.values_mut() {
|
||||
Self::compile_recursive(Arc::make_mut(s));
|
||||
}
|
||||
}
|
||||
if let Some(props) = &mut schema.properties {
|
||||
for s in props.values_mut() {
|
||||
Self::compile_recursive(Arc::make_mut(s));
|
||||
}
|
||||
}
|
||||
|
||||
// ... Recurse logic ...
|
||||
if let Some(items) = &mut schema.items {
|
||||
Self::compile_recursive(Arc::make_mut(items));
|
||||
}
|
||||
if let Some(prefix_items) = &mut schema.prefix_items {
|
||||
for s in prefix_items {
|
||||
Self::compile_recursive(Arc::make_mut(s));
|
||||
}
|
||||
}
|
||||
if let Some(not) = &mut schema.not {
|
||||
Self::compile_recursive(Arc::make_mut(not));
|
||||
}
|
||||
if let Some(all_of) = &mut schema.all_of {
|
||||
for s in all_of {
|
||||
Self::compile_recursive(Arc::make_mut(s));
|
||||
}
|
||||
}
|
||||
if let Some(any_of) = &mut schema.any_of {
|
||||
for s in any_of {
|
||||
Self::compile_recursive(Arc::make_mut(s));
|
||||
}
|
||||
}
|
||||
if let Some(one_of) = &mut schema.one_of {
|
||||
for s in one_of {
|
||||
Self::compile_recursive(Arc::make_mut(s));
|
||||
}
|
||||
}
|
||||
if let Some(s) = &mut schema.if_ {
|
||||
Self::compile_recursive(Arc::make_mut(s));
|
||||
}
|
||||
if let Some(s) = &mut schema.then_ {
|
||||
Self::compile_recursive(Arc::make_mut(s));
|
||||
}
|
||||
if let Some(s) = &mut schema.else_ {
|
||||
Self::compile_recursive(Arc::make_mut(s));
|
||||
}
|
||||
|
||||
if let Some(ds) = &mut schema.dependent_schemas {
|
||||
for s in ds.values_mut() {
|
||||
Self::compile_recursive(Arc::make_mut(s));
|
||||
}
|
||||
}
|
||||
if let Some(pn) = &mut schema.property_names {
|
||||
Self::compile_recursive(Arc::make_mut(pn));
|
||||
}
|
||||
}
|
||||
|
||||
/// Recursively traverses the schema tree to build a map of all internal Anchors ($id) and JSON Pointers.
|
||||
fn compile_index(
|
||||
schema: &Arc<Schema>,
|
||||
index: &mut HashMap<String, Arc<Schema>>,
|
||||
parent_base: Option<String>,
|
||||
pointer: json_pointer::JsonPointer<String, Vec<String>>,
|
||||
) {
|
||||
// 1. Index using Parent Base (Path from Parent)
|
||||
if let Some(base) = &parent_base {
|
||||
// We use the pointer's string representation (e.g., "/definitions/foo")
|
||||
// and append it to the base.
|
||||
let fragment = pointer.to_string();
|
||||
let ptr_uri = if fragment.is_empty() {
|
||||
base.clone()
|
||||
} else {
|
||||
format!("{}#{}", base, fragment)
|
||||
};
|
||||
index.insert(ptr_uri, schema.clone());
|
||||
}
|
||||
|
||||
// 2. Determine Current Scope... (unchanged logic, just use pointer)
|
||||
let mut current_base = parent_base.clone();
|
||||
let mut child_pointer = pointer.clone();
|
||||
|
||||
if let Some(id) = &schema.obj.id {
|
||||
// ... resolve ID logic ...
|
||||
let mut new_base = None;
|
||||
if let Ok(_) = url::Url::parse(id) {
|
||||
new_base = Some(id.clone());
|
||||
} else if let Some(base) = ¤t_base {
|
||||
if let Ok(base_url) = url::Url::parse(base) {
|
||||
if let Ok(joined) = base_url.join(id) {
|
||||
new_base = Some(joined.to_string());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
new_base = Some(id.clone());
|
||||
}
|
||||
|
||||
if let Some(base) = new_base {
|
||||
index.insert(base.clone(), schema.clone());
|
||||
current_base = Some(base);
|
||||
child_pointer = json_pointer::JsonPointer::new(vec![]); // Reset
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Index by Anchor (unchanged)
|
||||
if let Some(anchor) = &schema.obj.anchor {
|
||||
if let Some(base) = ¤t_base {
|
||||
let anchor_uri = format!("{}#{}", base, anchor);
|
||||
index.insert(anchor_uri, schema.clone());
|
||||
}
|
||||
}
|
||||
// Index by Dynamic Anchor
|
||||
if let Some(d_anchor) = &schema.obj.dynamic_anchor {
|
||||
if let Some(base) = ¤t_base {
|
||||
let anchor_uri = format!("{}#{}", base, d_anchor);
|
||||
index.insert(anchor_uri.clone(), schema.clone());
|
||||
println!("Indexed Dynamic Anchor: {}", anchor_uri);
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Index by Anchor
|
||||
if let Some(anchor) = &schema.obj.anchor {
|
||||
if let Some(base) = ¤t_base {
|
||||
let anchor_uri = format!("{}#{}", base, anchor);
|
||||
index.insert(anchor_uri.clone(), schema.clone());
|
||||
println!("Indexed Anchor: {}", anchor_uri);
|
||||
}
|
||||
}
|
||||
|
||||
// ... (Const/Enum indexing skipped for brevity, relies on string)
|
||||
|
||||
// 4. Recurse
|
||||
if let Some(defs) = schema.defs.as_ref().or(schema.definitions.as_ref()) {
|
||||
let segment = if schema.defs.is_some() {
|
||||
"$defs"
|
||||
} else {
|
||||
"definitions"
|
||||
};
|
||||
for (key, sub_schema) in defs {
|
||||
let mut sub = child_pointer.clone();
|
||||
sub.push(segment.to_string());
|
||||
// Decode key to avoid double encoding by JsonPointer
|
||||
let decoded_key = percent_encoding::percent_decode_str(key).decode_utf8_lossy();
|
||||
sub.push(decoded_key.to_string());
|
||||
Self::compile_index(sub_schema, index, current_base.clone(), sub);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(props) = &schema.properties {
|
||||
for (key, sub_schema) in props {
|
||||
let mut sub = child_pointer.clone();
|
||||
sub.push("properties".to_string());
|
||||
sub.push(key.to_string());
|
||||
Self::compile_index(sub_schema, index, current_base.clone(), sub);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(items) = &schema.items {
|
||||
let mut sub = child_pointer.clone();
|
||||
sub.push("items".to_string());
|
||||
Self::compile_index(items, index, current_base.clone(), sub);
|
||||
}
|
||||
|
||||
if let Some(prefix_items) = &schema.prefix_items {
|
||||
for (i, sub_schema) in prefix_items.iter().enumerate() {
|
||||
let mut sub = child_pointer.clone();
|
||||
sub.push("prefixItems".to_string());
|
||||
sub.push(i.to_string());
|
||||
Self::compile_index(sub_schema, index, current_base.clone(), sub);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(all_of) = &schema.all_of {
|
||||
for (i, sub_schema) in all_of.iter().enumerate() {
|
||||
let mut sub = child_pointer.clone();
|
||||
sub.push("allOf".to_string());
|
||||
sub.push(i.to_string());
|
||||
Self::compile_index(sub_schema, index, current_base.clone(), sub);
|
||||
}
|
||||
}
|
||||
if let Some(any_of) = &schema.any_of {
|
||||
for (i, sub_schema) in any_of.iter().enumerate() {
|
||||
let mut sub = child_pointer.clone();
|
||||
sub.push("anyOf".to_string());
|
||||
sub.push(i.to_string());
|
||||
Self::compile_index(sub_schema, index, current_base.clone(), sub);
|
||||
}
|
||||
}
|
||||
if let Some(one_of) = &schema.one_of {
|
||||
for (i, sub_schema) in one_of.iter().enumerate() {
|
||||
let mut sub = child_pointer.clone();
|
||||
sub.push("oneOf".to_string());
|
||||
sub.push(i.to_string());
|
||||
Self::compile_index(sub_schema, index, current_base.clone(), sub);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(not) = &schema.not {
|
||||
let mut sub = child_pointer.clone();
|
||||
sub.push("not".to_string());
|
||||
Self::compile_index(not, index, current_base.clone(), sub);
|
||||
}
|
||||
if let Some(if_) = &schema.if_ {
|
||||
let mut sub = child_pointer.clone();
|
||||
sub.push("if".to_string());
|
||||
Self::compile_index(if_, index, current_base.clone(), sub);
|
||||
}
|
||||
if let Some(then_) = &schema.then_ {
|
||||
let mut sub = child_pointer.clone();
|
||||
sub.push("then".to_string());
|
||||
Self::compile_index(then_, index, current_base.clone(), sub);
|
||||
}
|
||||
if let Some(else_) = &schema.else_ {
|
||||
let mut sub = child_pointer.clone();
|
||||
sub.push("else".to_string());
|
||||
Self::compile_index(else_, index, current_base.clone(), sub);
|
||||
}
|
||||
if let Some(deps) = &schema.dependent_schemas {
|
||||
for (key, sub_schema) in deps {
|
||||
let mut sub = child_pointer.clone();
|
||||
sub.push("dependentSchemas".to_string());
|
||||
sub.push(key.to_string());
|
||||
Self::compile_index(sub_schema, index, current_base.clone(), sub);
|
||||
}
|
||||
}
|
||||
if let Some(pp) = &schema.pattern_properties {
|
||||
for (key, sub_schema) in pp {
|
||||
let mut sub = child_pointer.clone();
|
||||
sub.push("patternProperties".to_string());
|
||||
sub.push(key.to_string());
|
||||
Self::compile_index(sub_schema, index, current_base.clone(), sub);
|
||||
}
|
||||
}
|
||||
if let Some(contains) = &schema.contains {
|
||||
let mut sub = child_pointer.clone();
|
||||
sub.push("contains".to_string());
|
||||
Self::compile_index(contains, index, current_base.clone(), sub);
|
||||
}
|
||||
if let Some(property_names) = &schema.property_names {
|
||||
let mut sub = child_pointer.clone();
|
||||
sub.push("propertyNames".to_string());
|
||||
Self::compile_index(property_names, index, current_base.clone(), sub);
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolves a format string to a CompiledFormat (future optimization)
|
||||
pub fn compile_format(_format: &str) -> Option<CompiledFormat> {
|
||||
None
|
||||
}
|
||||
|
||||
pub fn compile(mut root_schema: Schema, root_id: Option<String>) -> CompiledSchema {
|
||||
// 1. Compile in-place (formats/regexes)
|
||||
Self::compile_formats_and_regexes(&mut root_schema);
|
||||
|
||||
// Apply root_id override if schema ID is missing
|
||||
if let Some(ref rid) = root_id {
|
||||
if root_schema.obj.id.is_none() {
|
||||
root_schema.obj.id = Some(rid.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Wrap in Arc
|
||||
let root = Arc::new(root_schema);
|
||||
let mut index = HashMap::new();
|
||||
|
||||
// 3. Build ID/Pointer Index
|
||||
// Default base_uri to "" so that pointers like "#/foo" are indexed even if no root ID exists
|
||||
Self::compile_index(
|
||||
&root,
|
||||
&mut index,
|
||||
root_id.clone().or(Some("".to_string())),
|
||||
json_pointer::JsonPointer::new(vec![]),
|
||||
);
|
||||
|
||||
// Also ensure root id is indexed if present
|
||||
if let Some(rid) = root_id {
|
||||
index.insert(rid, root.clone());
|
||||
}
|
||||
|
||||
CompiledSchema { root, index }
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user