// jspg/src/lib.rs

use pgrx::*;

pg_module_magic!();

use serde_json::{json, Value};
use std::{collections::HashMap, sync::RwLock};
use boon::{Compiler, Schemas, ValidationError, SchemaIndex, CompileError};
use lazy_static::lazy_static;

/// Pairs a boon `Schemas` collection with a lookup table from
/// caller-supplied schema ids to the index of the compiled schema.
struct BoonCache {
  // Compiled schema set: `cache_json_schema` compiles into it and
  // `validate_json_schema` validates against it.
  schemas: Schemas,
  // Caller-supplied schema id -> index of the compiled schema in `schemas`.
  id_to_index: HashMap<String, SchemaIndex>,
}

lazy_static! {
  // Global schema cache, created empty on first access and guarded by a
  // read/write lock: caching and clearing schemas take the write lock,
  // while lookups and validation take the read lock.
  static ref SCHEMA_CACHE: RwLock<BoonCache> = RwLock::new(BoonCache {
    schemas: Schemas::new(),
    id_to_index: HashMap::new(),
  });
}

/// Compiles `schema` and registers it in the global cache under `schema_id`.
///
/// The schema is added to a fresh compiler as the resource `urn:<schema_id>`
/// with format assertions enabled, then compiled into the shared schema set.
///
/// Returns `{"response": "success"}` once the compiled index is stored, or
/// `{"errors": [...]}` describing why the resource could not be added or
/// compiled. Meta-schema validation failures are reported as one entry per
/// collected leaf error; any other compile error becomes a single
/// `SCHEMA_COMPILATION_FAILED` entry.
#[pg_extern(strict)]
fn cache_json_schema(schema_id: &str, schema: JsonB) -> JsonB {
  // The write lock is held for the whole call because compilation mutates
  // the shared schema set.
  let mut guard = SCHEMA_CACHE.write().unwrap();
  let document: Value = schema.0;
  let resource_url = format!("urn:{}", schema_id);

  let mut compiler = Compiler::new();
  compiler.enable_format_assertions();

  // Register the document under its URN; bail out early if boon rejects it.
  if let Err(add_error) = compiler.add_resource(&resource_url, document.clone()) {
    let failure = json!({
      "code": "SCHEMA_RESOURCE_ADD_FAILED",
      "message": format!("Failed to add schema resource '{}'", schema_id),
      "details": {
        "path": resource_url,
        "cause": add_error.to_string()
      }
    });
    return JsonB(json!({ "errors": [failure] }));
  }

  // Success path returns immediately; only the failure falls through.
  let compile_error = match compiler.compile(&resource_url, &mut guard.schemas) {
    Ok(compiled_index) => {
      // The index is keyed by the caller's id, not by the URN.
      guard.id_to_index.insert(schema_id.to_string(), compiled_index);
      return JsonB(json!({ "response": "success" }));
    }
    Err(err) => err,
  };

  let errors = if let CompileError::ValidationError { src, .. } = &compile_error {
    // The schema document itself failed validation: report its leaf errors,
    // resolving failing values against the schema document.
    let mut leaves = Vec::new();
    collect_validation_errors(src, &mut leaves);
    format_drop_errors(leaves, &document)
  } else {
    vec![json!({
      "code": "SCHEMA_COMPILATION_FAILED",
      "message": format!("Schema '{}' compilation failed", schema_id),
      "details": {
        "path": resource_url,
        "cause": format!("{:?}", compile_error)
      }
    })]
  };

  JsonB(json!({ "errors": errors }))
}

/// Validates `instance` against the schema previously cached as `schema_id`.
///
/// Returns `{"response": "success"}` when the instance is valid. Otherwise
/// returns `{"errors": [...]}`: a single `SCHEMA_NOT_FOUND` entry when no
/// schema is cached under `schema_id`, or the formatted errors collected
/// from the failed validation.
///
/// Labelled `parallel_restricted` rather than `parallel_safe`: the lookup
/// reads `SCHEMA_CACHE`, a process-local static, and PostgreSQL parallel
/// workers are separate processes in which the schema was never cached, so
/// a worker would report `SCHEMA_NOT_FOUND` for schemas the leader has.
#[pg_extern(strict, parallel_restricted)]
fn validate_json_schema(schema_id: &str, instance: JsonB) -> JsonB {
  let cache = SCHEMA_CACHE.read().unwrap();

  // Lookup uses the original schema_id (not the `urn:` resource path).
  let sch_index = match cache.id_to_index.get(schema_id) {
    Some(index) => *index,
    None => {
      return JsonB(json!({
        "errors": [{
          "code": "SCHEMA_NOT_FOUND",
          "message": format!("Schema '{}' not found in cache", schema_id),
          "details": {
            "cause": "Schema must be cached before validation"
          }
        }]
      }));
    }
  };

  let instance_value: Value = instance.0;
  match cache.schemas.validate(&instance_value, sch_index) {
    Ok(_) => JsonB(json!({ "response": "success" })),
    Err(validation_error) => {
      // Flatten the error tree, then resolve failing values against the
      // instance that was validated.
      let mut error_list = Vec::new();
      collect_validation_errors(&validation_error, &mut error_list);
      let errors = format_drop_errors(error_list, &instance_value);
      JsonB(json!({ "errors": errors }))
    }
  }
}

// Recursively collects validation errors
fn collect_validation_errors(error: &ValidationError, errors_list: &mut Vec<(String, String, String)>) {
  // Check if this is a structural error that we should skip
  let error_message = format!("{}", error.kind);
  let is_structural = error_message == "validation failed" ||
                     error_message == "allOf failed" ||
                     error_message == "anyOf failed" ||
                     error_message == "not failed" ||
                     error_message.starts_with("oneOf failed");

  if error.causes.is_empty() && !is_structural {
    // This is a leaf error that's not structural
    // Format just the error kind, not the whole validation error
    let message = format!("{}", error.kind);

    errors_list.push((
      error.instance_location.to_string(),
      error.schema_url.to_string(),
      message
    ));
  } else {
    // Recurse into causes
    for cause in &error.causes {
      collect_validation_errors(cause, errors_list);
    }
  }
}

/// Formats raw `(instance_path, schema_path, message)` tuples as DropError
/// JSON objects.
///
/// Errors are deduplicated by instance path: only the first error seen for
/// a given path is kept. The output preserves the order in which the
/// surviving errors appear in `raw_errors`, so repeated calls with the same
/// input produce the same array.
///
/// Each object carries the derived `code` and human-readable `message`,
/// and under `details` the schema path, the instance path with the value
/// found there in `instance`, and the original message as `cause`.
fn format_drop_errors(raw_errors: Vec<(String, String, String)>, instance: &Value) -> Vec<Value> {
    use std::collections::HashSet;

    let mut seen_paths: HashSet<String> = HashSet::with_capacity(raw_errors.len());
    let mut formatted: Vec<Value> = Vec::with_capacity(raw_errors.len());

    for (instance_path, schema_path, message) in raw_errors {
        // `insert` returns false when this path has already been reported.
        if !seen_paths.insert(instance_path.clone()) {
            continue;
        }

        // Convert message to error code and make it human readable
        let (code, human_message) = enhance_error_message(&message);

        // Extract the failing value from the instance
        let failing_value = extract_value_at_path(instance, &instance_path);

        formatted.push(json!({
            "code": code,
            "message": human_message,
            "details": {
                "path": schema_path,
                "context": json!({
                    "instance_path": instance_path,
                    "failing_value": failing_value
                }),
                "cause": message // Original error message
            }
        }));
    }

    formatted
}

/// Returns a clone of the value that the JSON Pointer `path` addresses
/// inside `instance`, or `Value::Null` when the pointer does not resolve.
///
/// Resolution is delegated to `serde_json::Value::pointer`, which follows
/// RFC 6901: the empty pointer addresses the whole document, `~1` and `~0`
/// in a segment decode to `/` and `~`, and an empty segment addresses the
/// empty-string key. This keeps the lookup correct for property names
/// containing `/` or `~`, or that are empty.
fn extract_value_at_path(instance: &Value, path: &str) -> Value {
    instance.pointer(path).cloned().unwrap_or(Value::Null)
}

/// Maps a validator error message to a `(code, human_readable_message)` pair.
///
/// Classification is by substring/prefix match on the trimmed message, with
/// the more specific patterns tested first (e.g. "length must be >=" before
/// "must be >=", and ">=" before ">"). Anything unrecognised is reported as
/// `VALIDATION_FAILED`; callers keep the original text separately.
///
/// NOTE(review): messages can embed instance data (pattern/format/enum
/// values, property names), so an unusual value may still match an earlier
/// branch than intended.
fn enhance_error_message(message: &str) -> (String, String) {
    let trimmed = message.trim();
    let has = |needle: &str| trimmed.contains(needle);
    let has_got = has("but got");
    let fixed = |code: &str, text: &str| (code.to_string(), text.to_string());

    if has("value must be one of") {
        fixed("ENUM_VIOLATED", "Value is not one of the allowed options")
    } else if has("length must be >=") && has_got {
        fixed("MIN_LENGTH_VIOLATED", "Field length is below the minimum required")
    } else if has("length must be <=") && has_got {
        fixed("MAX_LENGTH_VIOLATED", "Field length exceeds the maximum allowed")
    } else if has("must be >=") && has_got {
        fixed("MINIMUM_VIOLATED", "Value is below the minimum allowed")
    } else if has("must be <=") && has_got {
        fixed("MAXIMUM_VIOLATED", "Value exceeds the maximum allowed")
    } else if has("must be >") && has_got {
        fixed("EXCLUSIVE_MINIMUM_VIOLATED", "Value must be greater than the minimum")
    } else if has("must be <") && has_got {
        fixed("EXCLUSIVE_MAXIMUM_VIOLATED", "Value must be less than the maximum")
    } else if has("does not match pattern") {
        fixed("PATTERN_VIOLATED", "Value does not match the required pattern")
    } else if has("missing properties") {
        fixed("REQUIRED_FIELD_MISSING", "Required field is missing")
    } else if has("want") && has_got {
        fixed("TYPE_MISMATCH", "Field type does not match the expected type")
    } else if trimmed.starts_with("value must be") {
        // The "value must be one of ..." form was already handled above, so
        // whatever follows here is the expected constant itself. It must not
        // be inspected: a constant such as "one of a kind" is still a const
        // violation.
        fixed("CONST_VIOLATED", "Value does not match the required constant")
    } else if has("is not valid") && has(":") {
        ("FORMAT_INVALID".to_string(), extract_format_message(trimmed))
    } else if has("items at") && has("are equal") {
        fixed("UNIQUE_ITEMS_VIOLATED", "Array contains duplicate items")
    } else if has("additionalProperties") && has("not allowed") {
        fixed(
            "ADDITIONAL_PROPERTIES_NOT_ALLOWED",
            "Object contains properties that are not allowed",
        )
    } else if has("is not multipleOf") {
        fixed("MULTIPLE_OF_VIOLATED", "Value is not a multiple of the required factor")
    } else if has("minimum") && has("properties required") {
        fixed("MIN_PROPERTIES_VIOLATED", "Object has fewer properties than required")
    } else if has("maximum") && has("properties required") {
        fixed("MAX_PROPERTIES_VIOLATED", "Object has more properties than allowed")
    } else if has("minimum") && has("items required") {
        fixed("MIN_ITEMS_VIOLATED", "Array has fewer items than required")
    } else if has("maximum") && has("items required") {
        fixed("MAX_ITEMS_VIOLATED", "Array has more items than allowed")
    } else if trimmed == "false schema" {
        fixed("FALSE_SCHEMA", "Schema validation always fails")
    } else if trimmed == "not failed" {
        fixed("NOT_VIOLATED", "Value matched a schema it should not match")
    } else if trimmed == "allOf failed" {
        fixed("ALL_OF_VIOLATED", "Value does not match all required schemas")
    } else if trimmed == "anyOf failed" {
        fixed("ANY_OF_VIOLATED", "Value does not match any of the allowed schemas")
    } else if has("oneOf failed") {
        fixed("ONE_OF_VIOLATED", "Value must match exactly one schema")
    } else {
        // Covers the literal "validation failed" as well as any message that
        // matched none of the patterns above.
        fixed("VALIDATION_FAILED", "Validation failed")
    }
}


/// Produces a short description for a format error message of the shape
/// `<value> is not valid <format>: <detail>`.
///
/// The format name is taken from the text between the right-most
/// "is not valid " and the following ':' provided it looks like a format
/// identifier (ASCII letters, digits and '-'). Matching on that name rather
/// than on the whole message keeps the instance value from influencing the
/// result (e.g. the value "security" contains "uri"). When no such name can
/// be found the whole message is searched instead.
fn extract_format_message(message: &str) -> String {
    const MARKER: &str = "is not valid ";

    // Scan from the right so a marker embedded in the instance value (which
    // is printed first) loses to the validator's own wording.
    let mut format_name = message;
    for (start, _) in message.rmatch_indices(MARKER) {
        if let Some((candidate, _)) = message[start + MARKER.len()..].split_once(':') {
            let looks_like_name = !candidate.is_empty()
                && candidate.chars().all(|c| c.is_ascii_alphanumeric() || c == '-');
            if looks_like_name {
                format_name = candidate;
                break;
            }
        }
    }

    let label = if format_name.contains("date-time") {
        "Invalid date-time format"
    } else if format_name.contains("email") {
        "Invalid email format"
    } else if format_name.contains("uri") {
        "Invalid URI format"
    } else if format_name.contains("uuid") {
        "Invalid UUID format"
    } else {
        "Invalid format"
    };
    label.to_string()
}

/// Reports whether a schema is currently cached under `schema_id`.
///
/// Labelled `parallel_restricted` rather than `parallel_safe`: the answer
/// comes from `SCHEMA_CACHE`, a process-local static, so a PostgreSQL
/// parallel worker (a separate process) could answer differently from the
/// leader that cached the schema.
#[pg_extern(strict, parallel_restricted)]
fn json_schema_cached(schema_id: &str) -> bool {
  let cache = SCHEMA_CACHE.read().unwrap();
  cache.id_to_index.contains_key(schema_id)
}

/// Empties the global schema cache and returns `{"response": "success"}`.
///
/// Both the compiled schema set and the id lookup table are replaced with
/// fresh, empty instances while the write lock is held.
#[pg_extern(strict)]
fn clear_json_schemas() -> JsonB {
  let mut guard = SCHEMA_CACHE.write().unwrap();
  guard.schemas = Schemas::new();
  guard.id_to_index = HashMap::new();
  JsonB(json!({ "response": "success" }))
}

/// Lists the ids of all cached schemas as `{"response": [ids...]}`.
///
/// The ids are sorted so the output is stable; the underlying hash map
/// iterates in arbitrary order.
///
/// Labelled `parallel_restricted` rather than `parallel_safe`: the list is
/// read from `SCHEMA_CACHE`, a process-local static, which a PostgreSQL
/// parallel worker (a separate process) does not share with the leader.
#[pg_extern(strict, parallel_restricted)]
fn show_json_schemas() -> JsonB {
  let cache = SCHEMA_CACHE.read().unwrap();
  let mut ids: Vec<String> = cache.id_to_index.keys().cloned().collect();
  ids.sort_unstable();
  JsonB(json!({ "response": ids }))
}

/// Hooks that `cargo pgrx test` expects to find at the crate root.
/// Neither hook needs to do anything for this extension.
#[cfg(test)]
pub mod pg_test {
  /// One-off initialization run when the pg_test framework starts.
  /// Intentionally a no-op; the options are ignored.
  pub fn setup(_options: Vec<&str>) {}

  /// Extra `postgresql.conf` settings required by the tests: none.
  #[must_use]
  pub fn postgresql_conf_options() -> Vec<&'static str> {
    Vec::new()
  }
}


// Test module, compiled only for test builds or when the `pg_test` feature
// is enabled. The test bodies live in `tests.rs` and are textually included
// here, so they are part of this module.
#[cfg(any(test, feature = "pg_test"))]
#[pg_schema]
mod tests {
  include!("tests.rs");
}