Compare commits

...

75 Commits

SHA1 Message Date
9ddc899411 version: 1.0.40 2025-10-02 18:15:19 -04:00
a8d726ec73 unevaluatedProperties now cascade infinitely down their leaf when strict validation mode is on 2025-10-02 18:15:07 -04:00
6b6647f2d6 version: 1.0.39 2025-09-30 20:44:35 -04:00
d301d5fab9 types at root not strict 2025-09-30 20:44:17 -04:00
61511b595d added flow commands for testing validator vs jspg 2025-09-30 20:29:13 -04:00
c7ae975275 version: 1.0.38 2025-09-30 20:19:51 -04:00
aa58082cd7 boon test suite itself passing 2025-09-30 20:19:41 -04:00
491fb3a3e3 docs updated 2025-09-30 20:01:49 -04:00
fc939d84ee version: 1.0.37 2025-09-30 19:56:46 -04:00
d6b34c99bb jspg additional properties bug squashed 2025-09-30 19:56:34 -04:00
cc04f38c14 boon now included 2025-09-30 01:10:58 -04:00
c9b1245a57 version: 1.0.36 2025-09-12 23:00:53 -04:00
3d770b0831 fixed type mismatch checking to not fail fast and work through nested data 2025-09-12 22:59:27 -04:00
3fdbf60396 minor reorg no release 2025-09-12 15:43:20 -04:00
6610b069db version: 1.0.35 2025-09-12 01:02:45 -04:00
bb84f9aa73 implemented type match checking for types on schema id instead of type const 2025-09-12 01:02:32 -04:00
704770051c version: 1.0.34 2025-09-01 22:58:10 -04:00
88c77deede punc request and response moved to punc schemas 2025-09-01 22:58:01 -04:00
0184c244d9 version: 1.0.33 2025-08-27 03:30:25 -04:00
e40de2eb12 more improvements to ref tracking in json schemas and tests 2025-08-27 03:30:15 -04:00
5e55786e3e version: 1.0.32 2025-08-21 20:18:44 -04:00
6520413069 jspg updates for punc-v2 2025-08-21 20:18:32 -04:00
b97879ff61 version: 1.0.31 2025-07-08 07:27:14 -04:00
ea0b139f87 upgraded rust and pgrx versions 2025-07-08 07:27:05 -04:00
dccaa0a46e version: 1.0.30 2025-07-04 04:23:15 -04:00
441597e604 need to allow empty strings when a string property has a format 2025-07-04 04:23:06 -04:00
710598752f version: 1.0.29 2025-06-17 18:55:27 -04:00
5fbf64bac5 serializing ErrorKind directly to drop error cause 2025-06-17 18:55:16 -04:00
2dd17f0b37 version: 1.0.28 2025-06-12 22:27:59 -04:00
cbda45e610 fixed conditional errors with false schemas and unevaluatedProperties 2025-06-12 22:27:49 -04:00
1085964c17 version: 1.0.27 2025-06-12 17:07:37 -04:00
65971d9b93 splitting up errorkind paths to produce multiple drop errors 2025-06-12 17:07:28 -04:00
d938058d34 version: 1.0.26 2025-06-12 00:59:44 -04:00
69ab6165bb improvements to error handling again 2025-06-12 00:59:33 -04:00
03beada825 version: 1.0.25 2025-06-11 20:28:46 -04:00
efdd7528cc switched strict validation from additionalProperties to unevaluatedProperties to catch conditional properties automatically in verification 2025-06-11 20:28:39 -04:00
59395a33ac version: 1.0.24 2025-06-11 19:38:56 -04:00
92c0a6fc0b even more jspg improved error handling, missing some codes before 2025-06-11 19:38:46 -04:00
7f66a4a35a no-op 2025-06-10 16:01:58 -04:00
d37aadb0dd version: 1.0.23 2025-06-09 18:09:33 -04:00
d0ccc47d97 added strict validation option 2025-06-09 18:09:15 -04:00
2d19bf100e version: 1.0.22 2025-06-06 14:25:18 -04:00
fb333c6cbb slight improvements to error messaging 2025-06-06 14:25:13 -04:00
d8a9a7b76b version: 1.0.21 2025-06-06 14:05:24 -04:00
c9022aefb9 fixed env 2025-06-06 14:05:19 -04:00
ccf0465e45 fixed gitignore 2025-06-06 14:02:43 -04:00
dce50d9dc3 error handling improvements to jspg to match drop structure 2025-06-06 13:58:50 -04:00
8ec6a5b58a flow updates 2025-05-29 17:51:16 -04:00
6ef7e0c55e flow update 2025-04-25 13:34:06 -04:00
1cb5fb0ecf removed random .env 2025-04-25 12:22:07 -04:00
d66aae8ae2 flow update 2025-04-24 20:02:18 -04:00
3b18901bda version: 1.0.20 2025-04-21 17:11:30 -04:00
b8c0e08068 more filtering 2025-04-21 17:11:24 -04:00
c734983a59 version: 1.0.19 2025-04-21 16:15:08 -04:00
9b11f661bc fixed release bug 2025-04-21 16:15:02 -04:00
f3a733626e version: 1.0.18 2025-04-21 16:13:16 -04:00
2bcdb8adbb version: 1.0.17 2025-04-21 16:11:31 -04:00
3988308965 branch error filtering 2025-04-21 16:11:12 -04:00
b7f528d1f6 flow 2025-04-16 21:14:07 -04:00
2febb292dc flow update 2025-04-16 20:00:35 -04:00
d1831a28ec flow update 2025-04-16 19:34:09 -04:00
c5834ac544 flow updated 2025-04-16 18:07:41 -04:00
eb25f8489e version: 1.0.16 2025-04-16 14:43:07 -04:00
21937db8de improved compile schema error messages 2025-04-16 14:42:57 -04:00
28b689cac0 version: 1.0.15 2025-04-16 01:00:57 -04:00
cc04a1a8bb made errors consistent 2025-04-16 01:00:51 -04:00
3ceb8a0770 version: 1.0.14 2025-04-16 00:38:10 -04:00
499bf68b2a more error cleanup 2025-04-16 00:38:04 -04:00
6ca00f27e9 version: 1.0.13 2025-04-15 23:30:57 -04:00
520be66035 better error messaging 2025-04-15 23:30:47 -04:00
c3146ca433 flow update 2025-04-15 01:52:12 -04:00
b4d9628b05 version: 1.0.12 2025-04-15 00:25:39 -04:00
635d31d723 more validation fixes 2025-04-15 00:25:29 -04:00
08efcb92db version: 1.0.11 2025-04-14 21:53:39 -04:00
dad1216e1f more validation fixes 2025-04-14 21:53:30 -04:00
84 changed files with 15905 additions and 958 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

1
.gitignore vendored

@@ -1,2 +1,3 @@
 /target
 /package
+.env

979
Cargo.lock generated

File diff suppressed because it is too large

Cargo.toml

@@ -1,18 +1,23 @@
+[workspace]
+members = [
+  ".",
+  "validator",
+]
 [package]
 name = "jspg"
 version = "0.1.0"
-edition = "2021"
+edition = "2024"
 [dependencies]
-pgrx = "0.14.0"
+pgrx = "0.15.0"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 jsonschema = "0.29.1"
 lazy_static = "1.5.0"
-boon = "0.6.1"
+boon = { path = "validator" }
 [dev-dependencies]
-pgrx-tests = "0.14.0"
+pgrx-tests = "0.15.0"
 [lib]
 crate-type = ["cdylib", "lib"]
79
GEMINI.md Normal file

@@ -0,0 +1,79 @@
# Gemini Project Overview: `jspg`
This document outlines the purpose of the `jspg` project, its architecture, and the specific modifications made to the vendored `boon` JSON Schema validator crate.
## What is `jspg`?
`jspg` is a PostgreSQL extension written in Rust using the `pgrx` framework. Its primary function is to provide fast, in-database JSON Schema validation against draft 2020-12 of the JSON Schema specification.
### How It Works
The extension is designed for high-performance scenarios where schemas are defined once and used many times for validation. It achieves this through an in-memory cache.
1. **Caching:** A user first calls the `cache_json_schemas(enums, types, puncs)` SQL function. This function takes arrays of JSON objects representing different kinds of schemas within a larger application framework. It uses the vendored `boon` crate to compile all these schemas into an efficient internal format and stores them in a static, in-memory `SCHEMA_CACHE`. This cache is managed by a `RwLock` to allow concurrent reads during validation.
2. **Validation:** The `validate_json_schema(schema_id, instance)` SQL function then validates a JSONB `instance` against a specific, pre-cached schema identified by its `$id`. It looks up the compiled schema in the cache, runs the validation, and returns either a success response or a detailed error report. (Both calls are sketched after this list.)
3. **Custom Logic:** `jspg` uses a locally modified (vendored) version of the `boon` crate. This allows for powerful, application-specific validation logic that goes beyond the standard JSON Schema specification, such as runtime-based strictness.
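For orientation, here is a minimal sketch of the cache-then-validate flow, modeled on the `simple_schemas` fixture in `src/schemas.rs` (both functions live in `src/lib.rs` and are exported to SQL by `pgrx`; a SQL caller would pass the same payloads as `jsonb` arguments):

```rust
use pgrx::JsonB;
use serde_json::json;

// A minimal cache-then-validate round trip, mirroring src/schemas.rs.
// Assumes crate-internal access, as in src/tests.rs (`use crate::*`).
fn cache_and_validate_sketch() {
  let enums = JsonB(json!([]));
  let types = JsonB(json!([]));
  let puncs = JsonB(json!([{
    "name": "simple",
    "public": false,
    "schemas": [{
      "$id": "simple.request",
      "type": "object",
      "properties": {
        "name": { "type": "string" },
        "age": { "type": "integer", "minimum": 0 }
      },
      "required": ["name", "age"]
    }]
  }]));

  // Compiles every schema with the vendored boon crate and stores the
  // compiled forms in the static, RwLock-guarded SCHEMA_CACHE.
  let _cached = cache_json_schemas(enums, types, puncs);

  // Looks up the compiled schema by its $id and validates the instance,
  // returning a success response or a list of DropErrors.
  let _result = validate_json_schema(
    "simple.request",
    JsonB(json!({ "name": "Alice", "age": 30 })),
  );
}
```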
### Error Handling
When validation fails, `jspg` provides a detailed error report in a consistent JSON format, which we refer to as a "DropError". This process involves two main helper functions in `src/lib.rs`:
1. **`collect_errors`**: `boon` returns a nested tree of `ValidationError` objects. This function recursively traverses that tree to find the most specific, underlying causes of the failure. It filters out structural errors (like `allOf` or `anyOf`) to create a flat list of concrete validation failures.
2. **`format_errors`**: This function takes the flat list of errors and transforms each one into the final DropError JSON format. It also de-duplicates errors that occur at the same JSON Pointer path, ensuring cleaner output when a single value violates multiple constraints. Both stages are sketched below.
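A condensed sketch of that two-stage pipeline (the structs and field names here are simplified stand-ins for boon's `ValidationError` and the real helpers in `src/lib.rs`):

```rust
use std::collections::HashSet;
use serde_json::{json, Value};

// Simplified stand-in for boon's nested ValidationError tree.
struct VError {
  kind: String,        // e.g. "allOf", "minimum", "required"
  path: String,        // JSON Pointer to the failing value
  causes: Vec<VError>, // nested sub-errors
}

// Stage 1: walk the tree and keep only concrete leaf failures, skipping
// structural nodes (allOf/anyOf/oneOf/$ref) that merely group other errors.
fn collect_errors<'a>(e: &'a VError, out: &mut Vec<&'a VError>) {
  let structural = matches!(e.kind.as_str(), "allOf" | "anyOf" | "oneOf" | "$ref");
  if e.causes.is_empty() && !structural {
    out.push(e);
  }
  for cause in &e.causes {
    collect_errors(cause, out);
  }
}

// Stage 2: map each leaf to a DropError object, de-duplicating by path so a
// value that violates several constraints yields a single error.
fn format_errors(leaves: &[&VError], schema_id: &str) -> Vec<Value> {
  let mut seen = HashSet::new();
  leaves
    .iter()
    .filter(|e| seen.insert(e.path.clone()))
    .map(|e| {
      json!({
        "code": e.kind.to_uppercase(),
        "details": { "path": e.path.clone(), "schema": schema_id }
      })
    })
    .collect()
}
```

The real `format_errors` additionally fills in the `message`, `context`, and `cause` fields shown in the DropError example below.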
#### DropError Format
A DropError object provides a clear, structured explanation of a validation failure:
```json
{
  "code": "ADDITIONAL_PROPERTIES_NOT_ALLOWED",
  "message": "Property 'extra' is not allowed",
  "details": {
    "path": "/extra",
    "context": "not allowed",
    "cause": {
      "got": [
        "extra"
      ]
    },
    "schema": "basic_strict_test.request"
  }
}
```
- `code` (string): A machine-readable error code (e.g., `ADDITIONAL_PROPERTIES_NOT_ALLOWED`, `MIN_LENGTH_VIOLATED`).
- `message` (string): A human-readable summary of the error.
- `details` (object):
- `path` (string): The JSON Pointer path to the invalid data within the instance.
- `context` (any): The actual value that failed validation.
- `cause` (any): The low-level reason from the validator, often including the expected value (`want`) and the actual value (`got`).
- `schema` (string): The `$id` of the schema that was being validated.
---
## `boon` Crate Modifications
The version of `boon` located in the `validator/` directory has been significantly modified to support runtime-based strict validation. The original `boon` crate only supports compile-time strictness and lacks the necessary mechanisms to propagate validation context correctly for our use case.
### 1. Recursive Runtime Strictness Control
- **Problem:** The `jspg` project requires that certain schemas (specifically those for public `puncs` and global `type`s) enforce a strict "no extra properties" policy. This strictness needs to be decided at runtime and must cascade through the entire validation hierarchy, including all nested objects and `$ref` chains. A compile-time flag was unsuitable because it would incorrectly apply strictness to shared, reusable schemas.
- **Solution:** A runtime validation option was implemented to enforce strictness recursively. This required several coordinated changes to the `boon` validator.
#### Key Changes
1. **`ValidationOptions` Struct**: A new `ValidationOptions { be_strict: bool }` struct was added to `validator/src/lib.rs`. The `jspg` code in `src/lib.rs` determines if a validation run should be strict and passes this struct to the validator.
2. **Strictness Check in `uneval_validate`**: The original `boon` only checked for unevaluated properties if the `unevaluatedProperties` keyword was present in the schema. We added an `else if be_strict` block to `uneval_validate` in `validator/src/validator.rs`. This block triggers a check for any leftover unevaluated properties at the end of a validation pass and reports them as errors, effectively enforcing our runtime strictness rule.
3. **Correct Context Propagation**: The most complex part of the fix was ensuring the set of unevaluated properties was correctly maintained across different validation contexts (especially `$ref` and nested property validations). Three critical changes were made:
- **Inheriting Context in `_validate_self`**: When validating keywords that apply to the same instance (like `$ref` or `allOf`), the sub-validator must know what properties the parent has already evaluated. We changed the creation of the `Validator` inside `_validate_self` to pass a clone of the parent's `uneval` state (`uneval: self.uneval.clone()`) instead of creating a new one from scratch. This allows the context to flow downwards.
- **Isolating Context in `validate_val`**: Conversely, when validating a property's value, that value is a *different* part of the JSON instance. The sub-validation should not affect the parent's list of unevaluated properties. We fixed this by commenting out the `self.uneval.merge(...)` call in the `validate_val` function.
- **Simplifying `Uneval::merge`**: The original logic for merging `uneval` state treated `$ref` keywords differently, which was incorrect. We simplified the `merge` function to *always* perform an intersection (`retain`), which correctly combines the knowledge of evaluated properties from different schema parts that apply to the same instance.
4. **Removing Incompatible Assertions**: The changes to context propagation broke several `debug_assert!` macros in the `arr_validate` function, which were part of `boon`'s original design. Since our new validation flow is different but correct, these assertions were removed.
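Condensed into a sketch, the machinery these changes describe looks roughly as follows (names and signatures are abbreviated from the descriptions above; this is an illustration, not the vendored source):

```rust
use std::collections::HashSet;

// Key change 1: the runtime knob jspg passes in for each validation run.
pub struct ValidationOptions {
  pub be_strict: bool,
}

// Tracks which properties of the current object are still unevaluated.
struct Uneval {
  props: HashSet<String>,
}

impl Uneval {
  // Key change 3 (merge): always intersect. A property counts as evaluated
  // if any schema part applying to this same instance evaluated it.
  fn merge(&mut self, other: &Uneval) {
    self.props.retain(|p| other.props.contains(p));
  }
}

// Key change 2: the end-of-pass check in uneval_validate.
fn uneval_check(
  uneval: &Uneval,
  has_unevaluated_properties_keyword: bool,
  opts: &ValidationOptions,
) -> Result<(), Vec<String>> {
  if has_unevaluated_properties_keyword {
    // Original boon behaviour: apply the unevaluatedProperties subschema.
    Ok(())
  } else if opts.be_strict && !uneval.props.is_empty() {
    // Added behaviour: under runtime strictness, every leftover property
    // becomes an error (surfaced as ADDITIONAL_PROPERTIES_NOT_ALLOWED).
    Err(uneval.props.iter().cloned().collect())
  } else {
    Ok(())
  }
}
```

`test_validate_strict` in `src/tests.rs` exercises exactly this behaviour: extras fail for public puncs, pass for non-strict ones, and cascade into nested objects and array items.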

130
flow

@@ -1,155 +1,139 @@
#!/bin/bash
#!/usr/bin/env bash
# Flows
source ./flows/base
source ./flows/git
source ./flows/kube
source ./flows/packaging
source ./flows/rust
# Vars
POSTGRES_VERSION="17"
POSTGRES_CONFIG_PATH="/opt/homebrew/opt/postgresql@${POSTGRES_VERSION}/bin/pg_config"
DEPENDENCIES=(cargo git icu4c pkg-config "postgresql@${POSTGRES_VERSION}")
CARGO_DEPENDENCIES=(cargo-pgrx==0.14.0)
DEPENDENCIES+=(icu4c pkg-config "postgresql@${POSTGRES_VERSION}")
CARGO_DEPENDENCIES=(cargo-pgrx==0.15.0)
GITEA_ORGANIZATION="cellular"
GITEA_REPOSITORY="jspg"
env() {
# Check if GITEA_TOKEN is set
if [ -z "$GITEA_TOKEN" ]; then
# If not set, try to get it from kubectl
GITEA_TOKEN=$(kubectl get secret -n cellular gitea-git -o jsonpath='{.data.token}' | base64 --decode)
if [ -z "$GITEA_TOKEN" ]; then
echo -e "❌ ${RED}GITEA_TOKEN is not set and couldn't be retrieved from kubectl${RESET}" >&2
exit 1
fi
export GITEA_TOKEN
fi
echo -e "💰 ${GREEN}Environment variables set${RESET}"
}
pgrx-prepare() {
echo -e "${BLUE}Initializing pgrx...${RESET}"
info "Initializing pgrx..."
# Explicitly point to the postgresql@${POSTGRES_VERSION} pg_config, don't rely on 'which'
local POSTGRES_CONFIG_PATH="/opt/homebrew/opt/postgresql@${POSTGRES_VERSION}/bin/pg_config"
if [ ! -x "$POSTGRES_CONFIG_PATH" ]; then
echo -e "${RED}Error: pg_config not found or not executable at $POSTGRES_CONFIG_PATH.${RESET}"
echo -e "${YELLOW}Ensure postgresql@${POSTGRES_VERSION} is installed correctly via Homebrew.${RESET}"
exit 1
error "pg_config not found or not executable at $POSTGRES_CONFIG_PATH."
warning "Ensure postgresql@${POSTGRES_VERSION} is installed correctly via Homebrew."
return 2
fi
if cargo pgrx init --pg"$POSTGRES_VERSION"="$POSTGRES_CONFIG_PATH"; then
echo -e "${GREEN}pgrx initialized successfully.${RESET}"
success "pgrx initialized successfully."
else
echo -e "${RED}Failed to initialize pgrx. Check PostgreSQL development packages are installed and $POSTGRES_CONFIG_PATH is valid.${RESET}"
exit 1
error "Failed to initialize pgrx. Check PostgreSQL development packages are installed and $POSTGRES_CONFIG_PATH is valid."
return 2
fi
}
build() {
local version
version=$(get-version) || return 1
version=$(get-version) || return $?
local package_dir="./package"
local tarball_name="${GITEA_REPOSITORY}.tar.gz"
local tarball_path="${package_dir}/${tarball_name}"
echo -e "📦 Creating source tarball v$version for ${GITEA_REPOSITORY} in $package_dir..."
info "Creating source tarball v$version for ${GITEA_REPOSITORY} in $package_dir..."
# Clean previous package dir
rm -rf "${package_dir}"
mkdir -p "${package_dir}"
# Create the source tarball excluding specified patterns
echo -e " ${CYAN}Creating tarball: ${tarball_path}${RESET}"
info "Creating tarball: ${tarball_path}"
if tar --exclude='.git*' --exclude='./target' --exclude='./package' --exclude='./flows' --exclude='./flow' -czf "${tarball_path}" .; then
echo -e "✨ ${GREEN}Successfully created source tarball: ${tarball_path}${RESET}"
success "Successfully created source tarball: ${tarball_path}"
else
echo -e "❌ ${RED}Failed to create source tarball.${RESET}" >&2
return 1
error "Failed to create source tarball."
return 2
fi
}
install() {
local version
version=$(get-version) || return 1
version=$(get-version) || return $? # Propagate error
echo -e "🔧 ${CYAN}Building and installing PGRX extension v$version into local PostgreSQL...${RESET}"
info "Building and installing PGRX extension v$version into local PostgreSQL..."
# Run the pgrx install command
# It implicitly uses --release unless --debug is passed
# It finds pg_config or you can add flags like --pg-config if needed
if ! cargo pgrx install; then
echo -e "❌ ${RED}cargo pgrx install command failed.${RESET}" >&2
return 1
error "cargo pgrx install command failed."
return 2
fi
echo -e "✨ ${GREEN}PGRX extension v$version successfully built and installed.${RESET}"
success "PGRX extension v$version successfully built and installed."
# Post-install modification to allow non-superuser usage
# Get the installation path dynamically using pg_config
local pg_sharedir
pg_sharedir=$("$POSTGRES_CONFIG_PATH" --sharedir)
if [ -z "$pg_sharedir" ]; then
echo -e "❌ ${RED}Failed to determine PostgreSQL shared directory using pg_config.${RESET}" >&2
return 1
local pg_config_status=$?
if [ $pg_config_status -ne 0 ] || [ -z "$pg_sharedir" ]; then
error "Failed to determine PostgreSQL shared directory using pg_config."
return 2
fi
local installed_control_path="${pg_sharedir}/extension/jspg.control"
# Modify the control file
if [ ! -f "$installed_control_path" ]; then
echo -e "❌ ${RED}Installed control file not found: '$installed_control_path'${RESET}" >&2
return 1
error "Installed control file not found: '$installed_control_path'"
return 2
fi
echo -e "🔧 ${CYAN}Modifying control file for non-superuser access: ${installed_control_path}${RESET}"
info "Modifying control file for non-superuser access: ${installed_control_path}"
# Use sed -i '' for macOS compatibility
if sed -i '' '/^superuser = false/d' "$installed_control_path" && \
echo 'trusted = true' >> "$installed_control_path"; then
echo -e "✨ ${GREEN}Control file modified successfully.${RESET}"
success "Control file modified successfully."
else
echo -e "❌ ${RED}Failed to modify control file: ${installed_control_path}${RESET}" >&2
return 1
error "Failed to modify control file: ${installed_control_path}"
return 2
fi
}
test() {
echo -e "🧪 ${CYAN}Running jspg tests...${RESET}"
cargo pgrx test "pg${POSTGRES_VERSION}" "$@"
test-jspg() {
info "Running jspg tests..."
cargo pgrx test "pg${POSTGRES_VERSION}" "$@" || return $?
}
test-validator() {
info "Running validator tests..."
cargo test -p boon --features "pgrx/pg${POSTGRES_VERSION}" "$@" || return $?
}
clean() {
echo -e "🧹 ${CYAN}Cleaning build artifacts...${RESET}"
cargo clean # Use standard cargo clean
info "Cleaning build artifacts..."
cargo clean || return $?
}
jspg-usage() {
echo -e " ${CYAN}JSPG Commands:${RESET}"
echo -e " prepare Check OS, Cargo, and PGRX dependencies."
echo -e " install [opts] Run prepare, then build and install the extension locally."
echo -e " reinstall [opts] Run prepare, clean, then build and install the extension locally."
echo -e " test [opts] Run pgrx integration tests."
echo -e " clean Remove pgrx build artifacts."
echo -e " build Build release artifacts into ./package/ (called by release)."
echo -e " tag Tag the current version (called by release)."
echo -e " package Upload artifacts from ./package/ (called by release)."
echo -e " release Perform a full release (increments patch, builds, tags, pushes, packages)."
printf "prepare\tCheck OS, Cargo, and PGRX dependencies.\n"
printf "install\tBuild and install the extension locally (after prepare).\n"
printf "reinstall\tClean, build, and install the extension locally (after prepare).\n"
printf "test-jspg\t\tRun pgrx integration tests.\n"
printf "test-validator\t\tRun validator integration tests.\n"
printf "clean\t\tRemove pgrx build artifacts.\n"
}
jspg-flow() {
case "$1" in
env) env; return 0;;
prepare) base prepare; cargo-prepare; pgrx-prepare; return 0;;
build) build; return 0;;
install) install; return 0;;
reinstall) clean; install; return 0;;
test) test; return 0;;
package) env; package; return 0;;
release) env; release; return 0;;
clean) clean; return 0;;
prepare) prepare && cargo-prepare && pgrx-prepare; return $?;;
build) build; return $?;;
install) install; return $?;;
reinstall) clean && install; return $?;;
test-jspg) test-jspg "${@:2}"; return $?;;
test-validator) test-validator "${@:2}"; return $?;;
clean) clean; return $?;;
*) return 1 ;;
esac
}
register-flow "jspg-flow" "jspg-usage"
register-flow "jspg-usage" "jspg-flow"
dispatch "$@"

2
flows

Submodule flows updated: db55335254...e154758056

44
out.txt Normal file

@@ -0,0 +1,44 @@
running 23 tests
 Building extension with features pg_test pg17
 Running command "/opt/homebrew/bin/cargo" "build" "--lib" "--features" "pg_test pg17" "--message-format=json-render-diagnostics"
 Installing extension
 Copying control file to /opt/homebrew/share/postgresql@17/extension/jspg.control
 Copying shared library to /opt/homebrew/lib/postgresql@17/jspg.dylib
 Finished installing jspg
test tests::pg_test_cache_invalid ... ok
test tests::pg_test_validate_nested_req_deps ... ok
test tests::pg_test_validate_format_empty_string_with_ref ... ok
test tests::pg_test_validate_format_normal ... ok
test tests::pg_test_validate_format_empty_string ... ok
test tests::pg_test_validate_dependencies ... ok
test tests::pg_test_validate_dependencies_merging ... ok
test tests::pg_test_validate_additional_properties ... ok
test tests::pg_test_validate_enum_schema ... ok
test tests::pg_test_validate_errors ... ok
test tests::pg_test_validate_not_cached ... ok
test tests::pg_test_validate_oneof ... ok
test tests::pg_test_validate_punc_with_refs ... ok
test tests::pg_test_validate_property_merging ... ok
test tests::pg_test_validate_punc_local_refs ... ok
test tests::pg_test_validate_required_merging ... ok
test tests::pg_test_validate_required ... ok
test tests::pg_test_validate_simple ... ok
test tests::pg_test_validate_root_types ... ok
test tests::pg_test_validate_strict ... ok
test tests::pg_test_validate_title_override ... ok
test tests::pg_test_validate_unevaluated_properties ... ok
test tests::pg_test_validate_type_matching ... ok
test result: ok. 23 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 7.66s
running 0 tests
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s
running 0 tests
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s

1
rustfmt.toml Normal file

@@ -0,0 +1 @@
tab_spaces = 2

88
src/helpers.rs Normal file

@@ -0,0 +1,88 @@
use serde_json::Value;
use pgrx::JsonB;
// Simple test helpers for cleaner test code
pub fn assert_success(result: &JsonB) {
let json = &result.0;
if json.get("response").is_none() || json.get("errors").is_some() {
let pretty = serde_json::to_string_pretty(json).unwrap_or_else(|_| format!("{:?}", json));
panic!("Expected success but got:\n{}", pretty);
}
}
pub fn assert_failure(result: &JsonB) {
let json = &result.0;
if json.get("response").is_some() || !json.get("errors").is_some() {
let pretty = serde_json::to_string_pretty(json).unwrap_or_else(|_| format!("{:?}", json));
panic!("Expected failure but got:\n{}", pretty);
}
}
pub fn assert_error_count(result: &JsonB, expected_count: usize) {
assert_failure(result);
let errors = get_errors(result);
if errors.len() != expected_count {
let pretty = serde_json::to_string_pretty(&result.0).unwrap_or_else(|_| format!("{:?}", result.0));
panic!("Expected {} errors, got {}:\n{}", expected_count, errors.len(), pretty);
}
}
pub fn get_errors(result: &JsonB) -> &Vec<Value> {
result.0["errors"].as_array().expect("errors should be an array")
}
pub fn has_error_with_code(result: &JsonB, code: &str) -> bool {
get_errors(result).iter().any(|e| e["code"] == code)
}
pub fn has_error_with_code_and_path(result: &JsonB, code: &str, path: &str) -> bool {
get_errors(result).iter().any(|e| e["code"] == code && e["details"]["path"] == path)
}
pub fn assert_has_error(result: &JsonB, code: &str, path: &str) {
if !has_error_with_code_and_path(result, code, path) {
let pretty = serde_json::to_string_pretty(&result.0).unwrap_or_else(|_| format!("{:?}", result.0));
panic!("Expected error with code='{}' and path='{}' but not found:\n{}", code, path, pretty);
}
}
pub fn find_error_with_code<'a>(result: &'a JsonB, code: &str) -> &'a Value {
get_errors(result).iter().find(|e| e["code"] == code)
.unwrap_or_else(|| panic!("No error found with code '{}'", code))
}
pub fn find_error_with_code_and_path<'a>(result: &'a JsonB, code: &str, path: &str) -> &'a Value {
get_errors(result).iter().find(|e| e["code"] == code && e["details"]["path"] == path)
.unwrap_or_else(|| panic!("No error found with code '{}' and path '{}'", code, path))
}
pub fn assert_error_detail(error: &Value, detail_key: &str, expected_value: &str) {
let actual = error["details"][detail_key].as_str()
.unwrap_or_else(|| panic!("Error detail '{}' is not a string", detail_key));
assert_eq!(actual, expected_value, "Error detail '{}' mismatch", detail_key);
}
// Additional convenience helpers for common patterns
pub fn assert_error_message_contains(error: &Value, substring: &str) {
let message = error["message"].as_str().expect("error should have message");
assert!(message.contains(substring), "Expected message to contain '{}', got '{}'", substring, message);
}
pub fn assert_error_cause_json(error: &Value, expected_cause: &Value) {
let cause = &error["details"]["cause"];
assert!(cause.is_object(), "cause should be JSON object");
assert_eq!(cause, expected_cause, "cause mismatch");
}
pub fn assert_error_context(error: &Value, expected_context: &Value) {
assert_eq!(&error["details"]["context"], expected_context, "context mismatch");
}
pub fn jsonb(val: Value) -> JsonB {
JsonB(val)
}

1193
src/lib.rs

File diff suppressed because it is too large

910
src/schemas.rs Normal file

@@ -0,0 +1,910 @@
use crate::*;
use serde_json::{json, Value};
use pgrx::JsonB;
// Helper to convert Value to JsonB
fn jsonb(val: Value) -> JsonB {
JsonB(val)
}
pub fn simple_schemas() -> JsonB {
let enums = json!([]);
let types = json!([]);
let puncs = json!([{
"name": "simple",
"public": false,
"schemas": [{
"$id": "simple.request",
"type": "object",
"properties": {
"name": { "type": "string" },
"age": { "type": "integer", "minimum": 0 }
},
"required": ["name", "age"]
}]
}]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn invalid_schemas() -> JsonB {
let enums = json!([]);
let types = json!([]);
let puncs = json!([{
"name": "invalid_punc",
"public": false,
"schemas": [{
"$id": "invalid_punc.request",
"type": ["invalid_type_value"]
}]
}]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn errors_schemas() -> JsonB {
let enums = json!([]);
let types = json!([]);
let puncs = json!([{
"name": "detailed_errors_test",
"public": false,
"schemas": [{
"$id": "detailed_errors_test.request",
"type": "object",
"properties": {
"address": {
"type": "object",
"properties": {
"street": { "type": "string" },
"city": { "type": "string", "maxLength": 10 }
},
"required": ["street", "city"]
}
},
"required": ["address"]
}]
}]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn oneof_schemas() -> JsonB {
let enums = json!([]);
let types = json!([]);
let puncs = json!([{
"name": "oneof_test",
"public": false,
"schemas": [{
"$id": "oneof_test.request",
"oneOf": [
{
"type": "object",
"properties": {
"string_prop": { "type": "string", "maxLength": 5 }
},
"required": ["string_prop"]
},
{
"type": "object",
"properties": {
"number_prop": { "type": "number", "minimum": 10 }
},
"required": ["number_prop"]
}
]
}]
}]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn root_types_schemas() -> JsonB {
let enums = json!([]);
let types = json!([]);
let puncs = json!([
{
"name": "object_test",
"public": false,
"schemas": [{
"$id": "object_test.request",
"type": "object",
"properties": {
"name": { "type": "string" },
"age": { "type": "integer", "minimum": 0 }
},
"required": ["name", "age"]
}]
},
{
"name": "array_test",
"public": false,
"schemas": [{
"$id": "array_test.request",
"type": "array",
"items": {
"type": "object",
"properties": {
"id": { "type": "string", "format": "uuid" }
}
}
}]
}
]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn strict_schemas() -> JsonB {
let enums = json!([]);
let types = json!([]);
let puncs = json!([
{
"name": "basic_strict_test",
"public": true,
"schemas": [{
"$id": "basic_strict_test.request",
"type": "object",
"properties": {
"name": { "type": "string" }
}
}]
},
{
"name": "non_strict_test",
"public": false,
"schemas": [{
"$id": "non_strict_test.request",
"type": "object",
"properties": {
"name": { "type": "string" }
}
}]
},
{
"name": "nested_strict_test",
"public": true,
"schemas": [{
"$id": "nested_strict_test.request",
"type": "object",
"properties": {
"user": {
"type": "object",
"properties": {
"name": { "type": "string" }
}
},
"items": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": { "type": "string" }
}
}
}
}
}]
},
{
"name": "already_unevaluated_test",
"public": true,
"schemas": [{
"$id": "already_unevaluated_test.request",
"type": "object",
"properties": {
"name": { "type": "string" }
},
"unevaluatedProperties": true
}]
},
{
"name": "already_additional_test",
"public": true,
"schemas": [{
"$id": "already_additional_test.request",
"type": "object",
"properties": {
"name": { "type": "string" }
},
"additionalProperties": false
}]
},
{
"name": "conditional_strict_test",
"public": true,
"schemas": [{
"$id": "conditional_strict_test.request",
"type": "object",
"properties": {
"creating": { "type": "boolean" }
},
"if": {
"properties": {
"creating": { "const": true }
}
},
"then": {
"properties": {
"name": { "type": "string" }
},
"required": ["name"]
}
}]
}
]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn required_schemas() -> JsonB {
let enums = json!([]);
let types = json!([]);
let puncs = json!([{
"name": "basic_validation_test",
"public": false,
"schemas": [{
"$id": "basic_validation_test.request",
"type": "object",
"properties": {
"name": { "type": "string" },
"age": { "type": "integer", "minimum": 0 }
},
"required": ["name", "age"]
}]
}]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn dependencies_schemas() -> JsonB {
let enums = json!([]);
let types = json!([]);
let puncs = json!([{
"name": "dependency_split_test",
"public": false,
"schemas": [{
"$id": "dependency_split_test.request",
"type": "object",
"properties": {
"creating": { "type": "boolean" },
"name": { "type": "string" },
"kind": { "type": "string" },
"description": { "type": "string" }
},
"dependencies": {
"creating": ["name", "kind"]
}
}]
}]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn nested_req_deps_schemas() -> JsonB {
let enums = json!([]);
let types = json!([]);
let puncs = json!([{
"name": "nested_dep_test",
"public": false,
"schemas": [{
"$id": "nested_dep_test.request",
"type": "object",
"properties": {
"items": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": { "type": "string" },
"creating": { "type": "boolean" },
"name": { "type": "string" },
"kind": { "type": "string" }
},
"required": ["id"],
"dependencies": {
"creating": ["name", "kind"]
}
}
}
},
"required": ["items"]
}]
}]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn additional_properties_schemas() -> JsonB {
let enums = json!([]);
let types = json!([]);
let puncs = json!([
{
"name": "additional_props_test",
"public": false,
"schemas": [{
"$id": "additional_props_test.request",
"type": "object",
"properties": {
"name": { "type": "string" },
"age": { "type": "number" }
},
"additionalProperties": false
}]
},
{
"name": "nested_additional_props_test",
"public": false,
"schemas": [{
"$id": "nested_additional_props_test.request",
"type": "object",
"properties": {
"user": {
"type": "object",
"properties": {
"name": { "type": "string" }
},
"additionalProperties": false
}
}
}]
}
]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn unevaluated_properties_schemas() -> JsonB {
let enums = json!([]);
let types = json!([{
"name": "nested_for_uneval",
"schemas": [{
"$id": "nested_for_uneval",
"type": "object",
"properties": {
"deep_prop": { "type": "string" }
}
}]
}]);
let puncs = json!([
{
"name": "simple_unevaluated_test",
"public": false,
"schemas": [{
"$id": "simple_unevaluated_test.request",
"type": "object",
"properties": {
"name": { "type": "string" },
"age": { "type": "number" }
},
"patternProperties": {
"^attr_": { "type": "string" }
},
"unevaluatedProperties": false
}]
},
{
"name": "conditional_unevaluated_test",
"public": false,
"schemas": [{
"$id": "conditional_unevaluated_test.request",
"type": "object",
"allOf": [
{
"properties": {
"firstName": { "type": "string" }
}
},
{
"properties": {
"lastName": { "type": "string" }
}
}
],
"properties": {
"age": { "type": "number" }
},
"unevaluatedProperties": false
}]
},
{
"name": "nested_unevaluated_test",
"public": true, // To trigger strict mode
"schemas": [{
"$id": "nested_unevaluated_test.request",
"type": "object",
"properties": {
"non_strict_branch": {
"type": "object",
"unevaluatedProperties": true, // The magic switch
"properties": {
"some_prop": { "$ref": "nested_for_uneval" }
}
},
"strict_branch": {
"type": "object",
"properties": {
"another_prop": { "type": "string" }
}
}
}
}]
}
]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn format_schemas() -> JsonB {
let enums = json!([]);
let types = json!([]);
let puncs = json!([{
"name": "format_test",
"public": false,
"schemas": [{
"$id": "format_test.request",
"type": "object",
"properties": {
"uuid": { "type": "string", "format": "uuid" },
"date_time": { "type": "string", "format": "date-time" },
"email": { "type": "string", "format": "email" }
}
}]
}]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn property_merging_schemas() -> JsonB {
let enums = json!([]);
let types = json!([
{
"name": "entity",
"schemas": [{
"$id": "entity",
"type": "object",
"properties": {
"id": { "type": "string" },
"name": { "type": "string" },
"type": { "type": "string" }
},
"required": ["id"]
}]
},
{
"name": "user",
"schemas": [{
"$id": "user",
"$ref": "entity",
"properties": {
"password": { "type": "string", "minLength": 8 }
},
"required": ["password"]
}]
},
{
"name": "person",
"schemas": [{
"$id": "person",
"$ref": "user",
"properties": {
"first_name": { "type": "string", "minLength": 1 },
"last_name": { "type": "string", "minLength": 1 }
},
"required": ["first_name", "last_name"]
}]
}
]);
let puncs = json!([]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn required_merging_schemas() -> JsonB {
let enums = json!([]);
let types = json!([
{
"name": "entity",
"schemas": [{
"$id": "entity",
"type": "object",
"properties": {
"id": { "type": "string", "format": "uuid" },
"type": { "type": "string" },
"created_by": { "type": "string", "format": "uuid" }
},
"required": ["id", "type", "created_by"]
}]
},
{
"name": "user",
"schemas": [{
"$id": "user",
"$ref": "entity",
"properties": {
"password": { "type": "string", "minLength": 8 }
},
"if": {
"properties": { "type": { "const": "user" } }
},
"then": {
"required": ["password"]
}
}]
},
{
"name": "person",
"schemas": [{
"$id": "person",
"$ref": "user",
"properties": {
"first_name": { "type": "string", "minLength": 1 },
"last_name": { "type": "string", "minLength": 1 }
},
"if": {
"properties": { "type": { "const": "person" } }
},
"then": {
"required": ["first_name", "last_name"]
}
}]
}
]);
let puncs = json!([]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn dependencies_merging_schemas() -> JsonB {
let enums = json!([]);
let types = json!([
{
"name": "entity",
"schemas": [{
"$id": "entity",
"type": "object",
"properties": {
"id": { "type": "string", "format": "uuid" },
"type": { "type": "string" },
"created_by": { "type": "string", "format": "uuid" },
"creating": { "type": "boolean" },
"name": { "type": "string" }
},
"required": ["id", "type", "created_by"],
"dependencies": {
"creating": ["name"]
}
}]
},
{
"name": "user",
"schemas": [{
"$id": "user",
"$ref": "entity",
"properties": {
"password": { "type": "string", "minLength": 8 }
},
"dependencies": {
"creating": ["name"]
}
}]
},
{
"name": "person",
"schemas": [{
"$id": "person",
"$ref": "user",
"properties": {
"first_name": { "type": "string", "minLength": 1 },
"last_name": { "type": "string", "minLength": 1 }
},
"dependencies": {
"creating": ["first_name", "last_name"]
}
}]
}
]);
let puncs = json!([]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn punc_with_refs_schemas() -> JsonB {
let enums = json!([]);
let types = json!([
{
"name": "entity",
"schemas": [{
"$id": "entity",
"type": "object",
"properties": {
"id": { "type": "string" },
"name": { "type": "string" },
"type": { "type": "string" }
},
"required": ["id", "type"]
}]
},
{
"name": "person",
"schemas": [{
"$id": "person",
"$ref": "entity",
"properties": {
"first_name": { "type": "string", "minLength": 1 },
"last_name": { "type": "string", "minLength": 1 },
"address": {
"type": "object",
"properties": {
"street": { "type": "string" },
"city": { "type": "string" }
},
"required": ["street", "city"]
}
}
}]
}
]);
let puncs = json!([
{
"name": "public_ref_test",
"public": true,
"schemas": [{
"$id": "public_ref_test.request",
"$ref": "person"
}]
},
{
"name": "private_ref_test",
"public": false,
"schemas": [{
"$id": "private_ref_test.request",
"$ref": "person"
}]
}
]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn enum_schemas() -> JsonB {
let enums = json!([
{
"name": "task_priority",
"values": ["low", "medium", "high", "urgent"],
"schemas": [{
"$id": "task_priority",
"type": "string",
"enum": ["low", "medium", "high", "urgent"]
}]
}
]);
let types = json!([]);
let puncs = json!([{
"name": "enum_ref_test",
"public": false,
"schemas": [{
"$id": "enum_ref_test.request",
"type": "object",
"properties": {
"priority": { "$ref": "task_priority" }
},
"required": ["priority"]
}]
}]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn punc_local_refs_schemas() -> JsonB {
let enums = json!([]);
let types = json!([
{
"name": "global_thing",
"schemas": [{
"$id": "global_thing",
"type": "object",
"properties": {
"id": { "type": "string", "format": "uuid" },
"type": { "type": "string" }
},
"required": ["id", "type"]
}]
}
]);
let puncs = json!([
{
"name": "punc_with_local_ref_test",
"public": false,
"schemas": [
{
"$id": "local_address",
"type": "object",
"properties": {
"street": { "type": "string" },
"city": { "type": "string" }
},
"required": ["street", "city"]
},
{
"$id": "punc_with_local_ref_test.request",
"$ref": "local_address"
}
]
},
{
"name": "punc_with_local_ref_to_global_test",
"public": false,
"schemas": [
{
"$id": "local_user_with_thing",
"type": "object",
"properties": {
"user_name": { "type": "string" },
"thing": { "$ref": "global_thing" }
},
"required": ["user_name", "thing"]
},
{
"$id": "punc_with_local_ref_to_global_test.request",
"$ref": "local_user_with_thing"
}
]
}
]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn title_override_schemas() -> JsonB {
let enums = json!([]);
let types = json!([
{
"name": "base_with_title",
"schemas": [{
"$id": "base_with_title",
"type": "object",
"title": "Base Title",
"properties": {
"name": { "type": "string" },
"type": { "type": "string" }
},
"required": ["name"]
}]
},
{
"name": "override_with_title",
"schemas": [{
"$id": "override_with_title",
"$ref": "base_with_title",
"title": "Override Title"
}]
}
]);
let puncs = json!([]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn format_with_ref_schemas() -> JsonB {
let enums = json!([]);
let types = json!([
{
"name": "entity",
"schemas": [{
"$id": "entity",
"type": "object",
"properties": {
"id": { "type": "string", "format": "uuid" },
"type": { "type": "string" },
"name": { "type": "string" }
},
"required": ["id", "type"]
}]
},
{
"name": "job",
"schemas": [{
"$id": "job",
"$ref": "entity",
"properties": {
"worker_id": { "type": "string", "format": "uuid" }
}
}]
}
]);
let puncs = json!([{
"name": "save_job",
"public": true,
"schemas": [
{
"$id": "save_job.request",
"$ref": "job"
},
{
"$id": "save_job.response",
"$ref": "job"
}
]
}]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}
pub fn type_matching_schemas() -> JsonB {
let enums = json!([]);
let types = json!([
{
"name": "entity",
"schemas": [{
"$id": "entity",
"type": "object",
"properties": { "type": { "type": "string" }, "name": { "type": "string" } },
"required": ["type", "name"]
}]
},
{
"name": "job",
"schemas": [{
"$id": "job",
"$ref": "entity",
"properties": { "job_id": { "type": "string" } },
"required": ["job_id"]
}]
},
{
"name": "super_job",
"schemas": [
{
"$id": "super_job",
"$ref": "job",
"properties": { "manager_id": { "type": "string" } },
"required": ["manager_id"]
},
{
"$id": "super_job.short",
"$ref": "super_job",
"properties": { "name": { "maxLength": 10 } }
}
]
}
]);
let puncs = json!([{
"name": "type_test_punc",
"public": false,
"schemas": [{
"$id": "type_test_punc.request",
"type": "object",
"properties": {
"root_job": { "$ref": "job" },
"nested_or_super_job": {
"oneOf": [
{ "$ref": "super_job" },
{
"type": "object",
"properties": {
"my_job": { "$ref": "job" }
},
"required": ["my_job"]
}
]
}
},
"required": ["root_job", "nested_or_super_job"]
}]
}]);
cache_json_schemas(jsonb(enums), jsonb(types), jsonb(puncs))
}

932
src/tests.rs Normal file

@@ -0,0 +1,932 @@
use crate::*;
use crate::helpers::*;
use crate::schemas::*;
use serde_json::json;
use pgrx::pg_test;
#[pg_test]
fn test_validate_not_cached() {
clear_json_schemas();
let instance = json!({ "foo": "bar" });
let result = validate_json_schema("non_existent_schema", jsonb(instance));
assert_error_count(&result, 1);
let error = find_error_with_code(&result, "SCHEMA_NOT_FOUND");
assert_error_message_contains(error, "Schema 'non_existent_schema' not found");
}
#[pg_test]
fn test_validate_simple() {
// Use specific schema setup for this test
let cache_result = simple_schemas();
assert_success(&cache_result);
// Test the basic validation schema
let valid_instance = json!({ "name": "Alice", "age": 30 });
let invalid_instance_type = json!({ "name": "Bob", "age": -5 });
let invalid_instance_missing = json!({ "name": "Charlie" });
let valid_result = validate_json_schema("simple.request", jsonb(valid_instance));
assert_success(&valid_result);
// Invalid type - age is negative
let invalid_result_type = validate_json_schema("simple.request", jsonb(invalid_instance_type));
assert_error_count(&invalid_result_type, 1);
let error = find_error_with_code_and_path(&invalid_result_type, "MINIMUM_VIOLATED", "/age");
assert_error_detail(error, "schema", "simple.request");
assert_error_context(error, &json!(-5));
assert_error_cause_json(error, &json!({"got": -5, "want": 0}));
assert_error_message_contains(error, "Value must be at least 0, but got -5");
// Missing field
let invalid_result_missing = validate_json_schema("simple.request", jsonb(invalid_instance_missing));
assert_error_count(&invalid_result_missing, 1);
let missing_error = find_error_with_code_and_path(&invalid_result_missing, "REQUIRED_FIELD_MISSING", "/age");
assert_error_detail(missing_error, "schema", "simple.request");
assert_error_cause_json(missing_error, &json!({"want": ["age"]}));
assert_error_message_contains(missing_error, "Required field 'age' is missing");
}
#[pg_test]
fn test_cache_invalid() {
let cache_result = invalid_schemas();
assert_error_count(&cache_result, 2);
assert!(has_error_with_code(&cache_result, "ENUM_VIOLATED"),
"Should have ENUM_VIOLATED errors");
}
#[pg_test]
fn test_validate_errors() {
let cache_result = errors_schemas();
assert_success(&cache_result);
let invalid_instance = json!({
"address": {
"street": 123, // Wrong type
"city": "Supercalifragilisticexpialidocious" // Too long (maxLength: 10)
}
});
let result = validate_json_schema("detailed_errors_test.request", jsonb(invalid_instance));
// Expect 2 errors: one for type mismatch, one for maxLength violation
assert_error_count(&result, 2);
assert_has_error(&result, "TYPE_MISMATCH", "/address/street");
assert_has_error(&result, "MAX_LENGTH_VIOLATED", "/address/city");
}
#[pg_test]
fn test_validate_oneof() {
let cache_result = oneof_schemas();
assert_success(&cache_result);
// --- Test case 1: Fails string maxLength (in branch 0) AND missing number_prop (in branch 1) ---
let invalid_string_instance = json!({ "string_prop": "toolongstring" });
let result_invalid_string = validate_json_schema("oneof_test.request", jsonb(invalid_string_instance));
assert_error_count(&result_invalid_string, 2);
assert_has_error(&result_invalid_string, "MAX_LENGTH_VIOLATED", "/string_prop");
assert_has_error(&result_invalid_string, "REQUIRED_FIELD_MISSING", "/number_prop");
// --- Test case 2: Fails number minimum (in branch 1) AND missing string_prop (in branch 0) ---
let invalid_number_instance = json!({ "number_prop": 5 });
let result_invalid_number = validate_json_schema("oneof_test.request", jsonb(invalid_number_instance));
assert_error_count(&result_invalid_number, 2);
assert_has_error(&result_invalid_number, "MINIMUM_VIOLATED", "/number_prop");
assert_has_error(&result_invalid_number, "REQUIRED_FIELD_MISSING", "/string_prop");
// --- Test case 3: Fails type check (not object) for both branches ---
// Input: boolean, expected object for both branches
let invalid_bool_instance = json!(true); // Not an object
let result_invalid_bool = validate_json_schema("oneof_test.request", jsonb(invalid_bool_instance));
// Expect only 1 leaf error after filtering, as both original errors have instance_path ""
assert_error_count(&result_invalid_bool, 1);
let error = find_error_with_code_and_path(&result_invalid_bool, "TYPE_MISMATCH", "");
assert_error_detail(error, "schema", "oneof_test.request");
// --- Test case 4: Fails missing required for both branches ---
// Input: empty object, expected string_prop (branch 0) OR number_prop (branch 1)
let invalid_empty_obj = json!({});
let result_empty_obj = validate_json_schema("oneof_test.request", jsonb(invalid_empty_obj));
// Now we expect 2 errors because required fields are split into individual errors
assert_error_count(&result_empty_obj, 2);
assert_has_error(&result_empty_obj, "REQUIRED_FIELD_MISSING", "/string_prop");
assert_has_error(&result_empty_obj, "REQUIRED_FIELD_MISSING", "/number_prop");
}
#[pg_test]
fn test_validate_root_types() {
let cache_result = root_types_schemas();
assert_success(&cache_result);
// Test 1: Validate null against array schema (using array_test from comprehensive setup)
let null_instance = json!(null);
let null_result = validate_json_schema("array_test.request", jsonb(null_instance));
assert_error_count(&null_result, 1);
let null_error = find_error_with_code_and_path(&null_result, "TYPE_MISMATCH", "");
assert_error_detail(null_error, "schema", "array_test.request");
assert_error_context(null_error, &json!(null));
assert_error_cause_json(null_error, &json!({"got": "null", "want": ["array"]}));
assert_error_message_contains(null_error, "Expected array but got null");
// Test 2: Validate object against array schema
let object_instance = json!({"id": "not-an-array"});
let object_result = validate_json_schema("array_test.request", jsonb(object_instance.clone()));
assert_error_count(&object_result, 1);
let object_error = find_error_with_code_and_path(&object_result, "TYPE_MISMATCH", "");
assert_error_detail(object_error, "schema", "array_test.request");
assert_error_context(object_error, &object_instance);
assert_error_cause_json(object_error, &json!({"got": "object", "want": ["array"]}));
assert_error_message_contains(object_error, "Expected array but got object");
// Test 3: Valid empty array
let valid_empty = json!([]);
let valid_result = validate_json_schema("array_test.request", jsonb(valid_empty));
assert_success(&valid_result);
// Test 4: String at root when object expected (using object_test)
let string_instance = json!("not an object");
let string_result = validate_json_schema("object_test.request", jsonb(string_instance));
assert_error_count(&string_result, 1);
let string_error = find_error_with_code_and_path(&string_result, "TYPE_MISMATCH", "");
assert_error_detail(string_error, "schema", "object_test.request");
assert_error_context(string_error, &json!("not an object"));
assert_error_cause_json(string_error, &json!({"got": "string", "want": ["object"]}));
assert_error_message_contains(string_error, "Expected object but got string");
}
#[pg_test]
fn test_validate_strict() {
let cache_result = strict_schemas();
assert_success(&cache_result);
// Test 1: Basic strict validation - extra properties should fail
let valid_basic = json!({ "name": "John" });
let invalid_basic = json!({ "name": "John", "extra": "not allowed" });
let result_basic_valid = validate_json_schema("basic_strict_test.request", jsonb(valid_basic));
assert_success(&result_basic_valid);
let result_basic_invalid = validate_json_schema("basic_strict_test.request", jsonb(invalid_basic.clone()));
assert_error_count(&result_basic_invalid, 1);
assert_has_error(&result_basic_invalid, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/extra");
// Test 2: Non-strict validation - extra properties should pass
let result_non_strict = validate_json_schema("non_strict_test.request", jsonb(invalid_basic.clone()));
assert_success(&result_non_strict);
// Test 3: Nested objects and arrays - test recursive strict validation
let valid_nested = json!({
"user": { "name": "Alice" },
"items": [{ "id": "123" }]
});
let invalid_nested = json!({
"user": { "name": "Alice", "extra": "not allowed" }, // Extra in nested object
"items": [{ "id": "123", "extra": "not allowed" }] // Extra in array item
});
let result_nested_valid = validate_json_schema("nested_strict_test.request", jsonb(valid_nested));
assert_success(&result_nested_valid);
let result_nested_invalid = validate_json_schema("nested_strict_test.request", jsonb(invalid_nested));
assert_error_count(&result_nested_invalid, 2);
assert_has_error(&result_nested_invalid, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/user/extra");
assert_has_error(&result_nested_invalid, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/items/0/extra");
// Test 4: Schema with unevaluatedProperties already set - should allow extras
let result_already_unevaluated = validate_json_schema("already_unevaluated_test.request", jsonb(invalid_basic.clone()));
assert_success(&result_already_unevaluated);
// Test 5: Schema with additionalProperties already set - should follow that setting
let result_already_additional = validate_json_schema("already_additional_test.request", jsonb(invalid_basic));
assert_error_count(&result_already_additional, 1);
assert_has_error(&result_already_additional, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/extra");
// Test 6: Conditional schemas - properties in if/then/else should not be restricted
let valid_conditional = json!({
"creating": true,
"name": "Test" // Required when creating=true
});
let invalid_conditional = json!({
"creating": true,
"name": "Test",
"extra": "not allowed" // Extra property at root level
});
let result_conditional_valid = validate_json_schema("conditional_strict_test.request", jsonb(valid_conditional));
assert_success(&result_conditional_valid);
let result_conditional_invalid = validate_json_schema("conditional_strict_test.request", jsonb(invalid_conditional));
assert_error_count(&result_conditional_invalid, 1);
assert_has_error(&result_conditional_invalid, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/extra");
}
#[pg_test]
fn test_validate_required() {
let cache_result = required_schemas();
assert_success(&cache_result);
// Test 1: Missing all required fields (using basic_validation_test which requires name and age)
let empty_instance = json!({});
let result = validate_json_schema("basic_validation_test.request", jsonb(empty_instance));
// Should get 2 separate errors, one for each missing field
assert_error_count(&result, 2);
let name_error = find_error_with_code_and_path(&result, "REQUIRED_FIELD_MISSING", "/name");
assert_error_message_contains(name_error, "Required field 'name' is missing");
let age_error = find_error_with_code_and_path(&result, "REQUIRED_FIELD_MISSING", "/age");
assert_error_message_contains(age_error, "Required field 'age' is missing");
// Test 2: Missing only some required fields
let partial_instance = json!({
"name": "Alice"
});
let partial_result = validate_json_schema("basic_validation_test.request", jsonb(partial_instance));
// Should get 1 error for the missing field
assert_error_count(&partial_result, 1);
assert_has_error(&partial_result, "REQUIRED_FIELD_MISSING", "/age");
}
#[pg_test]
fn test_validate_dependencies() {
let cache_result = dependencies_schemas();
assert_success(&cache_result);
// Test 1: Has creating=true but missing both dependent fields
let missing_both = json!({
"creating": true,
"description": "Some description"
});
let result = validate_json_schema("dependency_split_test.request", jsonb(missing_both));
// Should get 2 separate errors, one for each missing dependent field
assert_error_count(&result, 2);
let name_dep_error = find_error_with_code_and_path(&result, "DEPENDENCY_FAILED", "/name");
assert_error_message_contains(name_dep_error, "Field 'name' is required when 'creating' is present");
let kind_dep_error = find_error_with_code_and_path(&result, "DEPENDENCY_FAILED", "/kind");
assert_error_message_contains(kind_dep_error, "Field 'kind' is required when 'creating' is present");
// Test 2: Has creating=true with only one dependent field
let missing_one = json!({
"creating": true,
"name": "My Account"
});
let result_one = validate_json_schema("dependency_split_test.request", jsonb(missing_one));
// Should get 1 error for the missing kind field
assert_error_count(&result_one, 1);
let kind_error = find_error_with_code_and_path(&result_one, "DEPENDENCY_FAILED", "/kind");
assert_error_message_contains(kind_error, "Field 'kind' is required when 'creating' is present");
// Test 3: Has no creating field - no dependency errors
let no_creating = json!({
"description": "No creating field"
});
let result_no_creating = validate_json_schema("dependency_split_test.request", jsonb(no_creating));
assert_success(&result_no_creating);
// Test 4: Has creating=false - dependencies still apply because field exists!
let creating_false = json!({
"creating": false,
"description": "Creating is false"
});
let result_false = validate_json_schema("dependency_split_test.request", jsonb(creating_false));
// Dependencies are triggered by field existence, not value, so this should fail
assert_error_count(&result_false, 2);
assert_has_error(&result_false, "DEPENDENCY_FAILED", "/name");
assert_has_error(&result_false, "DEPENDENCY_FAILED", "/kind");
}
#[pg_test]
fn test_validate_nested_req_deps() {
let cache_result = nested_req_deps_schemas();
assert_success(&cache_result);
// Test with array items that have dependency violations
let instance = json!({
"items": [
{
"id": "item1",
"creating": true
// Missing name and kind
},
{
"id": "item2",
"creating": true,
"name": "Item 2"
// Missing kind
}
]
});
let result = validate_json_schema("nested_dep_test.request", jsonb(instance));
// Should get 3 errors total: 2 for first item, 1 for second item
assert_error_count(&result, 3);
// Check paths are correct for array items
assert_has_error(&result, "DEPENDENCY_FAILED", "/items/0/name");
assert_has_error(&result, "DEPENDENCY_FAILED", "/items/0/kind");
assert_has_error(&result, "DEPENDENCY_FAILED", "/items/1/kind");
}
#[pg_test]
fn test_validate_additional_properties() {
let cache_result = additional_properties_schemas();
assert_success(&cache_result);
// Test 1: Multiple additional properties not allowed
let instance_many_extras = json!({
"name": "Alice",
"age": 30,
"extra1": "not allowed",
"extra2": 42,
"extra3": true
});
let result = validate_json_schema("additional_props_test.request", jsonb(instance_many_extras));
// Should get 3 separate errors, one for each additional property
assert_error_count(&result, 3);
let extra1_error = find_error_with_code_and_path(&result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/extra1");
assert_error_message_contains(extra1_error, "Property 'extra1' is not allowed");
let extra2_error = find_error_with_code_and_path(&result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/extra2");
assert_error_message_contains(extra2_error, "Property 'extra2' is not allowed");
let extra3_error = find_error_with_code_and_path(&result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/extra3");
assert_error_message_contains(extra3_error, "Property 'extra3' is not allowed");
// Test 2: Single additional property
let instance_one_extra = json!({
"name": "Bob",
"age": 25,
"unauthorized": "field"
});
let result_one = validate_json_schema("additional_props_test.request", jsonb(instance_one_extra));
// Should get 1 error for the additional property
assert_error_count(&result_one, 1);
let unauthorized_error = find_error_with_code_and_path(&result_one, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/unauthorized");
assert_error_message_contains(unauthorized_error, "Property 'unauthorized' is not allowed");
// Test 3: Nested objects with additional properties (already in comprehensive setup)
let nested_instance = json!({
"user": {
"name": "Charlie",
"role": "admin",
"level": 5
}
});
let nested_result = validate_json_schema("nested_additional_props_test.request", jsonb(nested_instance));
// Should get 2 errors for the nested additional properties
assert_error_count(&nested_result, 2);
assert_has_error(&nested_result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/user/role");
assert_has_error(&nested_result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/user/level");
}
#[pg_test]
fn test_validate_unevaluated_properties() {
let cache_result = unevaluated_properties_schemas();
assert_success(&cache_result);
// Test 1: Multiple unevaluated properties
let instance_uneval = json!({
"name": "Alice",
"age": 30,
"attr_color": "blue", // This is OK - matches pattern
"extra1": "not evaluated", // These should fail
"extra2": 42,
"extra3": true
});
let result = validate_json_schema("simple_unevaluated_test.request", jsonb(instance_uneval));
// Should get 3 separate ADDITIONAL_PROPERTIES_NOT_ALLOWED errors, one for each unevaluated property
assert_error_count(&result, 3);
// Verify all errors are ADDITIONAL_PROPERTIES_NOT_ALLOWED and check paths
assert_has_error(&result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/extra1");
assert_has_error(&result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/extra2");
assert_has_error(&result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/extra3");
// Verify error messages
let extra1_error = find_error_with_code_and_path(&result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/extra1");
assert_error_message_contains(extra1_error, "Property 'extra1' is not allowed");
// Test 2: Complex schema with allOf and unevaluatedProperties (already in comprehensive setup)
// firstName and lastName are evaluated by allOf schemas, age by main schema
let complex_instance = json!({
"firstName": "John",
"lastName": "Doe",
"age": 25,
"nickname": "JD", // Not evaluated by any schema
"title": "Mr" // Not evaluated by any schema
});
let complex_result = validate_json_schema("conditional_unevaluated_test.request", jsonb(complex_instance));
// Should get 2 ADDITIONAL_PROPERTIES_NOT_ALLOWED errors for unevaluated properties
assert_error_count(&complex_result, 2);
assert_has_error(&complex_result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/nickname");
assert_has_error(&complex_result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/title");
// Test 3: Valid instance with all properties evaluated
let valid_instance = json!({
"name": "Bob",
"age": 40,
"attr_style": "modern",
"attr_theme": "dark"
});
let valid_result = validate_json_schema("simple_unevaluated_test.request", jsonb(valid_instance));
assert_success(&valid_result);
// Test 4: Test that unevaluatedProperties: true cascades down refs
let cascading_instance = json!({
"strict_branch": {
"another_prop": "is_ok"
},
"non_strict_branch": {
"extra_at_toplevel": "is_ok", // Extra property at this level
"some_prop": {
"deep_prop": "is_ok",
"extra_in_ref": "is_also_ok" // Extra property in the $ref'd schema
}
}
});
let cascading_result = validate_json_schema("nested_unevaluated_test.request", jsonb(cascading_instance));
assert_success(&cascading_result);
// Test 5: For good measure, test that the strict branch is still strict
let strict_fail_instance = json!({
"strict_branch": {
"another_prop": "is_ok",
"extra_in_strict": "is_not_ok"
}
});
let strict_fail_result = validate_json_schema("nested_unevaluated_test.request", jsonb(strict_fail_instance));
assert_error_count(&strict_fail_result, 1);
assert_has_error(&strict_fail_result, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/strict_branch/extra_in_strict");
}
#[pg_test]
fn test_validate_format_normal() {
let cache_result = format_schemas();
assert_success(&cache_result);
// A non-empty but invalid string should still fail
let instance = json!({
"date_time": "not-a-date"
});
let result = validate_json_schema("format_test.request", jsonb(instance));
assert_error_count(&result, 1);
let error = find_error_with_code(&result, "FORMAT_INVALID");
assert_error_message_contains(error, "not-a-date");
}
#[pg_test]
fn test_validate_format_empty_string() {
let cache_result = format_schemas();
assert_success(&cache_result);
// Test with empty strings for all formatted fields
let instance = json!({
"uuid": "",
"date_time": "",
"email": ""
});
let result = validate_json_schema("format_test.request", jsonb(instance));
// This is the test that should fail before the change and pass after
assert_success(&result);
}
#[pg_test]
fn test_validate_format_empty_string_with_ref() {
let cache_result = format_with_ref_schemas();
assert_success(&cache_result);
// Test that an optional field with a format constraint passes validation
// when the value is an empty string, even when the schema is referenced by a punc.
let instance = json!({
"id": "123e4567-e89b-12d3-a456-426614174000",
"type": "job",
"worker_id": "" // Optional field with format, but empty string
});
let result = validate_json_schema("save_job.request", jsonb(instance));
// This should succeed because empty strings are ignored for format validation.
assert_success(&result);
}
#[pg_test]
fn test_validate_property_merging() {
let cache_result = property_merging_schemas();
assert_success(&cache_result);
// Test that person schema has all properties from the inheritance chain:
// entity (id, name) + user (password) + person (first_name, last_name)
let valid_person_with_all_properties = json!({
// From entity
"id": "550e8400-e29b-41d4-a716-446655440000",
"name": "John Doe",
"type": "person",
// From user
"password": "securepass123",
// From person
"first_name": "John",
"last_name": "Doe"
});
let result = validate_json_schema("person", jsonb(valid_person_with_all_properties));
assert_success(&result);
// Test that properties validate according to their schema definitions across the chain
let invalid_mixed_properties = json!({
"id": "550e8400-e29b-41d4-a716-446655440000",
"name": "John Doe",
"type": "person",
"password": "short", // Too short from user schema
"first_name": "", // Empty string violates person schema minLength
"last_name": "Doe"
});
let result_invalid = validate_json_schema("person", jsonb(invalid_mixed_properties));
assert_error_count(&result_invalid, 2);
assert_has_error(&result_invalid, "MIN_LENGTH_VIOLATED", "/password");
assert_has_error(&result_invalid, "MIN_LENGTH_VIOLATED", "/first_name");
}
#[pg_test]
fn test_validate_required_merging() {
let cache_result = required_merging_schemas();
assert_success(&cache_result);
// Test that required fields are merged from inheritance chain:
// entity: ["id", "type", "created_by"]
// user: ["password"] (conditional when type=user)
// person: ["first_name", "last_name"] (conditional when type=person)
let missing_all_required = json!({ "type": "person" });
let result = validate_json_schema("person", jsonb(missing_all_required));
// Should fail for all required fields across inheritance chain
assert_error_count(&result, 4); // id, created_by, first_name, last_name
assert_has_error(&result, "REQUIRED_FIELD_MISSING", "/id");
assert_has_error(&result, "REQUIRED_FIELD_MISSING", "/created_by");
assert_has_error(&result, "REQUIRED_FIELD_MISSING", "/first_name");
assert_has_error(&result, "REQUIRED_FIELD_MISSING", "/last_name");
// Test conditional requirements work through inheritance
let with_person_type = json!({
"id": "550e8400-e29b-41d4-a716-446655440000",
"type": "person",
"created_by": "550e8400-e29b-41d4-a716-446655440001"
// Missing password (required when type=user, which person inherits from)
// Missing first_name, last_name (required when type=person)
});
let result_conditional = validate_json_schema("person", jsonb(with_person_type));
assert_error_count(&result_conditional, 2); // first_name, last_name
assert_has_error(&result_conditional, "REQUIRED_FIELD_MISSING", "/first_name");
assert_has_error(&result_conditional, "REQUIRED_FIELD_MISSING", "/last_name");
}
#[pg_test]
fn test_validate_dependencies_merging() {
let cache_result = dependencies_merging_schemas();
assert_success(&cache_result);
// Test dependencies are merged across inheritance:
// user: creating -> ["name"]
// person: creating -> ["first_name", "last_name"]
let with_creating_missing_deps = json!({
"id": "550e8400-e29b-41d4-a716-446655440000",
"type": "person",
"created_by": "550e8400-e29b-41d4-a716-446655440001",
"creating": true,
"password": "securepass"
// Missing name (from user dependency)
// Missing first_name, last_name (from person dependency)
});
let result = validate_json_schema("person", jsonb(with_creating_missing_deps));
assert_error_count(&result, 3); // name, first_name, last_name
assert_has_error(&result, "DEPENDENCY_FAILED", "/name");
assert_has_error(&result, "DEPENDENCY_FAILED", "/first_name");
assert_has_error(&result, "DEPENDENCY_FAILED", "/last_name");
// Test partial dependency satisfaction
let with_some_deps = json!({
"id": "550e8400-e29b-41d4-a716-446655440000",
"type": "person",
"created_by": "550e8400-e29b-41d4-a716-446655440001",
"creating": true,
"password": "securepass",
"name": "John Doe",
"first_name": "John"
// Missing last_name from person dependency
});
let result_partial = validate_json_schema("person", jsonb(with_some_deps));
assert_error_count(&result_partial, 1);
assert_has_error(&result_partial, "DEPENDENCY_FAILED", "/last_name");
}
#[pg_test]
fn test_validate_punc_with_refs() {
let cache_result = punc_with_refs_schemas();
assert_success(&cache_result);
// Test 1: Public punc is strict - no extra properties allowed at root level
let public_root_extra = json!({
"type": "person",
"id": "550e8400-e29b-41d4-a716-446655440000",
"name": "John Doe",
"first_name": "John",
"last_name": "Doe",
"extra_field": "not allowed at root", // Should fail in public punc
"another_extra": 123 // Should also fail in public punc
});
let result_public_root = validate_json_schema("public_ref_test.request", jsonb(public_root_extra));
assert_error_count(&result_public_root, 2);
assert_has_error(&result_public_root, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/extra_field");
assert_has_error(&result_public_root, "ADDITIONAL_PROPERTIES_NOT_ALLOWED", "/another_extra");
// Test 2: Private punc allows extra properties at root level
let private_root_extra = json!({
"type": "person",
"id": "550e8400-e29b-41d4-a716-446655440000",
"name": "John Doe",
"first_name": "John",
"last_name": "Doe",
"extra_field": "allowed at root in private punc", // Should pass in private punc
"another_extra": 123 // Should also pass in private punc
});
let result_private_root = validate_json_schema("private_ref_test.request", jsonb(private_root_extra));
assert_success(&result_private_root); // Should pass with extra properties at root
// Test 3: Valid data with address should pass for both
let valid_data_with_address = json!({
"type": "person",
"id": "550e8400-e29b-41d4-a716-446655440000",
"name": "John Doe",
"first_name": "John",
"last_name": "Doe",
"address": {
"street": "123 Main St",
"city": "Boston"
}
});
let result_public_valid = validate_json_schema("public_ref_test.request", jsonb(valid_data_with_address.clone()));
assert_success(&result_public_valid);
let result_private_valid = validate_json_schema("private_ref_test.request", jsonb(valid_data_with_address));
assert_success(&result_private_valid);
}
#[pg_test]
fn test_validate_enum_schema() {
let cache_result = enum_schemas();
assert_success(&cache_result);
// Test valid enum value
let valid_priority = json!({
"priority": "high"
});
let result = validate_json_schema("enum_ref_test.request", jsonb(valid_priority));
assert_success(&result);
// Test invalid enum value for priority (required field)
let invalid_priority = json!({
"priority": "critical" // Invalid - not in task_priority enum
});
let result_priority = validate_json_schema("enum_ref_test.request", jsonb(invalid_priority));
assert_error_count(&result_priority, 1);
assert_has_error(&result_priority, "ENUM_VIOLATED", "/priority");
// Test missing required enum field
let missing_priority = json!({});
let result_missing = validate_json_schema("enum_ref_test.request", jsonb(missing_priority));
assert_error_count(&result_missing, 1);
assert_has_error(&result_missing, "REQUIRED_FIELD_MISSING", "/priority");
}
#[pg_test]
fn test_validate_punc_local_refs() {
let cache_result = punc_local_refs_schemas();
assert_success(&cache_result);
// Test 1: Punc request referencing a schema defined locally within the punc
let valid_local_ref = json!({
"type": "local_address",
"street": "123 Main St",
"city": "Anytown"
});
let result_valid_local = validate_json_schema("punc_with_local_ref_test.request", jsonb(valid_local_ref));
assert_success(&result_valid_local);
let invalid_local_ref = json!({
"type": "local_address",
"street": "123 Main St" // Missing city
});
let result_invalid_local = validate_json_schema("punc_with_local_ref_test.request", jsonb(invalid_local_ref));
assert_error_count(&result_invalid_local, 1);
assert_has_error(&result_invalid_local, "REQUIRED_FIELD_MISSING", "/city");
// Test 2: Punc with a local schema that references a global type schema
let valid_global_ref = json!({
"type": "local_user_with_thing",
"user_name": "Alice",
"thing": {
"type": "global_thing",
"id": "550e8400-e29b-41d4-a716-446655440000"
}
});
let result_valid_global = validate_json_schema("punc_with_local_ref_to_global_test.request", jsonb(valid_global_ref));
assert_success(&result_valid_global);
let invalid_global_ref = json!({
"type": "local_user_with_thing",
"user_name": "Bob",
"thing": {
"type": "global_thing",
"id": "not-a-uuid" // Invalid format for global_thing's id
}
});
let result_invalid_global = validate_json_schema("punc_with_local_ref_to_global_test.request", jsonb(invalid_global_ref));
assert_error_count(&result_invalid_global, 1);
assert_has_error(&result_invalid_global, "FORMAT_INVALID", "/thing/id");
}
#[pg_test]
fn test_validate_title_override() {
let cache_result = title_override_schemas();
assert_success(&cache_result);
// Test that a schema with an overridden title still inherits validation keywords correctly.
// This instance is valid because it provides the 'name' required by the base schema.
let valid_instance = json!({ "type": "override_with_title", "name": "Test Name" });
let result_valid = validate_json_schema("override_with_title", jsonb(valid_instance));
assert_success(&result_valid);
// This instance is invalid because it's missing the 'name' required by the base schema.
// This proves that validation keywords are inherited even when metadata keywords are overridden.
let invalid_instance = json!({ "type": "override_with_title" });
let result_invalid = validate_json_schema("override_with_title", jsonb(invalid_instance));
assert_error_count(&result_invalid, 1);
assert_has_error(&result_invalid, "REQUIRED_FIELD_MISSING", "/name");
}
#[pg_test]
fn test_validate_type_matching() {
let cache_result = type_matching_schemas();
assert_success(&cache_result);
// 1. Test 'job' which extends 'entity'
let valid_job = json!({
"type": "job",
"name": "my job",
"job_id": "job123"
});
let result_valid_job = validate_json_schema("job", jsonb(valid_job));
assert_success(&result_valid_job);
let invalid_job = json!({
"type": "not_job",
"name": "my job",
"job_id": "job123"
});
let result_invalid_job = validate_json_schema("job", jsonb(invalid_job));
assert_error_count(&result_invalid_job, 1);
assert_has_error(&result_invalid_job, "TYPE_MISMATCH", "/type");
// 2. Test 'super_job' which extends 'job'
let valid_super_job = json!({
"type": "super_job",
"name": "my super job",
"job_id": "job123",
"manager_id": "mgr1"
});
let result_valid_super_job = validate_json_schema("super_job", jsonb(valid_super_job));
assert_success(&result_valid_super_job);
// 3. Test 'super_job.short' which should still expect type 'super_job'
let valid_short_super_job = json!({
"type": "super_job",
"name": "short", // maxLength: 10
"job_id": "job123",
"manager_id": "mgr1"
});
let result_valid_short = validate_json_schema("super_job.short", jsonb(valid_short_super_job));
assert_success(&result_valid_short);
let invalid_short_super_job = json!({
"type": "job", // Should be 'super_job'
"name": "short",
"job_id": "job123",
"manager_id": "mgr1"
});
let result_invalid_short = validate_json_schema("super_job.short", jsonb(invalid_short_super_job));
assert_error_count(&result_invalid_short, 1);
let error = find_error_with_code_and_path(&result_invalid_short, "TYPE_MISMATCH", "/type");
assert_error_message_contains(error, "Instance type 'job' does not match expected type 'super_job'");
// 4. Test punc with root, nested, and oneOf type refs
let valid_punc_instance = json!({
"root_job": {
"type": "job",
"name": "root job",
"job_id": "job456"
},
"nested_or_super_job": {
"type": "super_job",
"name": "nested super job",
"job_id": "job789",
"manager_id": "mgr2"
}
});
let result_valid_punc = validate_json_schema("type_test_punc.request", jsonb(valid_punc_instance));
assert_success(&result_valid_punc);
// 5. Test invalid type at punc root ref
let invalid_punc_root = json!({
"root_job": {
"type": "entity", // Should be "job"
"name": "root job",
"job_id": "job456"
},
"nested_or_super_job": {
"type": "super_job",
"name": "nested super job",
"job_id": "job789",
"manager_id": "mgr2"
}
});
let result_invalid_punc_root = validate_json_schema("type_test_punc.request", jsonb(invalid_punc_root));
assert_error_count(&result_invalid_punc_root, 1);
assert_has_error(&result_invalid_punc_root, "TYPE_MISMATCH", "/root_job/type");
// 6. Test invalid type at punc nested ref
let invalid_punc_nested = json!({
"root_job": {
"type": "job",
"name": "root job",
"job_id": "job456"
},
"nested_or_super_job": {
"my_job": {
"type": "entity", // Should be "job"
"name": "nested job",
"job_id": "job789"
}
}
});
let result_invalid_punc_nested = validate_json_schema("type_test_punc.request", jsonb(invalid_punc_nested));
assert_error_count(&result_invalid_punc_nested, 1);
assert_has_error(&result_invalid_punc_nested, "TYPE_MISMATCH", "/nested_or_super_job/my_job/type");
// 7. Test invalid type at punc oneOf ref
let invalid_punc_oneof = json!({
"root_job": {
"type": "job",
"name": "root job",
"job_id": "job456"
},
"nested_or_super_job": {
"type": "job", // Should be "super_job"
"name": "nested super job",
"job_id": "job789",
"manager_id": "mgr2"
}
});
let result_invalid_punc_oneof = validate_json_schema("type_test_punc.request", jsonb(invalid_punc_oneof));
// This will have multiple errors because the invalid oneOf branch will also fail the other branch's validation
assert_has_error(&result_invalid_punc_oneof, "TYPE_MISMATCH", "/nested_or_super_job/type");
}

validator/CHANGELOG.md Normal file (81 lines)

@@ -0,0 +1,81 @@
# Changelog
## [Unreleased]
### Bug Fixes
- validator: ensure `uneval` state is propagated when `$ref` validation fails
## [0.6.1] - 2025-01-07
### Bug Fixes
- fix: FileLoader should not be used in wasm
## [0.6.0] - 2024-05-30
### Breaking Changes
- loader: allow the loader to be replaced entirely
### Bug Fixes
- separate doc loading from root creation
- validator: if contentEncoding fails, skip contentMediaType
- loader: should load latest from metaschemas dir
- fix: hash for json numbers with zero fractions
- fix: resources/anchors in non-std schema loc not supported
### Changes
- boon binary artifacts under github release
- boon binary `--cacert` option
- boon binary `--insecure` flag
## [0.5.3] - 2024-01-27
### Changes
- updated dependencies
## [0.5.2] - 2024-01-27
### Bug Fixes
- Error message for failed const validation is wrong
## [0.5.1] - 2023-07-13
### Changes
- WASM compatibility
- minor performance improvements
## [0.5.0] - 2023-03-29
### Breaking Changes
- changes to error api
### Performance
- minor improvements in validation
## [0.4.0] - 2023-03-24
### Breaking Changes
- changes to error api
### Fixed
- Compiler.add_resource should not check file exists
### Added
- implement `contentSchema` keyword
- ECMA-262 regex compatibility
- add example_custom_content_encoding
- add example_custom_content_media_type
### Performance
- significant improvement in validation
## [0.3.1] - 2023-03-07
### Added
- add example_from_yaml_files
- cli: support yaml files
### Fixed
- ensure fragment decoded before use
- $dynamicRef w/o anchor is same as $ref

validator/Cargo.lock generated Normal file (1441 lines)

File diff suppressed because it is too large

validator/Cargo.toml Normal file (39 lines)

@@ -0,0 +1,39 @@
[package]
name = "boon"
version = "0.6.1"
edition = "2021"
description = "JSONSchema (draft 2020-12, draft 2019-09, draft-7, draft-6, draft-4) Validation"
readme = "README.md"
repository = "https://github.com/santhosh-tekuri/boon"
authors = ["santhosh kumar tekuri <santhosh.tekuri@gmail.com>"]
keywords = ["jsonschema", "validation"]
license = "MIT OR Apache-2.0"
categories = ["web-programming"]
exclude = [ "tests", ".github", ".gitmodules" ]
[dependencies]
pgrx = "0.15.0"
serde = "1"
serde_json = "1"
regex = "1.10.3"
regex-syntax = "0.8.2"
url = "2"
fluent-uri = "0.3.2"
idna = "1.0"
percent-encoding = "2"
once_cell = "1"
base64 = "0.22"
ahash = "0.8.3"
appendlist = "1.4"
[dev-dependencies]
pgrx-tests = "0.15.0"
serde = { version = "1.0", features = ["derive"] }
serde_yaml = "0.9"
ureq = "2.12"
rustls = "0.23"
criterion = "0.5"
[[bench]]
name = "bench"
harness = false

validator/LICENSE-APACHE Normal file (177 lines)

@@ -0,0 +1,177 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS

validator/LICENSE-MIT Normal file (18 lines)

@@ -0,0 +1,18 @@
Copyright 2023 Santhosh Kumar Tekuri
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

validator/README.md Normal file (88 lines)

@@ -0,0 +1,88 @@
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
[![Crates.io](https://img.shields.io/crates/v/boon.svg)](https://crates.io/crates/boon)
[![docs.rs](https://docs.rs/boon/badge.svg)](https://docs.rs/boon/)
[![Build Status](https://github.com/santhosh-tekuri/boon/actions/workflows/rust.yml/badge.svg?branch=main)](https://github.com/santhosh-tekuri/boon/actions/workflows/rust.yml)
[![codecov](https://codecov.io/gh/santhosh-tekuri/boon/branch/main/graph/badge.svg?token=A2YC4A0BLG)](https://codecov.io/gh/santhosh-tekuri/boon)
[![dependency status](https://deps.rs/repo/github/Santhosh-tekuri/boon/status.svg?refresh)](https://deps.rs/repo/github/Santhosh-tekuri/boon)
[Examples](https://github.com/santhosh-tekuri/boon/blob/main/tests/examples.rs)
[Changelog](https://github.com/santhosh-tekuri/boon/blob/main/CHANGELOG.md)
## Library Features
- [x] pass [JSON-Schema-Test-Suite](https://github.com/json-schema-org/JSON-Schema-Test-Suite) excluding optional (compare with other impls at [bowtie](https://bowtie-json-schema.github.io/bowtie/#))
- [x] [![draft-04](https://img.shields.io/endpoint?url=https://bowtie.report/badges/rust-boon/compliance/draft4.json)](https://bowtie.report/#/dialects/draft4)
- [x] [![draft-06](https://img.shields.io/endpoint?url=https://bowtie.report/badges/rust-boon/compliance/draft6.json)](https://bowtie.report/#/dialects/draft6)
- [x] [![draft-07](https://img.shields.io/endpoint?url=https://bowtie.report/badges/rust-boon/compliance/draft7.json)](https://bowtie.report/#/dialects/draft7)
- [x] [![draft/2019-09](https://img.shields.io/endpoint?url=https://bowtie.report/badges/rust-boon/compliance/draft2019-09.json)](https://bowtie.report/#/dialects/draft2019-09)
- [x] [![draft/2020-12](https://img.shields.io/endpoint?url=https://bowtie.report/badges/rust-boon/compliance/draft2020-12.json)](https://bowtie.report/#/dialects/draft2020-12)
- [x] detect infinite loop traps
- [x] `$schema` cycle
- [x] validation cycle
- [x] custom `$schema` url
- [x] vocabulary based validation
- [x] ECMA-262 regex compatibility (pass tests from `optional/ecmascript-regex.json`)
- [x] format assertions
- [x] flag to enable in draft >= 2019-09
- [x] custom format registration
- [x] built-in formats
- [x] regex, uuid
- [x] ipv4, ipv6
- [x] hostname, email
- [x] idn-hostname, idn-email
- [x] date, time, date-time, duration
- [x] json-pointer, relative-json-pointer
- [x] uri, uri-reference, uri-template
- [x] iri, iri-reference
- [x] period
- [x] content assertions
- [x] flag to enable in draft >= 7
- [x] contentEncoding
- [x] base64
- [x] custom
- [x] contentMediaType
- [x] application/json
- [x] custom
- [x] contentSchema
- [x] errors
- [x] introspectable
- [x] hierarchy
- [x] alternative display with `#`
- [x] output
- [x] flag
- [x] basic
- [x] detailed
- [ ] custom vocabulary
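## Library Usage
A minimal sketch of compiling a schema and validating an instance with the library; the file names `schema.json` and `instance.json` are illustrative, and complete programs live in [Examples](https://github.com/santhosh-tekuri/boon/blob/main/tests/examples.rs):
```rust
use boon::{Compiler, Schemas};
use serde_json::Value;

fn main() {
    let mut schemas = Schemas::new();
    let mut compiler = Compiler::new();
    // Compile the schema; the returned index identifies it within `schemas`.
    let sch = match compiler.compile("schema.json", &mut schemas) {
        Ok(sch) => sch,
        Err(e) => return eprintln!("schema failed to compile: {e:#}"),
    };
    // Parse the instance document and validate it against the compiled schema.
    let file = std::fs::File::open("instance.json").expect("instance file should open");
    let instance: Value = serde_json::from_reader(file).expect("instance should be valid JSON");
    match schemas.validate(&instance, sch) {
        Ok(()) => println!("instance: ok"),
        Err(e) => println!("instance: failed\n{e}"),
    }
}
```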
## CLI
To install: `cargo install boon-cli --locked`,
or download a prebuilt binary from [releases](https://github.com/santhosh-tekuri/boon/releases)
```
Usage: boon [OPTIONS] SCHEMA [INSTANCE...]
Options:
-h, --help Print help information
-q, --quiet Do not print errors
-d, --draft <VER> Draft used when '$schema' is missing. Valid values 4,
6, 7, 2019, 2020 (default 2020)
-o, --output <FMT> Output format. Valid values simple, alt, flag, basic,
detailed (default simple)
-f, --assert-format
Enable format assertions with draft >= 2019
-c, --assert-content
Enable content assertions with draft >= 7
--cacert <FILE> Use the specified PEM certificate file to verify the
peer. The file may contain multiple CA certificates
-k, --insecure Use insecure TLS connection
```
This CLI validates a schema and, optionally, one or more instances against it.
It supports both JSON and YAML files.
The exit code is:
- `1` if the command line arguments are invalid.
- `2` if the schema or any instance fails validation.
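For example, to validate two instance files against a schema with format assertions enabled (the file names are illustrative):
```
boon --assert-format schema.json instance1.json instance2.yaml
```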

@@ -0,0 +1,26 @@
use std::{env, fs::File};
use boon::{Compiler, Schemas};
use criterion::{criterion_group, criterion_main, Criterion};
use serde_json::Value;
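// Benchmark sketch: the SCHEMA and INSTANCE environment variables select the
// schema and instance files to measure; for example (illustrative file names):
//   SCHEMA=schema.json INSTANCE=instance.json cargo bench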
pub fn validate(c: &mut Criterion) {
let (Ok(schema), Ok(instance)) = (env::var("SCHEMA"), env::var("INSTANCE")) else {
panic!("SCHEMA, INSTANCE environment variables not set");
};
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
compiler.enable_format_assertions();
let sch = compiler.compile(&schema, &mut schemas).unwrap();
let rdr = File::open(&instance).unwrap();
let inst: Value = if instance.ends_with(".yaml") || instance.ends_with(".yml") {
serde_yaml::from_reader(rdr).unwrap()
} else {
serde_json::from_reader(rdr).unwrap()
};
c.bench_function("boon", |b| b.iter(|| schemas.validate(&inst, sch).unwrap()));
}
criterion_group!(benches, validate);
criterion_main!(benches);

validator/cli/Cargo.lock generated Normal file (1156 lines)

File diff suppressed because it is too large

validator/cli/Cargo.toml Normal file (25 lines)

@@ -0,0 +1,25 @@
[package]
name = "boon-cli"
version = "0.6.2"
edition = "2021"
description = "cli for JSONSchema (draft 2020-12, draft 2019-09, draft-7, draft-6, draft-4) Validation"
repository = "https://github.com/santhosh-tekuri/boon/cli"
authors = ["santhosh kumar tekuri <santhosh.tekuri@gmail.com>"]
keywords = ["jsonschema", "validation"]
categories = ["web-programming"]
license = "MIT OR Apache-2.0"
[dependencies]
boon = { version = "0.6.1", path = ".."}
url = "2"
getopts = "0.2"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_yaml = "0.9"
ureq = "2.12"
rustls = { version = "0.23", features = ["ring"] }
rustls-pemfile = "2.1"
[[bin]]
name = "boon"
path = "src/main.rs"

validator/cli/src/main.rs Normal file (316 lines)

@@ -0,0 +1,316 @@
use core::panic;
use std::{env, error::Error, fs::File, io::BufReader, process, str::FromStr, sync::Arc};
use boon::{Compiler, Draft, Schemas, SchemeUrlLoader, UrlLoader};
use getopts::Options;
use rustls::client::danger::{HandshakeSignatureValid, ServerCertVerified, ServerCertVerifier};
use serde_json::Value;
use ureq::Agent;
use url::Url;
fn main() {
let opts = options();
let matches = match opts.parse(env::args().skip(1)) {
Ok(m) => m,
Err(f) => {
eprintln!("{f}");
eprintln!();
eprintln!("{}", opts.usage(BRIEF));
process::exit(1)
}
};
if matches.opt_present("version") {
println!("{}", env!("CARGO_PKG_VERSION"));
process::exit(0);
}
if matches.opt_present("help") {
println!("{}", opts.usage(BRIEF));
process::exit(0);
}
// draft --
let mut draft = Draft::default();
if let Some(v) = matches.opt_str("draft") {
let Ok(v) = usize::from_str(&v) else {
eprintln!("invalid draft: {v}");
eprintln!();
eprintln!("{}", opts.usage(BRIEF));
process::exit(1);
};
draft = match v {
4 => Draft::V4,
6 => Draft::V6,
7 => Draft::V7,
2019 => Draft::V2019_09,
2020 => Draft::V2020_12,
_ => {
eprintln!("invalid draft: {v}");
eprintln!();
eprintln!("{}", opts.usage(BRIEF));
process::exit(1);
}
};
}
// output --
let output = matches.opt_str("output");
if let Some(o) = &output {
if !matches!(o.as_str(), "simple" | "alt" | "flag" | "basic" | "detailed") {
eprintln!("invalid output: {o}");
eprintln!();
eprintln!("{}", opts.usage(BRIEF));
process::exit(1);
}
}
// flags --
let quiet = matches.opt_present("quiet");
let assert_format = matches.opt_present("assert-format");
let assert_content = matches.opt_present("assert-content");
let insecure = matches.opt_present("insecure");
// schema --
let Some(schema) = matches.free.first() else {
eprintln!("missing SCHEMA");
eprintln!();
eprintln!("{}", opts.usage(BRIEF));
process::exit(1);
};
// compile --
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
let mut loader = SchemeUrlLoader::new();
loader.register("file", Box::new(FileUrlLoader));
let cacert = matches.opt_str("cacert");
let cacert = cacert.as_deref();
loader.register("http", Box::new(HttpUrlLoader::new(cacert, insecure)));
loader.register("https", Box::new(HttpUrlLoader::new(cacert, insecure)));
compiler.use_loader(Box::new(loader));
compiler.set_default_draft(draft);
if assert_format {
compiler.enable_format_assertions();
}
if assert_content {
compiler.enable_content_assertions();
}
let sch = match compiler.compile(schema, &mut schemas) {
Ok(sch) => {
println!("schema {schema}: ok");
sch
}
Err(e) => {
println!("schema {schema}: failed");
if !quiet {
println!("{e:#}");
}
process::exit(2);
}
};
// validate --
let mut all_valid = true;
for instance in &matches.free[1..] {
if !quiet {
println!();
}
let rdr = match File::open(instance) {
Ok(rdr) => BufReader::new(rdr),
Err(e) => {
println!("instance {instance}: failed");
if !quiet {
println!("error reading file {instance}: {e}");
}
all_valid = false;
continue;
}
};
let value: Result<Value, String> =
if instance.ends_with(".yaml") || instance.ends_with(".yml") {
serde_yaml::from_reader(rdr).map_err(|e| e.to_string())
} else {
serde_json::from_reader(rdr).map_err(|e| e.to_string())
};
let value = match value {
Ok(v) => v,
Err(e) => {
println!("instance {instance}: failed");
if !quiet {
println!("error parsing file {instance}: {e}");
}
all_valid = false;
continue;
}
};
match schemas.validate(&value, sch) {
Ok(_) => println!("instance {instance}: ok"),
Err(e) => {
println!("instance {instance}: failed");
if !quiet {
match &output {
Some(out) => match out.as_str() {
"simple" => println!("{e}"),
"alt" => println!("{e:#}"),
"flag" => println!("{:#}", e.flag_output()),
"basic" => println!("{:#}", e.basic_output()),
"detailed" => println!("{:#}", e.detailed_output()),
_ => (),
},
None => println!("{e}"),
}
}
all_valid = false;
continue;
}
};
}
if !all_valid {
process::exit(2);
}
}
const BRIEF: &str = "Usage: boon [OPTIONS] SCHEMA [INSTANCE...]";
fn options() -> Options {
let mut opts = Options::new();
opts.optflag("v", "version", "Print version and exit");
opts.optflag("h", "help", "Print help information");
opts.optflag("q", "quiet", "Do not print errors");
opts.optopt(
"d",
"draft",
"Draft used when '$schema' is missing. Valid values 4, 6, 7, 2019, 2020 (default 2020)",
"<VER>",
);
opts.optopt(
"o",
"output",
"Output format. Valid values simple, alt, flag, basic, detailed (default simple)",
"<FMT>",
);
opts.optflag(
"f",
"assert-format",
"Enable format assertions with draft >= 2019",
);
opts.optflag(
"c",
"assert-content",
"Enable content assertions with draft >= 7",
);
opts.optopt(
"",
"cacert",
"Use the specified PEM certificate file to verify the peer. The file may contain multiple CA certificates",
"<FILE>",
);
opts.optflag("k", "insecure", "Use insecure TLS connection");
opts
}
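/// Loads schema documents from `file:` URLs, parsing them as YAML or JSON
/// based on the file extension.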
struct FileUrlLoader;
impl UrlLoader for FileUrlLoader {
fn load(&self, url: &str) -> Result<Value, Box<dyn Error>> {
let url = Url::parse(url)?;
let path = url.to_file_path().map_err(|_| "invalid file path")?;
let file = File::open(&path)?;
if path
.extension()
.filter(|&ext| ext == "yaml" || ext == "yml")
.is_some()
{
Ok(serde_yaml::from_reader(file)?)
} else {
Ok(serde_json::from_reader(file)?)
}
}
}
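/// Loads schema documents over HTTP(S) with a `ureq` agent configured from
/// the `--cacert` and `--insecure` options.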
struct HttpUrlLoader(Agent);
impl HttpUrlLoader {
fn new(cacert: Option<&str>, insecure: bool) -> Self {
let mut builder = ureq::builder();
if let Some(cacert) = cacert {
let file = File::open(cacert).unwrap_or_else(|e| panic!("error opening {cacert}: {e}"));
let certs: Result<Vec<_>, _> =
rustls_pemfile::certs(&mut BufReader::new(file)).collect();
let certs = certs.unwrap_or_else(|e| panic!("error reading cacert: {e}"));
assert!(!certs.is_empty(), "no certs in cacert");
let mut store = rustls::RootCertStore::empty();
for cert in certs {
store
.add(cert)
.unwrap_or_else(|e| panic!("error adding cert: {e}"))
}
let tls_config = rustls::ClientConfig::builder()
.with_root_certificates(store)
.with_no_client_auth();
builder = builder.tls_config(tls_config.into());
} else if insecure {
let tls_config = rustls::ClientConfig::builder()
.dangerous()
.with_custom_certificate_verifier(Arc::new(InsecureVerifier))
.with_no_client_auth();
builder = builder.tls_config(tls_config.into());
}
Self(builder.build())
}
}
impl UrlLoader for HttpUrlLoader {
fn load(&self, url: &str) -> Result<Value, Box<dyn Error>> {
let response = self.0.get(url).call()?;
let is_yaml = url.ends_with(".yaml") || url.ends_with(".yml") || {
let ctype = response.content_type();
ctype.ends_with("/yaml") || ctype.ends_with("-yaml")
};
if is_yaml {
Ok(serde_yaml::from_reader(response.into_reader())?)
} else {
Ok(serde_json::from_reader(response.into_reader())?)
}
}
}
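/// Certificate verifier that accepts any server certificate without checking
/// it; installed only when the `--insecure` flag is given.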
#[derive(Debug)]
struct InsecureVerifier;
impl ServerCertVerifier for InsecureVerifier {
fn verify_server_cert(
&self,
_end_entity: &rustls::pki_types::CertificateDer<'_>,
_intermediates: &[rustls::pki_types::CertificateDer<'_>],
_server_name: &rustls::pki_types::ServerName<'_>,
_ocsp_response: &[u8],
_now: rustls::pki_types::UnixTime,
) -> Result<rustls::client::danger::ServerCertVerified, rustls::Error> {
Ok(ServerCertVerified::assertion())
}
fn verify_tls12_signature(
&self,
_message: &[u8],
_cert: &rustls::pki_types::CertificateDer<'_>,
_dss: &rustls::DigitallySignedStruct,
) -> Result<rustls::client::danger::HandshakeSignatureValid, rustls::Error> {
Ok(HandshakeSignatureValid::assertion())
}
fn verify_tls13_signature(
&self,
_message: &[u8],
_cert: &rustls::pki_types::CertificateDer<'_>,
_dss: &rustls::DigitallySignedStruct,
) -> Result<rustls::client::danger::HandshakeSignatureValid, rustls::Error> {
Ok(HandshakeSignatureValid::assertion())
}
fn supported_verify_schemes(&self) -> Vec<rustls::SignatureScheme> {
rustls::crypto::ring::default_provider()
.signature_verification_algorithms
.supported_schemes()
}
}

validator/src/compiler.rs Normal file (985 lines)

@@ -0,0 +1,985 @@
use std::{cmp::Ordering, collections::HashMap, error::Error, fmt::Display};
use regex::Regex;
use serde_json::{Map, Value};
use url::Url;
use crate::{content::*, draft::*, ecma, formats::*, root::*, roots::*, util::*, *};
/// Supported draft versions
#[non_exhaustive]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Draft {
/// Draft for `http://json-schema.org/draft-04/schema`
V4,
/// Draft for `http://json-schema.org/draft-06/schema`
V6,
/// Draft for `http://json-schema.org/draft-07/schema`
V7,
/// Draft for `https://json-schema.org/draft/2019-09/schema`
V2019_09,
/// Draft for `https://json-schema.org/draft/2020-12/schema`
V2020_12,
}
impl Draft {
/**
Get the [`Draft`] for the given `url`
# Arguments
* `url` - accepts both `http` and `https` schemes and ignores any fragment in the url
# Examples
```
# use boon::*;
assert_eq!(Draft::from_url("https://json-schema.org/draft/2020-12/schema"), Some(Draft::V2020_12));
assert_eq!(Draft::from_url("http://json-schema.org/draft-07/schema#"), Some(Draft::V7));
```
*/
pub fn from_url(url: &str) -> Option<Draft> {
match crate::draft::Draft::from_url(url) {
Some(draft) => match draft.version {
4 => Some(Draft::V4),
6 => Some(Draft::V6),
7 => Some(Draft::V7),
2019 => Some(Draft::V2019_09),
2020 => Some(Draft::V2020_12),
_ => None,
},
None => None,
}
}
pub(crate) fn internal(&self) -> &'static crate::draft::Draft {
match self {
Draft::V4 => &DRAFT4,
Draft::V6 => &DRAFT6,
Draft::V7 => &DRAFT7,
Draft::V2019_09 => &DRAFT2019,
Draft::V2020_12 => &DRAFT2020,
}
}
}
/// Returns the latest supported draft
impl Default for Draft {
fn default() -> Self {
Draft::V2020_12
}
}
/// JsonSchema compiler.
#[derive(Default)]
pub struct Compiler {
roots: Roots,
assert_format: bool,
assert_content: bool,
formats: HashMap<&'static str, Format>,
decoders: HashMap<&'static str, Decoder>,
media_types: HashMap<&'static str, MediaType>,
}
impl Compiler {
pub fn new() -> Self {
Self::default()
}
/**
Overrides the draft used to compile schemas without an
explicit `$schema` field.
By default this library uses the latest supported draft.
The use of this option is HIGHLY encouraged to ensure the
continued correct operation of your schemas, since the
default will not stay the same over time.
*/
pub fn set_default_draft(&mut self, d: Draft) {
self.roots.default_draft = d.internal()
}
/**
Always enable format assertions.
# Default Behavior
- for draft-07 and earlier: enabled
- for draft/2019-09: disabled, unless
metaschema says `format` vocabulary is required
- for draft/2020-12: disabled, unless
metaschema says `format-assertion` vocabulary is required
*/
pub fn enable_format_assertions(&mut self) {
self.assert_format = true;
}
/**
Always enable content assertions.
Content assertions include the keywords:
- contentEncoding
- contentMediaType
- contentSchema
By default, content assertions are disabled.
*/
pub fn enable_content_assertions(&mut self) {
self.assert_content = true;
}
/// Overrides default [`UrlLoader`] used to load schema resources
pub fn use_loader(&mut self, url_loader: Box<dyn UrlLoader>) {
self.roots.loader.use_loader(url_loader);
}
/**
Registers custom `format`
# Note
- `regex` format cannot be overridden
- format assertions are disabled by default for draft >= 2019-09;
  see [`Compiler::enable_format_assertions`]
*/
pub fn register_format(&mut self, format: Format) {
if format.name != "regex" {
self.formats.insert(format.name, format);
}
}
/**
Registers custom `contentEncoding`
Note that content assertions are disabled by default.
See [`Compiler::enable_content_assertions`].
*/
pub fn register_content_encoding(&mut self, decoder: Decoder) {
self.decoders.insert(decoder.name, decoder);
}
/**
Registers custom `contentMediaType`
Note that content assertions are disabled by default.
See [`Compiler::enable_content_assertions`].
*/
pub fn register_content_media_type(&mut self, media_type: MediaType) {
self.media_types.insert(media_type.name, media_type);
}
/**
Adds a schema resource which is used later during reference resolution.
If you do not know which schema resources are required, use [`UrlLoader`] instead.
The argument `loc` can be a file path or a url. Any fragment in `loc` is ignored.
# Errors
returns [`CompileError`] if url parsing fails.
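# Examples
A minimal sketch; the url and document are illustrative:
```
# use boon::*;
# use serde_json::json;
let mut compiler = Compiler::new();
compiler.add_resource("http://example.com/address.json", json!({"type": "string"}))?;
# Ok::<(), CompileError>(())
```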
*/
pub fn add_resource(&mut self, loc: &str, json: Value) -> Result<(), CompileError> {
let uf = UrlFrag::absolute(loc)?;
self.roots.loader.add_doc(uf.url, json);
Ok(())
}
/**
Compiles the schema at `loc` into `target` and returns an identifier to the
compiled schema.
The argument `loc` can be a file path or a url, with an optional fragment.
examples: `http://example.com/schema.json#/defs/address`,
`samples/schema_file.json#defs/address`
If `loc` is already compiled, this simply returns the same [`SchemaIndex`].
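# Examples
A minimal sketch (`samples/schema_file.json` is an illustrative path):
```no_run
# use boon::*;
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
let sch_index = compiler.compile("samples/schema_file.json", &mut schemas)?;
# Ok::<(), CompileError>(())
```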
*/
pub fn compile(
&mut self,
loc: &str,
target: &mut Schemas,
) -> Result<SchemaIndex, CompileError> {
let uf = UrlFrag::absolute(loc)?;
// resolve anchor
let up = self.roots.resolve_fragment(uf)?;
let result = self.do_compile(up, target);
if let Err(bug @ CompileError::Bug(_)) = &result {
debug_assert!(false, "{bug}");
}
result
}
fn do_compile(
&mut self,
up: UrlPtr,
target: &mut Schemas,
) -> Result<SchemaIndex, CompileError> {
let mut queue = Queue::new();
let mut compiled = Vec::new();
let index = queue.enqueue_schema(target, up);
if queue.schemas.is_empty() {
// already got compiled
return Ok(index);
}
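// Compile in queue order; compiling one schema may enqueue additional
// subschemas and roots that it references.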
while queue.schemas.len() > compiled.len() {
let up = &queue.schemas[compiled.len()];
self.roots.ensure_subschema(up)?;
let Some(root) = self.roots.get(&up.url) else {
return Err(CompileError::Bug("or_load didn't add".into()));
};
let doc = self.roots.loader.load(&root.url)?;
let v = up.lookup(doc)?;
let sch = self.compile_value(target, v, &up.clone(), root, &mut queue)?;
compiled.push(sch);
self.roots.insert(&mut queue.roots);
}
target.insert(queue.schemas, compiled);
Ok(index)
}
fn compile_value(
&self,
schemas: &Schemas,
v: &Value,
up: &UrlPtr,
root: &Root,
queue: &mut Queue,
) -> Result<Schema, CompileError> {
let mut s = Schema::new(up.to_string());
s.draft_version = root.draft.version;
// we know it is already in queue, we just want to get its index
let len = queue.schemas.len();
s.idx = queue.enqueue_schema(schemas, up.to_owned());
debug_assert_eq!(queue.schemas.len(), len, "{up} should already be in queue");
s.resource = {
let base = UrlPtr {
url: up.url.clone(),
ptr: root.resource(&up.ptr).ptr.clone(),
};
queue.enqueue_schema(schemas, base)
};
// if resource, enqueue dynamicAnchors for compilation
if s.idx == s.resource && root.draft.version >= 2020 {
let res = root.resource(&up.ptr);
for (anchor, anchor_ptr) in &res.anchors {
if res.dynamic_anchors.contains(anchor) {
let up = UrlPtr {
url: up.url.clone(),
ptr: anchor_ptr.clone(),
};
let danchor_sch = queue.enqueue_schema(schemas, up);
s.dynamic_anchors.insert(anchor.to_string(), danchor_sch);
}
}
}
match v {
Value::Object(obj) => {
if obj.is_empty() {
s.boolean = Some(true);
} else {
ObjCompiler {
c: self,
obj,
up,
schemas,
root,
queue,
}
.compile_obj(&mut s)?;
}
}
Value::Bool(b) => s.boolean = Some(*b),
_ => {}
}
s.all_props_evaluated = s.additional_properties.is_some();
s.all_items_evaluated = if s.draft_version < 2020 {
s.additional_items.is_some() || matches!(s.items, Some(Items::SchemaRef(_)))
} else {
s.items2020.is_some()
};
s.num_items_evaluated = if let Some(Items::SchemaRefs(list)) = &s.items {
list.len()
} else {
s.prefix_items.len()
};
Ok(s)
}
}
struct ObjCompiler<'c, 'v, 'l, 's, 'r, 'q> {
c: &'c Compiler,
obj: &'v Map<String, Value>,
up: &'l UrlPtr,
schemas: &'s Schemas,
root: &'r Root,
queue: &'q mut Queue,
}
// compile supported drafts
impl ObjCompiler<'_, '_, '_, '_, '_, '_> {
fn compile_obj(&mut self, s: &mut Schema) -> Result<(), CompileError> {
self.compile_draft4(s)?;
if self.draft_version() >= 6 {
self.compile_draft6(s)?;
}
if self.draft_version() >= 7 {
self.compile_draft7(s)?;
}
if self.draft_version() >= 2019 {
self.compile_draft2019(s)?;
}
if self.draft_version() >= 2020 {
self.compile_draft2020(s)?;
}
Ok(())
}
fn compile_draft4(&mut self, s: &mut Schema) -> Result<(), CompileError> {
if self.has_vocab("core") {
s.ref_ = self.enqueue_ref("$ref")?;
if s.ref_.is_some() && self.draft_version() < 2019 {
// All other properties in a "$ref" object MUST be ignored
return Ok(());
}
}
if self.has_vocab("applicator") {
s.all_of = self.enqueue_arr("allOf");
s.any_of = self.enqueue_arr("anyOf");
s.one_of = self.enqueue_arr("oneOf");
s.not = self.enqueue_prop("not");
if self.draft_version() < 2020 {
match self.value("items") {
Some(Value::Array(_)) => {
s.items = Some(Items::SchemaRefs(self.enqueue_arr("items")));
s.additional_items = self.enquue_additional("additionalItems");
}
_ => s.items = self.enqueue_prop("items").map(Items::SchemaRef),
}
}
s.properties = self.enqueue_map("properties");
s.pattern_properties = {
let mut v = vec![];
if let Some(Value::Object(obj)) = self.value("patternProperties") {
for pname in obj.keys() {
let ecma =
ecma::convert(pname).map_err(|src| CompileError::InvalidRegex {
url: self.up.format("patternProperties"),
regex: pname.to_owned(),
src,
})?;
let regex =
Regex::new(ecma.as_ref()).map_err(|e| CompileError::InvalidRegex {
url: self.up.format("patternProperties"),
regex: ecma.into_owned(),
src: e.into(),
})?;
let ptr = self.up.ptr.append2("patternProperties", pname);
let sch = self.enqueue_schema(ptr);
v.push((regex, sch));
}
}
v
};
s.additional_properties = self.enquue_additional("additionalProperties");
if let Some(Value::Object(deps)) = self.value("dependencies") {
s.dependencies = deps
.iter()
.filter_map(|(k, v)| {
let v = match v {
Value::Array(_) => Some(Dependency::Props(to_strings(v))),
_ => {
let ptr = self.up.ptr.append2("dependencies", k);
Some(Dependency::SchemaRef(self.enqueue_schema(ptr)))
}
};
v.map(|v| (k.clone(), v))
})
.collect();
}
}
if self.has_vocab("validation") {
match self.value("type") {
Some(Value::String(t)) => {
if let Some(t) = Type::from_str(t) {
s.types.add(t)
}
}
Some(Value::Array(arr)) => {
for t in arr {
if let Value::String(t) = t {
if let Some(t) = Type::from_str(t) {
s.types.add(t)
}
}
}
}
_ => {}
}
if let Some(Value::Array(e)) = self.value("enum") {
let mut types = Types::default();
for item in e {
types.add(Type::of(item));
}
s.enum_ = Some(Enum {
types,
values: e.clone(),
});
}
s.multiple_of = self.num("multipleOf");
s.maximum = self.num("maximum");
if let Some(Value::Bool(exclusive)) = self.value("exclusiveMaximum") {
if *exclusive {
s.exclusive_maximum = s.maximum.take();
}
} else {
s.exclusive_maximum = self.num("exclusiveMaximum");
}
s.minimum = self.num("minimum");
if let Some(Value::Bool(exclusive)) = self.value("exclusiveMinimum") {
if *exclusive {
s.exclusive_minimum = s.minimum.take();
}
} else {
s.exclusive_minimum = self.num("exclusiveMinimum");
}
s.max_length = self.usize("maxLength");
s.min_length = self.usize("minLength");
if let Some(Value::String(p)) = self.value("pattern") {
let p = ecma::convert(p).map_err(CompileError::Bug)?;
s.pattern = Some(Regex::new(p.as_ref()).map_err(|e| CompileError::Bug(e.into()))?);
}
s.max_items = self.usize("maxItems");
s.min_items = self.usize("minItems");
s.unique_items = self.bool("uniqueItems");
s.max_properties = self.usize("maxProperties");
s.min_properties = self.usize("minProperties");
if let Some(req) = self.value("required") {
s.required = to_strings(req);
}
}
// format --
if self.c.assert_format
|| self.has_vocab(match self.draft_version().cmp(&2019) {
Ordering::Less => "core",
Ordering::Equal => "format",
Ordering::Greater => "format-assertion",
})
{
if let Some(Value::String(format)) = self.value("format") {
s.format = self
.c
.formats
.get(format.as_str())
.or_else(|| FORMATS.get(format.as_str()))
.cloned();
}
}
Ok(())
}
fn compile_draft6(&mut self, s: &mut Schema) -> Result<(), CompileError> {
if self.has_vocab("applicator") {
s.contains = self.enqueue_prop("contains");
s.property_names = self.enqueue_prop("propertyNames");
}
if self.has_vocab("validation") {
s.constant = self.value("const").cloned();
}
Ok(())
}
fn compile_draft7(&mut self, s: &mut Schema) -> Result<(), CompileError> {
if self.has_vocab("applicator") {
s.if_ = self.enqueue_prop("if");
if s.if_.is_some() {
if !self.bool_schema("if", false) {
s.then = self.enqueue_prop("then");
}
if !self.bool_schema("if", true) {
s.else_ = self.enqueue_prop("else");
}
}
}
if self.c.assert_content {
if let Some(Value::String(encoding)) = self.value("contentEncoding") {
s.content_encoding = self
.c
.decoders
.get(encoding.as_str())
.or_else(|| DECODERS.get(encoding.as_str()))
.cloned();
}
if let Some(Value::String(media_type)) = self.value("contentMediaType") {
s.content_media_type = self
.c
.media_types
.get(media_type.as_str())
.or_else(|| MEDIA_TYPES.get(media_type.as_str()))
.cloned();
}
}
Ok(())
}
fn compile_draft2019(&mut self, s: &mut Schema) -> Result<(), CompileError> {
if self.has_vocab("core") {
s.recursive_ref = self.enqueue_ref("$recursiveRef")?;
s.recursive_anchor = self.bool("$recursiveAnchor");
}
if self.has_vocab("validation") {
if s.contains.is_some() {
s.max_contains = self.usize("maxContains");
s.min_contains = self.usize("minContains");
}
if let Some(Value::Object(dep_req)) = self.value("dependentRequired") {
for (pname, pvalue) in dep_req {
s.dependent_required
.push((pname.clone(), to_strings(pvalue)));
}
}
}
if self.has_vocab("applicator") {
s.dependent_schemas = self.enqueue_map("dependentSchemas");
}
if self.has_vocab(match self.draft_version() {
2019 => "applicator",
_ => "unevaluated",
}) {
s.unevaluated_items = self.enqueue_prop("unevaluatedItems");
s.unevaluated_properties = self.enqueue_prop("unevaluatedProperties");
}
if self.c.assert_content
&& s.content_media_type
.map(|mt| mt.json_compatible)
.unwrap_or(false)
{
s.content_schema = self.enqueue_prop("contentSchema");
}
Ok(())
}
fn compile_draft2020(&mut self, s: &mut Schema) -> Result<(), CompileError> {
if self.has_vocab("core") {
if let Some(sch) = self.enqueue_ref("$dynamicRef")? {
if let Some(Value::String(dref)) = self.value("$dynamicRef") {
let Ok((_, frag)) = Fragment::split(dref) else {
let loc = self.up.format("$dynamicRef");
return Err(CompileError::ParseAnchorError { loc });
};
let anchor = match frag {
Fragment::Anchor(Anchor(s)) => Some(s),
Fragment::JsonPointer(_) => None,
};
s.dynamic_ref = Some(DynamicRef { sch, anchor });
}
};
if let Some(Value::String(anchor)) = self.value("$dynamicAnchor") {
s.dynamic_anchor = Some(anchor.to_owned());
}
}
if self.has_vocab("applicator") {
s.prefix_items = self.enqueue_arr("prefixItems");
s.items2020 = self.enqueue_prop("items");
}
Ok(())
}
}
// enqueue helpers
impl ObjCompiler<'_, '_, '_, '_, '_, '_> {
fn enqueue_schema(&mut self, ptr: JsonPointer) -> SchemaIndex {
let up = UrlPtr {
url: self.up.url.clone(),
ptr,
};
self.queue.enqueue_schema(self.schemas, up)
}
fn enqueue_prop(&mut self, pname: &'static str) -> Option<SchemaIndex> {
if self.obj.contains_key(pname) {
let ptr = self.up.ptr.append(pname);
Some(self.enqueue_schema(ptr))
} else {
None
}
}
fn enqueue_arr(&mut self, pname: &'static str) -> Vec<SchemaIndex> {
if let Some(Value::Array(arr)) = self.obj.get(pname) {
(0..arr.len())
.map(|i| {
let ptr = self.up.ptr.append2(pname, &i.to_string());
self.enqueue_schema(ptr)
})
.collect()
} else {
Vec::new()
}
}
fn enqueue_map<T>(&mut self, pname: &'static str) -> T
where
T: Default,
T: FromIterator<(String, SchemaIndex)>,
{
if let Some(Value::Object(obj)) = self.obj.get(pname) {
obj.keys()
.map(|k| {
let ptr = self.up.ptr.append2(pname, k);
(k.clone(), self.enqueue_schema(ptr))
})
.collect()
} else {
T::default()
}
}
fn enqueue_ref(&mut self, pname: &str) -> Result<Option<SchemaIndex>, CompileError> {
let Some(Value::String(ref_)) = self.obj.get(pname) else {
return Ok(None);
};
let base_url = self.root.base_url(&self.up.ptr);
let abs_ref = UrlFrag::join(base_url, ref_)?;
if let Some(resolved_ref) = self.root.resolve(&abs_ref)? {
// local ref
return Ok(Some(self.enqueue_schema(resolved_ref.ptr)));
}
// remote ref
let up = self.queue.resolve_anchor(abs_ref, &self.c.roots)?;
Ok(Some(self.queue.enqueue_schema(self.schemas, up)))
}
fn enqueue_additional(&mut self, pname: &'static str) -> Option<Additional> {
if let Some(Value::Bool(b)) = self.obj.get(pname) {
Some(Additional::Bool(*b))
} else {
self.enqueue_prop(pname).map(Additional::SchemaRef)
}
}
}
// query helpers
impl<'v> ObjCompiler<'_, 'v, '_, '_, '_, '_> {
fn draft_version(&self) -> usize {
self.root.draft.version
}
fn has_vocab(&self, name: &str) -> bool {
self.root.has_vocab(name)
}
fn value(&self, pname: &str) -> Option<&'v Value> {
self.obj.get(pname)
}
fn bool(&self, pname: &str) -> bool {
matches!(self.obj.get(pname), Some(Value::Bool(true)))
}
fn usize(&self, pname: &str) -> Option<usize> {
let Some(Value::Number(n)) = self.obj.get(pname) else {
return None;
};
if n.is_u64() {
n.as_u64().map(|n| n as usize)
} else {
n.as_f64()
.filter(|n| n.is_sign_positive() && n.fract() == 0.0)
.map(|n| n as usize)
}
}
fn num(&self, pname: &str) -> Option<Number> {
if let Some(Value::Number(n)) = self.obj.get(pname) {
Some(n.clone())
} else {
None
}
}
fn bool_schema(&self, pname: &str, b: bool) -> bool {
if let Some(Value::Bool(v)) = self.obj.get(pname) {
return *v == b;
}
false
}
}
/// Error type for compilation failures.
#[derive(Debug)]
pub enum CompileError {
/// Error in parsing `url`.
ParseUrlError { url: String, src: Box<dyn Error> },
/// Failed loading `url`.
LoadUrlError { url: String, src: Box<dyn Error> },
/// No [`UrlLoader`] registered for the `url`.
UnsupportedUrlScheme { url: String },
/// Error in parsing `$schema` url.
InvalidMetaSchemaUrl { url: String, src: Box<dyn Error> },
/// Draft `url` is not supported.
UnsupportedDraft { url: String },
/// Cycle in resolving `$schema` in `url`.
MetaSchemaCycle { url: String },
/// `url` is not valid against metaschema.
ValidationError {
url: String,
src: ValidationError<'static, 'static>,
},
/// Error in parsing id at `loc`.
ParseIdError { loc: String },
/// Error in parsing anchor at `loc`.
ParseAnchorError { loc: String },
/// Duplicate id `id` in `url` at `ptr1` and `ptr2`.
DuplicateId {
url: String,
id: String,
ptr1: String,
ptr2: String,
},
/// Duplicate anchor `anchor` in `url` at `ptr1` and `ptr2`.
DuplicateAnchor {
anchor: String,
url: String,
ptr1: String,
ptr2: String,
},
/// Not a valid json pointer.
InvalidJsonPointer(String),
/// JsonPointer evaluated to nothing.
JsonPointerNotFound(String),
/// Anchor in `reference` not found in `url`.
AnchorNotFound { url: String, reference: String },
/// Unsupported vocabulary `vocabulary` in `url`.
UnsupportedVocabulary { url: String, vocabulary: String },
/// Invalid Regex `regex` at `url`.
InvalidRegex {
url: String,
regex: String,
src: Box<dyn Error>,
},
/// Encountered bug in compiler implementation. Please report
/// this as an issue for this crate.
Bug(Box<dyn Error>),
}
impl Error for CompileError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::ParseUrlError { src, .. } => Some(src.as_ref()),
Self::LoadUrlError { src, .. } => Some(src.as_ref()),
Self::InvalidMetaSchemaUrl { src, .. } => Some(src.as_ref()),
Self::ValidationError { src, .. } => Some(src),
Self::Bug(src) => Some(src.as_ref()),
_ => None,
}
}
}
impl Display for CompileError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::ParseUrlError { url, src } => {
if f.alternate() {
write!(f, "error parsing url {url}: {src}")
} else {
write!(f, "error parsing {url}")
}
}
Self::LoadUrlError { url, src } => {
if f.alternate() {
write!(f, "error loading {url}: {src}")
} else {
write!(f, "error loading {url}")
}
}
Self::UnsupportedUrlScheme { url } => write!(f, "unsupported scheme in {url}"),
Self::InvalidMetaSchemaUrl { url, src } => {
if f.alternate() {
write!(f, "invalid $schema in {url}: {src}")
} else {
write!(f, "invalid $schema in {url}")
}
}
Self::UnsupportedDraft { url } => write!(f, "draft {url} is not supported"),
Self::MetaSchemaCycle { url } => {
write!(f, "cycle in resolving $schema in {url}")
}
Self::ValidationError { url, src } => {
if f.alternate() {
write!(f, "{url} is not valid against metaschema: {src}")
} else {
write!(f, "{url} is not valid against metaschema")
}
}
Self::ParseIdError { loc } => write!(f, "error in parsing id at {loc}"),
Self::ParseAnchorError { loc } => write!(f, "error in parsing anchor at {loc}"),
Self::DuplicateId {
url,
id,
ptr1,
ptr2,
} => write!(f, "duplicate $id {id} in {url} at {ptr1:?} and {ptr2:?}"),
Self::DuplicateAnchor {
anchor,
url,
ptr1,
ptr2,
} => {
write!(
f,
"duplicate anchor {anchor:?} in {url} at {ptr1:?} and {ptr2:?}"
)
}
Self::InvalidJsonPointer(loc) => write!(f, "invalid json-pointer {loc}"),
Self::JsonPointerNotFound(loc) => write!(f, "json-pointer in {loc} not found"),
Self::AnchorNotFound { url, reference } => {
write!(
f,
"anchor in reference {reference} is not found in schema {url}"
)
}
Self::UnsupportedVocabulary { url, vocabulary } => {
write!(f, "unsupported vocabulary {vocabulary} in {url}")
}
Self::InvalidRegex { url, regex, src } => {
if f.alternate() {
write!(f, "invalid regex {} at {url}: {src}", quote(regex))
} else {
write!(f, "invalid regex {} at {url}", quote(regex))
}
}
Self::Bug(src) => {
write!(
f,
"encountered bug in jsonschema compiler. please report: {src}"
)
}
}
}
}
// helpers --
fn to_strings(v: &Value) -> Vec<String> {
if let Value::Array(a) = v {
a.iter()
.filter_map(|t| {
if let Value::String(t) = t {
Some(t.clone())
} else {
None
}
})
.collect()
} else {
vec![]
}
}
pub(crate) struct Queue {
pub(crate) schemas: Vec<UrlPtr>,
pub(crate) roots: HashMap<Url, Root>,
}
impl Queue {
fn new() -> Self {
Self {
schemas: vec![],
roots: HashMap::new(),
}
}
pub(crate) fn resolve_anchor(
&mut self,
uf: UrlFrag,
roots: &Roots,
) -> Result<UrlPtr, CompileError> {
match uf.frag {
Fragment::JsonPointer(ptr) => Ok(UrlPtr { url: uf.url, ptr }),
Fragment::Anchor(_) => {
let root = match roots.get(&uf.url).or_else(|| self.roots.get(&uf.url)) {
Some(root) => root,
None => {
let doc = roots.loader.load(&uf.url)?;
let r = roots.create_root(uf.url.clone(), doc)?;
self.roots.entry(uf.url).or_insert(r)
}
};
root.resolve_fragment(&uf.frag)
}
}
}
pub(crate) fn enqueue_schema(&mut self, schemas: &Schemas, up: UrlPtr) -> SchemaIndex {
if let Some(sch) = schemas.get_by_loc(&up) {
// already got compiled
return sch.idx;
}
if let Some(qindex) = self.schemas.iter().position(|e| *e == up) {
// already queued for compilation
return SchemaIndex(schemas.size() + qindex);
}
// new compilation request
self.schemas.push(up);
SchemaIndex(schemas.size() + self.schemas.len() - 1)
}
}

82
validator/src/content.rs Normal file

@ -0,0 +1,82 @@
use std::{collections::HashMap, error::Error};
use base64::Engine;
use once_cell::sync::Lazy;
use serde::de::IgnoredAny;
use serde_json::Value;
// decoders --
/// Defines Decoder for `contentEncoding`.
#[derive(Clone, Copy)]
pub struct Decoder {
/// Name of the encoding
pub name: &'static str,
/// Decodes given string to bytes
#[allow(clippy::type_complexity)]
pub func: fn(s: &str) -> Result<Vec<u8>, Box<dyn Error>>,
}
pub(crate) static DECODERS: Lazy<HashMap<&'static str, Decoder>> = Lazy::new(|| {
let mut m = HashMap::<&'static str, Decoder>::new();
m.insert(
"base64",
Decoder {
name: "base64",
func: decode_base64,
},
);
m
});
fn decode_base64(s: &str) -> Result<Vec<u8>, Box<dyn Error>> {
Ok(base64::engine::general_purpose::STANDARD.decode(s)?)
}
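// A minimal sketch of an additional decoder, for a hypothetical "hex"
// contentEncoding; it relies only on the `Decoder` contract above and is
// not part of the built-in DECODERS table.
fn decode_hex(s: &str) -> Result<Vec<u8>, Box<dyn Error>> {
// reject non-ASCII input and odd lengths before slicing into byte pairs
if !s.is_ascii() || s.len() % 2 != 0 {
Err("invalid hex string")?;
}
(0..s.len())
.step_by(2)
.map(|i| u8::from_str_radix(&s[i..i + 2], 16).map_err(Into::into))
.collect()
}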
// mediatypes --
/// Defines Mediatype for `contentMediaType`.
#[derive(Clone, Copy)]
pub struct MediaType {
/// Name of this media-type as defined in RFC 2046.
/// Example: `application/json`
pub name: &'static str,
/// whether this media type can be deserialized to json. If so it can
/// be validated by `contentSchema` keyword.
pub json_compatible: bool,
/**
Check whether `bytes` conforms to this media-type.
Should return `Ok(Some(Value))` if `deserialize` is `true`, otherwise it can return `Ok(None)`.
Ideally you could deserialize to `serde::de::IgnoredAny` if `deserialize` is `false` to gain
some performance.
`deserialize` is always `false` if `json_compatible` is `false`.
*/
#[allow(clippy::type_complexity)]
pub func: fn(bytes: &[u8], deserialize: bool) -> Result<Option<Value>, Box<dyn Error>>,
}
pub(crate) static MEDIA_TYPES: Lazy<HashMap<&'static str, MediaType>> = Lazy::new(|| {
let mut m = HashMap::<&'static str, MediaType>::new();
m.insert(
"application/json",
MediaType {
name: "application/json",
json_compatible: true,
func: check_json,
},
);
m
});
fn check_json(bytes: &[u8], deserialize: bool) -> Result<Option<Value>, Box<dyn Error>> {
if deserialize {
return Ok(Some(serde_json::from_slice(bytes)?));
}
serde_json::from_slice::<IgnoredAny>(bytes)?;
Ok(None)
}
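// A minimal sketch of a non-json media type, assuming it would be registered
// through the same `MediaType` table; "text/plain" only checks for valid
// UTF-8, and since `json_compatible` would be false here, `deserialize` is
// always false.
fn check_text_plain(bytes: &[u8], _deserialize: bool) -> Result<Option<Value>, Box<dyn Error>> {
std::str::from_utf8(bytes)?;
Ok(None)
}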

576
validator/src/draft.rs Normal file

@ -0,0 +1,576 @@
use std::{
collections::{hash_map::Entry, HashMap},
str::FromStr,
};
use once_cell::sync::Lazy;
use serde_json::{Map, Value};
use url::Url;
use crate::{compiler::*, root::Resource, util::*, SchemaIndex, Schemas};
const POS_SELF: u8 = 1 << 0;
const POS_PROP: u8 = 1 << 1;
const POS_ITEM: u8 = 1 << 2;
pub(crate) static DRAFT4: Lazy<Draft> = Lazy::new(|| Draft {
version: 4,
id: "id",
url: "http://json-schema.org/draft-04/schema",
subschemas: HashMap::from([
// type agnostic
("definitions", POS_PROP),
("not", POS_SELF),
("allOf", POS_ITEM),
("anyOf", POS_ITEM),
("oneOf", POS_ITEM),
// object
("properties", POS_PROP),
("additionalProperties", POS_SELF),
("patternProperties", POS_PROP),
// array
("items", POS_SELF | POS_ITEM),
("additionalItems", POS_SELF),
("dependencies", POS_PROP),
]),
vocab_prefix: "",
all_vocabs: vec![],
default_vocabs: vec![],
});
pub(crate) static DRAFT6: Lazy<Draft> = Lazy::new(|| {
let mut subschemas = DRAFT4.subschemas.clone();
subschemas.extend([("propertyNames", POS_SELF), ("contains", POS_SELF)]);
Draft {
version: 6,
id: "$id",
url: "http://json-schema.org/draft-06/schema",
subschemas,
vocab_prefix: "",
all_vocabs: vec![],
default_vocabs: vec![],
}
});
pub(crate) static DRAFT7: Lazy<Draft> = Lazy::new(|| {
let mut subschemas = DRAFT6.subschemas.clone();
subschemas.extend([("if", POS_SELF), ("then", POS_SELF), ("else", POS_SELF)]);
Draft {
version: 7,
id: "$id",
url: "http://json-schema.org/draft-07/schema",
subschemas,
vocab_prefix: "",
all_vocabs: vec![],
default_vocabs: vec![],
}
});
pub(crate) static DRAFT2019: Lazy<Draft> = Lazy::new(|| {
let mut subschemas = DRAFT7.subschemas.clone();
subschemas.extend([
("$defs", POS_PROP),
("dependentSchemas", POS_PROP),
("unevaluatedProperties", POS_SELF),
("unevaluatedItems", POS_SELF),
("contentSchema", POS_SELF),
]);
Draft {
version: 2019,
id: "$id",
url: "https://json-schema.org/draft/2019-09/schema",
subschemas,
vocab_prefix: "https://json-schema.org/draft/2019-09/vocab/",
all_vocabs: vec![
"core",
"applicator",
"validation",
"meta-data",
"format",
"content",
],
default_vocabs: vec!["core", "applicator", "validation"],
}
});
pub(crate) static DRAFT2020: Lazy<Draft> = Lazy::new(|| {
let mut subschemas = DRAFT2019.subschemas.clone();
subschemas.extend([("prefixItems", POS_ITEM)]);
Draft {
version: 2020,
id: "$id",
url: "https://json-schema.org/draft/2020-12/schema",
subschemas,
vocab_prefix: "https://json-schema.org/draft/2020-12/vocab/",
all_vocabs: vec![
"core",
"applicator",
"unevaluated",
"validation",
"meta-data",
"format-annotation",
"format-assertion",
"content",
],
default_vocabs: vec!["core", "applicator", "unevaluated", "validation"],
}
});
pub(crate) static STD_METASCHEMAS: Lazy<Schemas> =
Lazy::new(|| load_std_metaschemas().expect("std metaschemas must be compilable"));
pub(crate) fn latest() -> &'static Draft {
crate::Draft::default().internal()
}
// --
pub(crate) struct Draft {
pub(crate) version: usize,
pub(crate) url: &'static str,
id: &'static str, // property name used to represent id
subschemas: HashMap<&'static str, u8>, // location of subschemas
pub(crate) vocab_prefix: &'static str, // prefix used for vocabulary
pub(crate) all_vocabs: Vec<&'static str>, // names of supported vocabs
pub(crate) default_vocabs: Vec<&'static str>, // names of default vocabs
}
impl Draft {
pub(crate) fn from_url(url: &str) -> Option<&'static Draft> {
let (mut url, frag) = split(url);
if !frag.is_empty() {
return None;
}
if let Some(s) = url.strip_prefix("http://") {
url = s;
}
if let Some(s) = url.strip_prefix("https://") {
url = s;
}
match url {
"json-schema.org/schema" => Some(latest()),
"json-schema.org/draft/2020-12/schema" => Some(&DRAFT2020),
"json-schema.org/draft/2019-09/schema" => Some(&DRAFT2019),
"json-schema.org/draft-07/schema" => Some(&DRAFT7),
"json-schema.org/draft-06/schema" => Some(&DRAFT6),
"json-schema.org/draft-04/schema" => Some(&DRAFT4),
_ => None,
}
}
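// For example, from_url("https://json-schema.org/draft-07/schema") and its
// http:// form both yield DRAFT7, while any url carrying a fragment yields None.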
fn get_schema(&self) -> Option<SchemaIndex> {
let url = match self.version {
2020 => "https://json-schema.org/draft/2020-12/schema",
2019 => "https://json-schema.org/draft/2019-09/schema",
7 => "http://json-schema.org/draft-07/schema",
6 => "http://json-schema.org/draft-06/schema",
4 => "http://json-schema.org/draft-04/schema",
_ => return None,
};
let up = UrlPtr {
url: Url::parse(url).unwrap_or_else(|_| panic!("{url} should be valid url")),
ptr: "".into(),
};
STD_METASCHEMAS.get_by_loc(&up).map(|s| s.idx)
}
pub(crate) fn validate(&self, up: &UrlPtr, v: &Value) -> Result<(), CompileError> {
let Some(sch) = self.get_schema() else {
return Err(CompileError::Bug(
format!("no metaschema preloaded for draft {}", self.version).into(),
));
};
STD_METASCHEMAS
.validate(v, sch, None)
.map_err(|src| CompileError::ValidationError {
url: up.to_string(),
src: src.clone_static(),
})
}
fn get_id<'a>(&self, obj: &'a Map<String, Value>) -> Option<&'a str> {
if self.version < 2019 && obj.contains_key("$ref") {
return None; // All other properties in a "$ref" object MUST be ignored
}
let Some(Value::String(id)) = obj.get(self.id) else {
return None;
};
let (id, _) = split(id); // ignore fragment
Some(id).filter(|id| !id.is_empty())
}
pub(crate) fn get_vocabs(
&self,
url: &Url,
doc: &Value,
) -> Result<Option<Vec<String>>, CompileError> {
if self.version < 2019 {
return Ok(None);
}
let Value::Object(obj) = doc else {
return Ok(None);
};
let Some(Value::Object(obj)) = obj.get("$vocabulary") else {
return Ok(None);
};
let mut vocabs = vec![];
for (vocab, reqd) in obj {
if let Value::Bool(true) = reqd {
let name = vocab
.strip_prefix(self.vocab_prefix)
.filter(|name| self.all_vocabs.contains(name));
if let Some(name) = name {
vocabs.push(name.to_owned()); // todo: avoid alloc
} else {
return Err(CompileError::UnsupportedVocabulary {
url: url.as_str().to_owned(),
vocabulary: vocab.to_owned(),
});
}
}
}
Ok(Some(vocabs))
}
// collects anchors/dynamic_anchors from `sch` into `res`.
// note this does not collect from subschemas in sch.
pub(crate) fn collect_anchors(
&self,
sch: &Value,
sch_ptr: &JsonPointer,
res: &mut Resource,
url: &Url,
) -> Result<(), CompileError> {
let Value::Object(obj) = sch else {
return Ok(());
};
let mut add_anchor = |anchor: Anchor| match res.anchors.entry(anchor) {
Entry::Occupied(entry) => {
if entry.get() == sch_ptr {
// anchor with same root_ptr already exists
return Ok(());
}
Err(CompileError::DuplicateAnchor {
url: url.as_str().to_owned(),
anchor: entry.key().to_string(),
ptr1: entry.get().to_string(),
ptr2: sch_ptr.to_string(),
})
}
entry => {
entry.or_insert(sch_ptr.to_owned());
Ok(())
}
};
if self.version < 2019 {
if obj.contains_key("$ref") {
return Ok(()); // All other properties in a "$ref" object MUST be ignored
}
// anchor is specified in id
if let Some(Value::String(id)) = obj.get(self.id) {
let Ok((_, frag)) = Fragment::split(id) else {
let loc = UrlFrag::format(url, sch_ptr.as_str());
return Err(CompileError::ParseAnchorError { loc });
};
if let Fragment::Anchor(anchor) = frag {
add_anchor(anchor)?;
};
return Ok(());
}
}
if self.version >= 2019 {
if let Some(Value::String(anchor)) = obj.get("$anchor") {
add_anchor(anchor.as_str().into())?;
}
}
if self.version >= 2020 {
if let Some(Value::String(anchor)) = obj.get("$dynamicAnchor") {
add_anchor(anchor.as_str().into())?;
res.dynamic_anchors.insert(anchor.as_str().into());
}
}
Ok(())
}
// on failure, the returned error carries the json-pointer of the invalid id
pub(crate) fn collect_resources(
&self,
sch: &Value,
base: &Url, // base of json
sch_ptr: JsonPointer, // ptr of json
url: &Url,
resources: &mut HashMap<JsonPointer, Resource>,
) -> Result<(), CompileError> {
if resources.contains_key(&sch_ptr) {
// resources are already collected
return Ok(());
}
if let Value::Bool(_) = sch {
if sch_ptr.is_empty() {
// root resource
resources.insert(sch_ptr.clone(), Resource::new(sch_ptr, base.clone()));
}
return Ok(());
}
let Value::Object(obj) = sch else {
return Ok(());
};
let mut base = base;
let tmp;
let res = if let Some(id) = self.get_id(obj) {
let Ok(id) = UrlFrag::join(base, id) else {
let loc = UrlFrag::format(url, sch_ptr.as_str());
return Err(CompileError::ParseIdError { loc });
};
tmp = id.url;
base = &tmp;
Some(Resource::new(sch_ptr.clone(), base.clone()))
} else if sch_ptr.is_empty() {
// root resource
Some(Resource::new(sch_ptr.clone(), base.clone()))
} else {
None
};
if let Some(res) = res {
if let Some(dup) = resources.values_mut().find(|res| res.id == *base) {
return Err(CompileError::DuplicateId {
url: url.to_string(),
id: base.to_string(),
ptr1: res.ptr.to_string(),
ptr2: dup.ptr.to_string(),
});
}
resources.insert(sch_ptr.clone(), res);
}
// collect anchors into base resource
if let Some(res) = resources.values_mut().find(|res| res.id == *base) {
self.collect_anchors(sch, &sch_ptr, res, url)?;
} else {
debug_assert!(false, "base resource must exist");
}
for (&kw, &pos) in &self.subschemas {
let Some(v) = obj.get(kw) else {
continue;
};
if pos & POS_SELF != 0 {
let ptr = sch_ptr.append(kw);
self.collect_resources(v, base, ptr, url, resources)?;
}
if pos & POS_ITEM != 0 {
if let Value::Array(arr) = v {
for (i, item) in arr.iter().enumerate() {
let ptr = sch_ptr.append2(kw, &i.to_string());
self.collect_resources(item, base, ptr, url, resources)?;
}
}
}
if pos & POS_PROP != 0 {
if let Value::Object(obj) = v {
for (pname, pvalue) in obj {
let ptr = sch_ptr.append2(kw, pname);
self.collect_resources(pvalue, base, ptr, url, resources)?;
}
}
}
}
Ok(())
}
pub(crate) fn is_subschema(&self, ptr: &str) -> bool {
if ptr.is_empty() {
return true;
}
fn split(mut ptr: &str) -> (&str, &str) {
ptr = &ptr[1..]; // rm `/` prefix
if let Some(i) = ptr.find('/') {
(&ptr[..i], &ptr[i..])
} else {
(ptr, "")
}
}
let (tok, ptr) = split(ptr);
if let Some(&pos) = self.subschemas.get(tok) {
if pos & POS_SELF != 0 && self.is_subschema(ptr) {
return true;
}
if !ptr.is_empty() {
if pos & POS_PROP != 0 {
let (_, ptr) = split(ptr);
if self.is_subschema(ptr) {
return true;
}
}
if pos & POS_ITEM != 0 {
let (tok, ptr) = split(ptr);
if usize::from_str(tok).is_ok() && self.is_subschema(ptr) {
return true;
}
}
}
}
false
}
}
fn load_std_metaschemas() -> Result<Schemas, CompileError> {
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
compiler.enable_format_assertions();
compiler.compile("https://json-schema.org/draft/2020-12/schema", &mut schemas)?;
compiler.compile("https://json-schema.org/draft/2019-09/schema", &mut schemas)?;
compiler.compile("http://json-schema.org/draft-07/schema", &mut schemas)?;
compiler.compile("http://json-schema.org/draft-06/schema", &mut schemas)?;
compiler.compile("http://json-schema.org/draft-04/schema", &mut schemas)?;
Ok(schemas)
}
#[cfg(test)]
mod tests {
use crate::{Compiler, Schemas};
use super::*;
#[test]
fn test_meta() {
let mut schemas = Schemas::default();
let mut compiler = Compiler::default();
let v: Value = serde_json::from_str(include_str!("metaschemas/draft-04/schema")).unwrap();
let url = "https://json-schema.org/draft-04/schema";
compiler.add_resource(url, v).unwrap();
compiler.compile(url, &mut schemas).unwrap();
}
#[test]
fn test_from_url() {
let tests = [
("http://json-schema.org/draft/2020-12/schema", Some(2020)), // http url
("https://json-schema.org/draft/2020-12/schema", Some(2020)), // https url
("https://json-schema.org/schema", Some(latest().version)), // latest
("https://json-schema.org/draft-04/schema", Some(4)),
];
for (url, version) in tests {
let got = Draft::from_url(url).map(|d| d.version);
assert_eq!(got, version, "for {url}");
}
}
#[test]
fn test_collect_ids() {
let url = Url::parse("http://a.com/schema.json").unwrap();
let json: Value = serde_json::from_str(
r#"{
"id": "http://a.com/schemas/schema.json",
"definitions": {
"s1": { "id": "http://a.com/definitions/s1" },
"s2": {
"id": "../s2",
"items": [
{ "id": "http://c.com/item" },
{ "id": "http://d.com/item" }
]
},
"s3": {
"definitions": {
"s1": {
"id": "s3",
"items": {
"id": "http://b.com/item"
}
}
}
},
"s4": { "id": "http://e.com/def#abcd" }
}
}"#,
)
.unwrap();
let want = {
let mut m = HashMap::new();
m.insert("", "http://a.com/schemas/schema.json"); // root with id
m.insert("/definitions/s1", "http://a.com/definitions/s1");
m.insert("/definitions/s2", "http://a.com/s2"); // relative id
m.insert("/definitions/s3/definitions/s1", "http://a.com/schemas/s3");
m.insert("/definitions/s3/definitions/s1/items", "http://b.com/item");
m.insert("/definitions/s2/items/0", "http://c.com/item");
m.insert("/definitions/s2/items/1", "http://d.com/item");
m.insert("/definitions/s4", "http://e.com/def"); // id with fragments
m
};
let mut got = HashMap::new();
DRAFT4
.collect_resources(&json, &url, "".into(), &url, &mut got)
.unwrap();
let got = got
.iter()
.map(|(k, v)| (k.as_str(), v.id.as_str()))
.collect::<HashMap<&str, &str>>();
assert_eq!(got, want);
}
#[test]
fn test_collect_anchors() {
let url = Url::parse("http://a.com/schema.json").unwrap();
let json: Value = serde_json::from_str(
r#"{
"$defs": {
"s2": {
"$id": "http://b.com",
"$anchor": "b1",
"items": [
{ "$anchor": "b2" },
{
"$id": "http//c.com",
"items": [
{"$anchor": "c1"},
{"$dynamicAnchor": "c2"}
]
},
{ "$dynamicAnchor": "b3" }
]
}
}
}"#,
)
.unwrap();
let mut resources = HashMap::new();
DRAFT2020
.collect_resources(&json, &url, "".into(), &url, &mut resources)
.unwrap();
assert!(resources.get("").unwrap().anchors.is_empty());
assert_eq!(resources.get("/$defs/s2").unwrap().anchors, {
let mut want = HashMap::new();
want.insert("b1".into(), "/$defs/s2".into());
want.insert("b2".into(), "/$defs/s2/items/0".into());
want.insert("b3".into(), "/$defs/s2/items/2".into());
want
});
assert_eq!(resources.get("/$defs/s2/items/1").unwrap().anchors, {
let mut want = HashMap::new();
want.insert("c1".into(), "/$defs/s2/items/1/items/0".into());
want.insert("c2".into(), "/$defs/s2/items/1/items/1".into());
want
});
}
#[test]
fn test_is_subschema() {
let tests = vec![("/allOf/0", true), ("/allOf/$defs", false)];
for test in tests {
let got = DRAFT2020.is_subschema(test.0);
assert_eq!(got, test.1, "{}", test.0);
}
}
}

197
validator/src/ecma.rs Normal file

@ -0,0 +1,197 @@
use std::borrow::Cow;
use regex_syntax::ast::parse::Parser;
use regex_syntax::ast::{self, *};
// convert ECMA regex to Rust regex if possible
// see https://262.ecma-international.org/11.0/#sec-regexp-regular-expression-objects
pub(crate) fn convert(pattern: &str) -> Result<Cow<'_, str>, Box<dyn std::error::Error>> {
let mut pattern = Cow::Borrowed(pattern);
let mut ast = loop {
match Parser::new().parse(pattern.as_ref()) {
Ok(ast) => break ast,
Err(e) => {
if let Some(s) = fix_error(&e) {
pattern = Cow::Owned(s);
} else {
Err(e)?;
}
}
}
};
loop {
let translator = Translator {
pat: pattern.as_ref(),
out: None,
};
if let Some(updated_pattern) = ast::visit(&ast, translator)? {
match Parser::new().parse(&updated_pattern) {
Ok(updated_ast) => {
pattern = Cow::Owned(updated_pattern);
ast = updated_ast;
}
Err(e) => {
debug_assert!(
false,
"ecma::translate changed {:?} to {:?}: {e}",
pattern, updated_pattern
);
break;
}
}
} else {
break;
}
}
Ok(pattern)
}
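// Examples of the conversions performed (a sketch; `convert` is crate-private):
// convert(r"ab\def") == Ok("ab[0-9]ef") -- \d narrowed to ASCII digits
// convert(r"ab\cAcd") == Ok("ab\u{1}cd") -- \c{control_letter} resolved
// convert(r"abc\adef") is an error -- \a is not an ECMA 262 escape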
fn fix_error(e: &Error) -> Option<String> {
if let ErrorKind::EscapeUnrecognized = e.kind() {
let (start, end) = (e.span().start.offset, e.span().end.offset);
let s = &e.pattern()[start..end];
if let r"\c" = s {
// handle \c{control_letter}
if let Some(control_letter) = e.pattern()[end..].chars().next() {
if control_letter.is_ascii_alphabetic() {
return Some(format!(
"{}{}{}",
&e.pattern()[..start],
((control_letter as u8) % 32) as char,
&e.pattern()[end + 1..],
));
}
}
}
}
None
}
/**
Handles the following translations:
- \d should match ASCII digits only, so replace with [0-9]
- \D should match everything but ASCII digits, so replace with [^0-9]
- \w should match ASCII word characters only, so replace with [a-zA-Z0-9_]
- \W should match everything but ASCII word characters, so replace with [^a-zA-Z0-9_]
- \s and \S should use ECMA's whitespace set, so replace with the explicit class
- \a is not an ECMA 262 control escape, so it is rejected
*/
struct Translator<'a> {
pat: &'a str,
out: Option<String>,
}
impl Translator<'_> {
fn replace(&mut self, span: &Span, with: &str) {
let (start, end) = (span.start.offset, span.end.offset);
self.out = Some(format!("{}{with}{}", &self.pat[..start], &self.pat[end..]));
}
fn replace_perl_class(&mut self, perl: &ClassPerl) {
match perl.kind {
ClassPerlKind::Digit => {
self.replace(&perl.span, if perl.negated { "[^0-9]" } else { "[0-9]" });
}
ClassPerlKind::Word => {
let with = &if perl.negated {
"[^A-Za-z0-9_]"
} else {
"[A-Za-z0-9_]"
};
self.replace(&perl.span, with);
}
ClassPerlKind::Space => {
let with = &if perl.negated {
"[^ \t\n\r\u{000b}\u{000c}\u{00a0}\u{feff}\u{2003}\u{2029}]"
} else {
"[ \t\n\r\u{000b}\u{000c}\u{00a0}\u{feff}\u{2003}\u{2029}]"
};
self.replace(&perl.span, with);
}
}
}
}
impl Visitor for Translator<'_> {
type Output = Option<String>;
type Err = &'static str;
fn finish(self) -> Result<Self::Output, Self::Err> {
Ok(self.out)
}
fn visit_class_set_item_pre(&mut self, ast: &ast::ClassSetItem) -> Result<(), Self::Err> {
if let ClassSetItem::Perl(perl) = ast {
self.replace_perl_class(perl);
}
Ok(())
}
fn visit_post(&mut self, ast: &Ast) -> Result<(), Self::Err> {
if self.out.is_some() {
return Ok(());
}
match ast {
Ast::ClassPerl(perl) => {
self.replace_perl_class(perl);
}
Ast::Literal(ref literal) => {
if let Literal {
kind: LiteralKind::Special(SpecialLiteralKind::Bell),
..
} = literal.as_ref()
{
return Err("\\a is not an ECMA 262 control escape");
}
}
_ => (),
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_ecma_compat_valid() {
// println!("{:#?}", Parser::new().parse(r#"a\a"#));
let tests = [
(r"ab\cAcde\cBfg", "ab\u{1}cde\u{2}fg"), // \c{control_letter}
(r"\\comment", r"\\comment"), // there is no \c
(r"ab\def", r#"ab[0-9]ef"#), // \d
(r"ab[a-z\d]ef", r#"ab[a-z[0-9]]ef"#), // \d inside classSet
(r"ab\Def", r#"ab[^0-9]ef"#), // \d
(r"ab[a-z\D]ef", r#"ab[a-z[^0-9]]ef"#), // \D inside classSet
];
for (input, want) in tests {
match convert(input) {
Ok(got) => {
if got.as_ref() != want {
panic!("convert({input:?}): got: {got:?}, want: {want:?}");
}
}
Err(e) => {
panic!("convert({input:?}) failed: {e}");
}
}
}
}
#[test]
fn test_ecma_compat_invalid() {
// println!("{:#?}", Parser::new().parse(r#"a\a"#));
let tests = [
r"\c\n", // \c{invalid_char}
r"abc\adef", // \a is not valid
];
for input in tests {
if convert(input).is_ok() {
panic!("convert({input:?}) mut fail");
}
}
}
}

838
validator/src/formats.rs Normal file
View File

@ -0,0 +1,838 @@
use std::{
collections::HashMap,
error::Error,
net::{Ipv4Addr, Ipv6Addr},
};
use once_cell::sync::Lazy;
use percent_encoding::percent_decode_str;
use serde_json::Value;
use url::Url;
use crate::ecma;
/// Defines format for `format` keyword.
#[derive(Clone, Copy)]
pub struct Format {
/// Name of the format
pub name: &'static str,
/// validates given value.
pub func: fn(v: &Value) -> Result<(), Box<dyn Error>>,
}
pub(crate) static FORMATS: Lazy<HashMap<&'static str, Format>> = Lazy::new(|| {
let mut m = HashMap::<&'static str, Format>::new();
let mut register = |name, func| m.insert(name, Format { name, func });
register("regex", validate_regex);
register("ipv4", validate_ipv4);
register("ipv6", validate_ipv6);
register("hostname", validate_hostname);
register("idn-hostname", validate_idn_hostname);
register("email", validate_email);
register("idn-email", validate_idn_email);
register("date", validate_date);
register("time", validate_time);
register("date-time", validate_date_time);
register("duration", validate_duration);
register("period", validate_period);
register("json-pointer", validate_json_pointer);
register("relative-json-pointer", validate_relative_json_pointer);
register("uuid", validate_uuid);
register("uri", validate_uri);
register("iri", validate_iri);
register("uri-reference", validate_uri_reference);
register("iri-reference", validate_iri_reference);
register("uri-template", validate_uri_template);
m
});
fn validate_regex(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
ecma::convert(s).map(|_| ())
}
fn validate_ipv4(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
s.parse::<Ipv4Addr>()?;
Ok(())
}
fn validate_ipv6(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
s.parse::<Ipv6Addr>()?;
Ok(())
}
fn validate_date(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
check_date(s)
}
fn matches_char(s: &str, index: usize, ch: char) -> bool {
s.is_char_boundary(index) && s[index..].starts_with(ch)
}
// see https://datatracker.ietf.org/doc/html/rfc3339#section-5.6
fn check_date(s: &str) -> Result<(), Box<dyn Error>> {
// yyyy-mm-dd
if s.len() != 10 {
Err("must be 10 characters long")?;
}
if !matches_char(s, 4, '-') || !matches_char(s, 7, '-') {
Err("missing hyphen in correct place")?;
}
let mut ymd = s.splitn(3, '-').filter_map(|t| t.parse::<usize>().ok());
let (Some(y), Some(m), Some(d)) = (ymd.next(), ymd.next(), ymd.next()) else {
Err("non-positive year/month/day")?
};
if !matches!(m, 1..=12) {
Err(format!("{m} months in year"))?;
}
if !matches!(d, 1..=31) {
Err(format!("{d} days in month"))?;
}
match m {
2 => {
let mut feb_days = 28;
if y % 4 == 0 && (y % 100 != 0 || y % 400 == 0) {
feb_days += 1; // leap year
};
if d > feb_days {
Err(format!("february has {feb_days} days only"))?;
}
}
4 | 6 | 9 | 11 => {
if d > 30 {
Err("month has 30 days only")?;
}
}
_ => {}
}
Ok(())
}
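// For example, check_date("2024-02-29") is Ok (2024 is a leap year), while
// check_date("2023-02-29") and check_date("2024-13-01") both fail.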
fn validate_time(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
check_time(s)
}
fn check_time(mut str: &str) -> Result<(), Box<dyn Error>> {
// min: hh:mm:ssZ
if str.len() < 9 {
Err("less than 9 characters long")?
}
if !matches_char(str, 2, ':') || !matches_char(str, 5, ':') {
Err("missing colon in correct place")?
}
// parse hh:mm:ss
if !str.is_char_boundary(8) {
Err("contains non-ascii char")?
}
let mut hms = (str[..8])
.splitn(3, ':')
.filter_map(|t| t.parse::<usize>().ok());
let (Some(mut h), Some(mut m), Some(s)) = (hms.next(), hms.next(), hms.next()) else {
Err("non-positive hour/min/sec")?
};
if h > 23 || m > 59 || s > 60 {
Err("hour/min/sec out of range")?
}
str = &str[8..];
// parse sec-frac if present
if let Some(rem) = str.strip_prefix('.') {
let n_digits = rem.chars().take_while(char::is_ascii_digit).count();
if n_digits == 0 {
Err("no digits in second fraction")?;
}
str = &rem[n_digits..];
}
if str != "z" && str != "Z" {
// parse time-numoffset
if str.len() != 6 {
Err("offset must be 6 characters long")?;
}
let sign: isize = match str.chars().next() {
Some('+') => -1,
Some('-') => 1,
_ => return Err("offset must begin with plus/minus")?,
};
str = &str[1..];
if !matches_char(str, 2, ':') {
Err("missing colon in offset at correct place")?
}
let mut zhm = str.splitn(2, ':').filter_map(|t| t.parse::<usize>().ok());
let (Some(zh), Some(zm)) = (zhm.next(), zhm.next()) else {
Err("non-positive hour/min in offset")?
};
if zh > 23 || zm > 59 {
Err("hour/min in offset out of range")?
}
// apply timezone
let mut hm = (h * 60 + m) as isize + sign * (zh * 60 + zm) as isize;
if hm < 0 {
hm += 24 * 60;
debug_assert!(hm >= 0);
}
let hm = hm as usize;
(h, m) = (hm / 60, hm % 60);
}
// check leap second
if !(s < 60 || (h == 23 && m == 59)) {
Err("invalid leap second")?
}
Ok(())
}
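// For example, "23:59:60Z" is accepted as a leap second, "12:00:00+05:30"
// applies the offset before the leap-second check, and "24:00:00Z" fails
// because the hour is out of range.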
fn validate_date_time(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
check_date_time(s)
}
fn check_date_time(s: &str) -> Result<(), Box<dyn Error>> {
// min: yyyy-mm-ddThh:mm:ssZ
if s.len() < 20 {
Err("less than 20 characters long")?;
}
if !s.is_char_boundary(10) || !s[10..].starts_with(['t', 'T']) {
Err("11th character must be t or T")?;
}
if let Err(e) = check_date(&s[..10]) {
Err(format!("invalid date element: {e}"))?;
}
if let Err(e) = check_time(&s[11..]) {
Err(format!("invalid time element: {e}"))?;
}
Ok(())
}
fn validate_duration(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
check_duration(s)
}
// see https://datatracker.ietf.org/doc/html/rfc3339#appendix-A
fn check_duration(s: &str) -> Result<(), Box<dyn Error>> {
// must start with 'P'
let Some(s) = s.strip_prefix('P') else {
Err("must start with P")?
};
if s.is_empty() {
Err("nothing after P")?
}
// dur-week
if let Some(s) = s.strip_suffix('W') {
if s.is_empty() {
Err("no number in week")?
}
if !s.chars().all(|c| c.is_ascii_digit()) {
Err("invalid week")?
}
return Ok(());
}
static UNITS: [&str; 2] = ["YMD", "HMS"];
for (i, s) in s.split('T').enumerate() {
let mut s = s;
if i != 0 && s.is_empty() {
Err("no time elements")?
}
let Some(mut units) = UNITS.get(i).cloned() else {
Err("more than one T")?
};
while !s.is_empty() {
let digit_count = s.chars().take_while(char::is_ascii_digit).count();
if digit_count == 0 {
Err("missing number")?
}
s = &s[digit_count..];
let Some(unit) = s.chars().next() else {
Err("missing unit")?
};
let Some(j) = units.find(unit) else {
if UNITS[i].contains(unit) {
Err(format!("unit {unit} out of order"))?
}
Err(format!("invalid unit {unit}"))?
};
units = &units[j + 1..];
s = &s[1..];
}
}
Ok(())
}
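// For example, "P1Y2M3DT4H5M6S" and "P4W" are valid, "P1D2M" fails because
// units must appear in order, and "PT" fails with "no time elements".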
// see https://datatracker.ietf.org/doc/html/rfc3339#appendix-A
fn validate_period(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
let Some(slash) = s.find('/') else {
Err("missing slash")?
};
let (start, end) = (&s[..slash], &s[slash + 1..]);
if start.starts_with('P') {
if let Err(e) = check_duration(start) {
Err(format!("invalid start duration: {e}"))?
}
if let Err(e) = check_date_time(end) {
Err(format!("invalid end date-time: {e}"))?
}
} else {
if let Err(e) = check_date_time(start) {
Err(format!("invalid start date-time: {e}"))?
}
if end.starts_with('P') {
if let Err(e) = check_duration(end) {
Err(format!("invalid end duration: {e}"))?;
}
} else if let Err(e) = check_date_time(end) {
Err(format!("invalid end date-time: {e}"))?;
}
}
Ok(())
}
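// For example, all three period shapes are accepted:
// "2019-01-01T00:00:00Z/2019-02-01T00:00:00Z" (start/end),
// "2019-01-01T00:00:00Z/P1M" (start/duration), and
// "P1M/2019-02-01T00:00:00Z" (duration/end).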
fn validate_hostname(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
check_hostname(s)
}
// see https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
fn check_hostname(mut s: &str) -> Result<(), Box<dyn Error>> {
// entire hostname (including the delimiting dots but not a trailing dot) has a maximum of 253 ASCII characters
s = s.strip_suffix('.').unwrap_or(s);
if s.len() > 253 {
Err("more than 253 characters long")?
}
// Hostnames are composed of series of labels concatenated with dots, as are all domain names
for label in s.split('.') {
// Each label must be from 1 to 63 characters long
if !matches!(label.len(), 1..=63) {
Err("label must be 1 to 63 characters long")?;
}
// labels must not start or end with a hyphen
if label.starts_with('-') {
Err("label starts with hyphen")?;
}
if label.ends_with('-') {
Err("label ends with hyphen")?;
}
// labels may contain only the ASCII letters 'a' through 'z' (in a case-insensitive manner),
// the digits '0' through '9', and the hyphen ('-')
if let Some(ch) = label
.chars()
.find(|c| !matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-'))
{
Err(format!("invalid character {ch:?}"))?;
}
}
Ok(())
}
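// For example, "example.com." (trailing dot allowed) and "a-b.c" pass, while
// "-ab.c" fails (label starts with hyphen) and "a_b.c" fails (invalid
// character '_').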
fn validate_idn_hostname(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
check_idn_hostname(s)
}
fn check_idn_hostname(s: &str) -> Result<(), Box<dyn Error>> {
let s = idna::domain_to_ascii_strict(s)?;
let unicode = idna::domain_to_unicode(&s).0;
// see https://www.rfc-editor.org/rfc/rfc5892#section-2.6
{
static DISALLOWED: [char; 10] = [
'\u{0640}', // ARABIC TATWEEL
'\u{07FA}', // NKO LAJANYALAN
'\u{302E}', // HANGUL SINGLE DOT TONE MARK
'\u{302F}', // HANGUL DOUBLE DOT TONE MARK
'\u{3031}', // VERTICAL KANA REPEAT MARK
'\u{3032}', // VERTICAL KANA REPEAT WITH VOICED SOUND MARK
'\u{3033}', // VERTICAL KANA REPEAT MARK UPPER HALF
'\u{3034}', // VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF
'\u{3035}', // VERTICAL KANA REPEAT MARK LOWER HALF
'\u{303B}', // VERTICAL IDEOGRAPHIC ITERATION MARK
];
if unicode.contains(DISALLOWED) {
Err("contains disallowed character")?;
}
}
// unicode string must not contain "--" in 3rd and 4th position
// and must not start and end with a '-'
// see https://www.rfc-editor.org/rfc/rfc5891#section-4.2.3.1
{
let count: usize = unicode
.chars()
.skip(2)
.take(2)
.map(|c| if c == '-' { 1 } else { 0 })
.sum();
if count == 2 {
Err("unicode string must not contain '--' in 3rd and 4th position")?;
}
}
// MIDDLE DOT is allowed between 'l' characters only
// see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.3
{
let middle_dot = '\u{00b7}';
let mut s = unicode.as_str();
while let Some(i) = s.find(middle_dot) {
let prefix = &s[..i];
let suffix = &s[i + middle_dot.len_utf8()..];
if !prefix.ends_with('l') || !suffix.starts_with('l') {
Err("MIDDLE DOT is allowed between 'l' characters only")?;
}
s = suffix;
}
}
// Greek KERAIA must be followed by Greek character
// see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.4
{
let keraia = '\u{0375}';
let greek = '\u{0370}'..='\u{03FF}';
let mut s = unicode.as_str();
while let Some(i) = s.find(keraia) {
let suffix = &s[i + keraia.len_utf8()..];
if !suffix.starts_with(|c| greek.contains(&c)) {
Err("Greek KERAIA must be followed by Greek character")?;
}
s = suffix;
}
}
// Hebrew GERESH must be preceded by Hebrew character
// see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.5
//
// Hebrew GERSHAYIM must be preceded by Hebrew character
// see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.6
{
let geresh = '\u{05F3}';
let gershayim = '\u{05F4}';
let hebrew = '\u{0590}'..='\u{05FF}';
for ch in [geresh, gershayim] {
let mut s = unicode.as_str();
while let Some(i) = s.find(ch) {
let prefix = &s[..i];
let suffix = &s[i + ch.len_utf8()..];
if !prefix.ends_with(|c| hebrew.contains(&c)) {
if ch == geresh {
Err("Hebrew GERESH must be preceded by Hebrew character")?;
} else {
Err("Hebrew GERSHAYIM must be preceded by Hebrew character")?;
}
}
s = suffix;
}
}
}
// KATAKANA MIDDLE DOT must be with Hiragana, Katakana, or Han
// see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.7
{
let katakana_middle_dot = '\u{30FB}';
let hiragana = '\u{3040}'..='\u{309F}';
let katakana = '\u{30A0}'..='\u{30FF}';
let han = '\u{4E00}'..='\u{9FFF}'; // https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block): is this range correct??
if unicode.contains(katakana_middle_dot) {
if unicode.contains(|c| hiragana.contains(&c))
|| unicode.contains(|c| c != katakana_middle_dot && katakana.contains(&c))
|| unicode.contains(|c| han.contains(&c))
{
// ok
} else {
Err("KATAKANA MIDDLE DOT must be with Hiragana, Katakana, or Han")?;
}
}
}
// ARABIC-INDIC DIGITS and Extended Arabic-Indic Digits cannot be mixed
// see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.8
// see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.9
{
let arabic_indic_digits = '\u{0660}'..='\u{0669}';
let extended_arabic_indic_digits = '\u{06F0}'..='\u{06F9}';
if unicode.contains(|c| arabic_indic_digits.contains(&c))
&& unicode.contains(|c| extended_arabic_indic_digits.contains(&c))
{
Err("ARABIC-INDIC DIGITS and Extended Arabic-Indic Digits cannot be mixed")?;
}
}
// ZERO WIDTH JOINER must be preceded by Virama
// see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.2
{
let zero_width_joiner = '\u{200D}';
static VIRAMA: [char; 61] = [
'\u{094D}',
'\u{09CD}',
'\u{0A4D}',
'\u{0ACD}',
'\u{0B4D}',
'\u{0BCD}',
'\u{0C4D}',
'\u{0CCD}',
'\u{0D3B}',
'\u{0D3C}',
'\u{0D4D}',
'\u{0DCA}',
'\u{0E3A}',
'\u{0EBA}',
'\u{0F84}',
'\u{1039}',
'\u{103A}',
'\u{1714}',
'\u{1734}',
'\u{17D2}',
'\u{1A60}',
'\u{1B44}',
'\u{1BAA}',
'\u{1BAB}',
'\u{1BF2}',
'\u{1BF3}',
'\u{2D7F}',
'\u{A806}',
'\u{A82C}',
'\u{A8C4}',
'\u{A953}',
'\u{A9C0}',
'\u{AAF6}',
'\u{ABED}',
'\u{10A3F}',
'\u{11046}',
'\u{1107F}',
'\u{110B9}',
'\u{11133}',
'\u{11134}',
'\u{111C0}',
'\u{11235}',
'\u{112EA}',
'\u{1134D}',
'\u{11442}',
'\u{114C2}',
'\u{115BF}',
'\u{1163F}',
'\u{116B6}',
'\u{1172B}',
'\u{11839}',
'\u{1193D}',
'\u{1193E}',
'\u{119E0}',
'\u{11A34}',
'\u{11A47}',
'\u{11A99}',
'\u{11C3F}',
'\u{11D44}',
'\u{11D45}',
'\u{11D97}',
]; // https://www.compart.com/en/unicode/combining/9
let mut s = unicode.as_str();
while let Some(i) = s.find(zero_width_joiner) {
let prefix = &s[..i];
let suffix = &s[i + zero_width_joiner.len_utf8()..];
if !prefix.ends_with(VIRAMA) {
Err("ZERO WIDTH JOINER must be preceded by Virama")?;
}
s = suffix;
}
}
check_hostname(&s)
}
fn validate_email(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
check_email(s)
}
// see https://en.wikipedia.org/wiki/Email_address
fn check_email(s: &str) -> Result<(), Box<dyn Error>> {
// entire email address to be no more than 254 characters long
if s.len() > 254 {
Err("more than 254 characters long")?
}
// email address is generally recognized as having two parts joined with an at-sign
let Some(at) = s.rfind('@') else {
Err("missing @")?
};
let (local, domain) = (&s[..at], &s[at + 1..]);
// local part may be up to 64 characters long
if local.len() > 64 {
Err("local part more than 64 characters long")?
}
if local.len() > 1 && local.starts_with('"') && local.ends_with('"') {
// quoted
let local = &local[1..local.len() - 1];
if local.contains(['\\', '"']) {
Err("backslash and quote not allowed within quoted local part")?
}
} else {
// unquoted
if local.starts_with('.') {
Err("starts with dot")?
}
if local.ends_with('.') {
Err("ends with dot")?
}
// consecutive dots not allowed
if local.contains("..") {
Err("consecutive dots")?
}
// check allowed chars
if let Some(ch) = local
.chars()
.find(|c| !(c.is_ascii_alphanumeric() || ".!#$%&'*+-/=?^_`{|}~".contains(*c)))
{
Err(format!("invalid character {ch:?}"))?
}
}
// domain if enclosed in brackets, must match an IP address
if domain.starts_with('[') && domain.ends_with(']') {
let s = &domain[1..domain.len() - 1];
if let Some(s) = s.strip_prefix("IPv6:") {
if let Err(e) = s.parse::<Ipv6Addr>() {
Err(format!("invalid ipv6 address: {e}"))?
}
return Ok(());
}
if let Err(e) = s.parse::<Ipv4Addr>() {
Err(format!("invalid ipv4 address: {e}"))?
}
return Ok(());
}
// domain must match the requirements for a hostname
if let Err(e) = check_hostname(domain) {
Err(format!("invalid domain: {e}"))?
}
Ok(())
}
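// For example, "user.name@example.com", "\"a b\"@example.com" (quoted local
// part) and "user@[192.168.1.1]" (ip-literal domain) all pass, while
// "a..b@example.com" fails (consecutive dots).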
fn validate_idn_email(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
let Some(at) = s.rfind('@') else {
Err("missing @")?
};
let (local, domain) = (&s[..at], &s[at + 1..]);
let local = idna::domain_to_ascii_strict(local)?;
let domain = idna::domain_to_ascii_strict(domain)?;
if let Err(e) = check_idn_hostname(&domain) {
Err(format!("invalid domain: {e}"))?
}
check_email(&format!("{local}@{domain}"))
}
fn validate_json_pointer(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
check_json_pointer(s)
}
// see https://www.rfc-editor.org/rfc/rfc6901#section-3
fn check_json_pointer(s: &str) -> Result<(), Box<dyn Error>> {
if s.is_empty() {
return Ok(());
}
if !s.starts_with('/') {
Err("not starting with slash")?;
}
for token in s.split('/').skip(1) {
let mut chars = token.chars();
while let Some(ch) = chars.next() {
if ch == '~' {
if !matches!(chars.next(), Some('0' | '1')) {
Err("~ must be followed by 0 or 1")?;
}
} else if !matches!(ch, '\x00'..='\x2E' | '\x30'..='\x7D' | '\x7F'..='\u{10FFFF}') {
Err("contains disallowed character")?;
}
}
}
Ok(())
}
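// For example, "" and "/a/~0b/~1c" are valid ("~0" escapes '~', "~1"
// escapes '/'), while "a/b" fails (not starting with slash) and "/a~2"
// fails (~ must be followed by 0 or 1).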
// see https://tools.ietf.org/html/draft-handrews-relative-json-pointer-01#section-3
fn validate_relative_json_pointer(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
// start with non-negative-integer
let num_digits = s.chars().take_while(char::is_ascii_digit).count();
if num_digits == 0 {
Err("must start with non-negative integer")?;
}
if num_digits > 1 && s.starts_with('0') {
Err("starts with zero")?;
}
let s = &s[num_digits..];
// followed by either json-pointer or '#'
if s == "#" {
return Ok(());
}
if let Err(e) = check_json_pointer(s) {
Err(format!("invalid json-pointer element: {e}"))?;
}
Ok(())
}
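// For example, "0#" and "1/a/b" are valid, while "01/a" fails (starts with
// zero) and "-1/a" fails (must start with a non-negative integer).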
// see https://datatracker.ietf.org/doc/html/rfc4122#page-4
fn validate_uuid(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
static HEX_GROUPS: [usize; 5] = [8, 4, 4, 4, 12];
let mut i = 0;
for group in s.split('-') {
if i >= HEX_GROUPS.len() {
Err("more than 5 elements")?;
}
if group.len() != HEX_GROUPS[i] {
Err(format!(
"element {} must be {} characters long",
i + 1,
HEX_GROUPS[i]
))?;
}
if let Some(ch) = group.chars().find(|c| !c.is_ascii_hexdigit()) {
Err(format!("non-hex character {ch:?}"))?;
}
i += 1;
}
if i != HEX_GROUPS.len() {
Err("must have 5 elements")?;
}
Ok(())
}
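// For example, "123e4567-e89b-12d3-a456-426614174000" passes, while
// "123e4567-e89b-12d3-a456" fails (must have 5 elements).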
fn validate_uri(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
if fluent_uri::UriRef::parse(s.as_str())?.scheme().is_none() {
Err("relative url")?;
};
Ok(())
}
fn validate_iri(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
match Url::parse(s) {
Ok(_) => Ok(()),
Err(url::ParseError::RelativeUrlWithoutBase) => Err("relative url")?,
Err(e) => Err(e)?,
}
}
static TEMP_URL: Lazy<Url> = Lazy::new(|| Url::parse("http://temp.com").unwrap());
fn parse_uri_reference(s: &str) -> Result<Url, Box<dyn Error>> {
if s.contains('\\') {
Err("contains \\\\")?;
}
Ok(TEMP_URL.join(s)?)
}
fn validate_uri_reference(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
fluent_uri::UriRef::parse(s.as_str())?;
Ok(())
}
fn validate_iri_reference(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
parse_uri_reference(s)?;
Ok(())
}
fn validate_uri_template(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(());
};
let url = parse_uri_reference(s)?;
let path = url.path();
// the path we got has curly braces percent-encoded
let path = percent_decode_str(path).decode_utf8()?;
// ensure curly braces are balanced and not nested
for part in path.as_ref().split('/') {
let mut want = true;
for got in part
.chars()
.filter(|c| matches!(c, '{' | '}'))
.map(|c| c == '{')
{
if got != want {
Err("nested curly braces")?;
}
want = !want;
}
if !want {
Err("no matching closing brace")?
}
}
Ok(())
}

724
validator/src/lib.rs Normal file

@ -0,0 +1,724 @@
/*! This crate supports JsonSchema validation for drafts `2020-12`, `2019-09`, `7`, `6` and `4`.
```rust,no_run
# use std::fs::File;
# use std::error::Error;
# use boon::*;
# use serde_json::Value;
# fn main() -> Result<(), Box<dyn Error>>{
let mut schemas = Schemas::new(); // container for compiled schemas
let mut compiler = Compiler::new();
let sch_index = compiler.compile("schema.json", &mut schemas)?;
let instance: Value = serde_json::from_reader(File::open("instance.json")?)?;
let valid = schemas.validate(&instance, sch_index, None).is_ok();
# Ok(())
# }
```
If the schema file has no `$schema`, the latest draft is assumed.
You can override this:
```rust,no_run
# use boon::*;
# let mut compiler = Compiler::new();
compiler.set_default_draft(Draft::V7);
```
The use of this option is HIGHLY encouraged to ensure continued
correct operation of your schema. The current default value will
not stay the same over time.
# Examples
- [example_from_strings]: loading schemas from Strings
- [example_from_https]: loading schemas from `http(s)`
- [example_custom_format]: registering custom format
- [example_custom_content_encoding]: registering custom contentEncoding
- [example_custom_content_media_type]: registering custom contentMediaType
# Compile Errors
```no_compile
println!("{compile_error}");
println!("{compile_error:#}"); // prints cause if any
```
Using the alternate form in display will print the cause, if any.
This is useful in cases like [`CompileError::LoadUrlError`],
where it helps to know whether the url does not exist or
the resource at the url is not a valid json document.
# Validation Errors
[`ValidationError`] may have multiple `causes`, resulting
in a tree of errors.
`println!("{validation_error}")` prints:
```no_compile
jsonschema validation failed with file:///tmp/customer.json#
at '': missing properties 'age'
at '/billing_address': missing properties 'street_address', 'city', 'state'
```
The alternate form `println!("{validation_error:#}")` prints:
```no_compile
jsonschema validation failed with file:///tmp/customer.json#
[I#] [S#/required] missing properties 'age'
[I#/billing_address] [S#/properties/billing_address/$ref] validation failed with file:///tmp/address.json#
[I#/billing_address] [S#/required] missing properties 'street_address', 'city', 'state'
```
Here `I` refers to the instance document and `S` refers to the last schema document.
For example:
- after line 1: `S` refers to `file:///tmp/customer.json`
- after line 3: `S` refers to `file:///tmp/address.json`
# Output Formats
[`ValidationError`] can be converted into the following output formats:
- [flag] `validation_error.flag_output()`
- [basic] `validation_error.basic_output()`
- [detailed] `validation_error.detailed_output()`
The output object implements `serde::Serialize`.
It also implements `Display` to print json:
```no_compile
println!("{output}"); // prints unformatted json
println!("{output:#}"); // prints indented json
```
[example_from_strings]: https://github.com/santhosh-tekuri/boon/blob/d466730e5e5c7c663bd6739e74e39d1e2f7baae4/tests/examples.rs#L22
[example_from_https]: https://github.com/santhosh-tekuri/boon/blob/d466730e5e5c7c663bd6739e74e39d1e2f7baae4/tests/examples.rs#L62
[example_from_yaml_files]: https://github.com/santhosh-tekuri/boon/blob/d466730e5e5c7c663bd6739e74e39d1e2f7baae4/tests/examples.rs#L86
[example_custom_format]: https://github.com/santhosh-tekuri/boon/blob/d466730e5e5c7c663bd6739e74e39d1e2f7baae4/tests/examples.rs#L119
[example_custom_content_encoding]: https://github.com/santhosh-tekuri/boon/blob/d466730e5e5c7c663bd6739e74e39d1e2f7baae4/tests/examples.rs#L153
[example_custom_content_media_type]: https://github.com/santhosh-tekuri/boon/blob/d466730e5e5c7c663bd6739e74e39d1e2f7baae4/tests/examples.rs#L198
[flag]: https://json-schema.org/draft/2020-12/json-schema-core.html#name-flag
[basic]: https://json-schema.org/draft/2020-12/json-schema-core.html#name-basic
[detailed]: https://json-schema.org/draft/2020-12/json-schema-core.html#name-detailed
*/
mod compiler;
mod content;
mod draft;
mod ecma;
mod formats;
mod loader;
mod output;
mod root;
mod roots;
mod util;
mod validator;
#[cfg(not(target_arch = "wasm32"))]
pub use loader::FileLoader;
pub use {
compiler::{CompileError, Compiler, Draft},
content::{Decoder, MediaType},
formats::Format,
loader::{SchemeUrlLoader, UrlLoader},
output::{
AbsoluteKeywordLocation, FlagOutput, KeywordPath, OutputError, OutputUnit, SchemaToken,
},
validator::{InstanceLocation, InstanceToken},
};
use std::{borrow::Cow, collections::HashMap, error::Error, fmt::Display};
use ahash::AHashMap;
use regex::Regex;
use serde_json::{Number, Value};
use util::*;
/// Options for validation process
#[derive(Default, Debug, Clone, Copy)]
pub struct ValidationOptions {
/// treat unevaluated properties as an error
pub be_strict: bool,
}
/// Identifier to compiled schema.
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SchemaIndex(usize);
/// Collection of compiled schemas.
#[derive(Default)]
pub struct Schemas {
list: Vec<Schema>,
map: HashMap<UrlPtr, usize>, // loc => schema-index
}
impl Schemas {
pub fn new() -> Self {
Self::default()
}
fn insert(&mut self, locs: Vec<UrlPtr>, compiled: Vec<Schema>) {
for (up, sch) in locs.into_iter().zip(compiled.into_iter()) {
let i = self.list.len();
self.list.push(sch);
self.map.insert(up, i);
}
}
fn get(&self, idx: SchemaIndex) -> &Schema {
&self.list[idx.0] // todo: return bug
}
fn get_by_loc(&self, up: &UrlPtr) -> Option<&Schema> {
self.map.get(up).and_then(|&i| self.list.get(i))
}
/// Returns true if `sch_index` was generated for this instance.
pub fn contains(&self, sch_index: SchemaIndex) -> bool {
self.list.get(sch_index.0).is_some()
}
pub fn size(&self) -> usize {
self.list.len()
}
/**
Validates `v` with the schema identified by `sch_index`.
# Panics
Panics if `sch_index` is not generated for this instance.
[`Schemas::contains`] can be used to ensure that it does not panic.
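for example (a sketch, with `v` and `sch_index` in scope):
```no_compile
if schemas.contains(sch_index) {
    let result = schemas.validate(v, sch_index, None);
}
```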
*/
pub fn validate<'s, 'v>(
&'s self,
v: &'v Value,
sch_index: SchemaIndex,
options: Option<ValidationOptions>,
) -> Result<(), ValidationError<'s, 'v>> {
let Some(sch) = self.list.get(sch_index.0) else {
panic!("Schemas::validate: schema index out of bounds");
};
validator::validate(v, sch, self, options)
}
}
#[derive(Default)]
struct Schema {
draft_version: usize,
idx: SchemaIndex,
loc: String,
resource: SchemaIndex,
dynamic_anchors: HashMap<String, SchemaIndex>,
all_props_evaluated: bool,
all_items_evaluated: bool,
num_items_evaluated: usize,
// type agnostic --
boolean: Option<bool>, // boolean schema
ref_: Option<SchemaIndex>,
recursive_ref: Option<SchemaIndex>,
recursive_anchor: bool,
dynamic_ref: Option<DynamicRef>,
dynamic_anchor: Option<String>,
types: Types,
enum_: Option<Enum>,
constant: Option<Value>,
not: Option<SchemaIndex>,
all_of: Vec<SchemaIndex>,
any_of: Vec<SchemaIndex>,
one_of: Vec<SchemaIndex>,
if_: Option<SchemaIndex>,
then: Option<SchemaIndex>,
else_: Option<SchemaIndex>,
format: Option<Format>,
// object --
min_properties: Option<usize>,
max_properties: Option<usize>,
required: Vec<String>,
properties: AHashMap<String, SchemaIndex>,
pattern_properties: Vec<(Regex, SchemaIndex)>,
property_names: Option<SchemaIndex>,
additional_properties: Option<Additional>,
dependent_required: Vec<(String, Vec<String>)>,
dependent_schemas: Vec<(String, SchemaIndex)>,
dependencies: Vec<(String, Dependency)>,
unevaluated_properties: Option<SchemaIndex>,
// array --
min_items: Option<usize>,
max_items: Option<usize>,
unique_items: bool,
min_contains: Option<usize>,
max_contains: Option<usize>,
contains: Option<SchemaIndex>,
items: Option<Items>,
additional_items: Option<Additional>,
prefix_items: Vec<SchemaIndex>,
items2020: Option<SchemaIndex>,
unevaluated_items: Option<SchemaIndex>,
// string --
min_length: Option<usize>,
max_length: Option<usize>,
pattern: Option<Regex>,
content_encoding: Option<Decoder>,
content_media_type: Option<MediaType>,
content_schema: Option<SchemaIndex>,
// number --
minimum: Option<Number>,
maximum: Option<Number>,
exclusive_minimum: Option<Number>,
exclusive_maximum: Option<Number>,
multiple_of: Option<Number>,
}
#[derive(Debug)]
struct Enum {
/// types that occur in enum
types: Types,
/// values in enum
values: Vec<Value>,
}
#[derive(Debug)]
enum Items {
SchemaRef(SchemaIndex),
SchemaRefs(Vec<SchemaIndex>),
}
#[derive(Debug)]
enum Additional {
Bool(bool),
SchemaRef(SchemaIndex),
}
#[derive(Debug)]
enum Dependency {
Props(Vec<String>),
SchemaRef(SchemaIndex),
}
struct DynamicRef {
sch: SchemaIndex,
anchor: Option<String>,
}
impl Schema {
fn new(loc: String) -> Self {
Self {
loc,
..Default::default()
}
}
}
/// JSON data types for JSON Schema
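///
/// Discriminants are distinct powers of two, so a set of types packs into a [`Types`] bitmask.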
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum Type {
Null = 1,
Boolean = 2,
Number = 4,
Integer = 8,
String = 16,
Array = 32,
Object = 64,
}
impl Type {
fn of(v: &Value) -> Self {
match v {
Value::Null => Type::Null,
Value::Bool(_) => Type::Boolean,
Value::Number(_) => Type::Number,
Value::String(_) => Type::String,
Value::Array(_) => Type::Array,
Value::Object(_) => Type::Object,
}
}
fn from_str(value: &str) -> Option<Self> {
match value {
"null" => Some(Self::Null),
"boolean" => Some(Self::Boolean),
"number" => Some(Self::Number),
"integer" => Some(Self::Integer),
"string" => Some(Self::String),
"array" => Some(Self::Array),
"object" => Some(Self::Object),
_ => None,
}
}
fn primitive(v: &Value) -> bool {
!matches!(Self::of(v), Self::Array | Self::Object)
}
}
impl Display for Type {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Type::Null => write!(f, "null"),
Type::Boolean => write!(f, "boolean"),
Type::Number => write!(f, "number"),
Type::Integer => write!(f, "integer"),
Type::String => write!(f, "string"),
Type::Array => write!(f, "array"),
Type::Object => write!(f, "object"),
}
}
}
/// Set of [`Type`]s
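///
/// Represented as a bitmask over the [`Type`] discriminants, so it is `Copy` and cheap to pass around.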
#[derive(Debug, Default, Clone, Copy)]
pub struct Types(u8);
impl Types {
fn is_empty(self) -> bool {
self.0 == 0
}
fn add(&mut self, t: Type) {
self.0 |= t as u8;
}
/// Returns `true` if this set contains the given type.
pub fn contains(&self, t: Type) -> bool {
self.0 & t as u8 != 0
}
/// Returns an iterator over types.
pub fn iter(&self) -> impl Iterator<Item = Type> + '_ {
static TYPES: [Type; 7] = [
Type::Null,
Type::Boolean,
Type::Number,
Type::Integer,
Type::String,
Type::Array,
Type::Object,
];
TYPES.iter().cloned().filter(|t| self.contains(*t))
}
}
impl FromIterator<Type> for Types {
fn from_iter<T: IntoIterator<Item = Type>>(iter: T) -> Self {
let mut types = Types::default();
for t in iter {
types.add(t);
}
types
}
}
/// Error type for validation failures.
#[derive(Debug)]
pub struct ValidationError<'s, 'v> {
/// The absolute, dereferenced schema location.
pub schema_url: &'s str,
/// The location of the JSON value within the instance being validated
pub instance_location: InstanceLocation<'v>,
/// kind of error
pub kind: ErrorKind<'s, 'v>,
/// Holds nested errors
pub causes: Vec<ValidationError<'s, 'v>>,
}
impl Error for ValidationError<'_, '_> {}
/// A list specifying general categories of validation errors.
#[derive(Debug)]
pub enum ErrorKind<'s, 'v> {
Group,
Schema {
url: &'s str,
},
ContentSchema,
PropertyName {
prop: String,
},
Reference {
kw: &'static str,
url: &'s str,
},
RefCycle {
url: &'s str,
kw_loc1: String,
kw_loc2: String,
},
FalseSchema,
Type {
got: Type,
want: Types,
},
Enum {
want: &'s Vec<Value>,
},
Const {
want: &'s Value,
},
Format {
got: Cow<'v, Value>,
want: &'static str,
err: Box<dyn Error>,
},
MinProperties {
got: usize,
want: usize,
},
MaxProperties {
got: usize,
want: usize,
},
AdditionalProperties {
got: Vec<Cow<'v, str>>,
},
Required {
want: Vec<&'s str>,
},
Dependency {
/// dependency of prop that failed.
prop: &'s str,
/// missing props.
missing: Vec<&'s str>,
},
DependentRequired {
/// dependency of prop that failed.
prop: &'s str,
/// missing props.
missing: Vec<&'s str>,
},
MinItems {
got: usize,
want: usize,
},
MaxItems {
got: usize,
want: usize,
},
Contains,
MinContains {
got: Vec<usize>,
want: usize,
},
MaxContains {
got: Vec<usize>,
want: usize,
},
UniqueItems {
got: [usize; 2],
},
AdditionalItems {
got: usize,
},
MinLength {
got: usize,
want: usize,
},
MaxLength {
got: usize,
want: usize,
},
Pattern {
got: Cow<'v, str>,
want: &'s str,
},
ContentEncoding {
want: &'static str,
err: Box<dyn Error>,
},
ContentMediaType {
got: Vec<u8>,
want: &'static str,
err: Box<dyn Error>,
},
Minimum {
got: Cow<'v, Number>,
want: &'s Number,
},
Maximum {
got: Cow<'v, Number>,
want: &'s Number,
},
ExclusiveMinimum {
got: Cow<'v, Number>,
want: &'s Number,
},
ExclusiveMaximum {
got: Cow<'v, Number>,
want: &'s Number,
},
MultipleOf {
got: Cow<'v, Number>,
want: &'s Number,
},
Not,
/// at least one subschema did not match
AllOf,
/// none of the subschemas matched.
AnyOf,
/// - `None`: none of the subschemas matched.
/// - `Some(i, j)`: subschemas at `i` and `j` both matched.
OneOf(Option<(usize, usize)>),
}
impl Display for ErrorKind<'_, '_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Group => write!(f, "validation failed"),
Self::Schema { url } => write!(f, "validation failed with {url}"),
Self::ContentSchema => write!(f, "contentSchema failed"),
Self::PropertyName { prop } => write!(f, "invalid property {}", quote(prop)),
Self::Reference { .. } => {
write!(f, "validation failed")
}
Self::RefCycle {
url,
kw_loc1,
kw_loc2,
} => write!(
f,
"both {} and {} resolve to {url} causing reference cycle",
quote(&kw_loc1.to_string()),
quote(&kw_loc2.to_string())
),
Self::FalseSchema => write!(f, "false schema"),
Self::Type { got, want } => {
// todo: why join not working for Type struct ??
let want = join_iter(want.iter(), " or ");
write!(f, "want {want}, but got {got}",)
}
Self::Enum { want } => {
if want.iter().all(Type::primitive) {
if want.len() == 1 {
write!(f, "value must be ")?;
display(f, &want[0])
} else {
let want = join_iter(want.iter().map(string), ", ");
write!(f, "value must be one of {want}")
}
} else {
write!(f, "enum failed")
}
}
Self::Const { want } => {
if Type::primitive(want) {
write!(f, "value must be ")?;
display(f, want)
} else {
write!(f, "const failed")
}
}
Self::Format { got, want, err } => {
display(f, got)?;
write!(f, " is not valid {want}: {err}")
}
Self::MinProperties { got, want } => write!(
f,
"minimum {want} properties required, but got {got} properties"
),
Self::MaxProperties { got, want } => write!(
f,
"maximum {want} properties allowed, but got {got} properties"
),
Self::AdditionalProperties { got } => {
write!(
f,
"additionalProperties {} not allowed",
join_iter(got.iter().map(quote), ", ")
)
}
Self::Required { want } => write!(
f,
"missing properties {}",
join_iter(want.iter().map(quote), ", ")
),
Self::Dependency { prop, missing } => {
write!(
f,
"properties {} required, if {} property exists",
join_iter(missing.iter().map(quote), ", "),
quote(prop)
)
}
Self::DependentRequired { prop, missing } => write!(
f,
"properties {} required, if {} property exists",
join_iter(missing.iter().map(quote), ", "),
quote(prop)
),
Self::MinItems { got, want } => {
write!(f, "minimum {want} items required, but got {got} items")
}
Self::MaxItems { got, want } => {
write!(f, "maximum {want} items allowed, but got {got} items")
}
Self::MinContains { got, want } => {
if got.is_empty() {
write!(
f,
"minimum {want} items required to match contains schema, but found none",
)
} else {
write!(
f,
"minimum {want} items required to match contains schema, but found {} items at {}",
got.len(),
join_iter(got, ", ")
)
}
}
Self::Contains => write!(f, "no items match contains schema"),
Self::MaxContains { got, want } => {
write!(
f,
"maximum {want} items allowed to match contains schema, but found {} items at {}",
got.len(),
join_iter(got, ", ")
)
}
Self::UniqueItems { got: [i, j] } => write!(f, "items at {i} and {j} are equal"),
Self::AdditionalItems { got } => write!(f, "last {got} additionalItems not allowed"),
Self::MinLength { got, want } => write!(f, "length must be >={want}, but got {got}"),
Self::MaxLength { got, want } => write!(f, "length must be <={want}, but got {got}"),
Self::Pattern { got, want } => {
write!(f, "{} does not match pattern {}", quote(got), quote(want))
}
Self::ContentEncoding { want, err } => {
write!(f, "value is not {} encoded: {err}", quote(want))
}
Self::ContentMediaType { want, err, .. } => {
write!(f, "value is not of mediatype {}: {err}", quote(want))
}
Self::Minimum { got, want } => write!(f, "must be >={want}, but got {got}"),
Self::Maximum { got, want } => write!(f, "must be <={want}, but got {got}"),
Self::ExclusiveMinimum { got, want } => write!(f, "must be > {want}, but got {got}"),
Self::ExclusiveMaximum { got, want } => write!(f, "must be < {want}, but got {got}"),
Self::MultipleOf { got, want } => write!(f, "{got} is not multipleOf {want}"),
Self::Not => write!(f, "not failed"),
Self::AllOf => write!(f, "allOf failed"),
Self::AnyOf => write!(f, "anyOf failed"),
Self::OneOf(None) => write!(f, "oneOf failed, none matched"),
Self::OneOf(Some((i, j))) => write!(f, "oneOf failed, subschemas {i}, {j} matched"),
}
}
}
fn display(f: &mut std::fmt::Formatter, v: &Value) -> std::fmt::Result {
match v {
Value::String(s) => write!(f, "{}", quote(s)),
Value::Array(_) | Value::Object(_) => write!(f, "value"),
_ => write!(f, "{v}"),
}
}
fn string(primitive: &Value) -> String {
if let Value::String(s) = primitive {
quote(s)
} else {
format!("{primitive}")
}
}

validator/src/loader.rs Normal file
@@ -0,0 +1,243 @@
use std::{
cell::RefCell,
collections::{HashMap, HashSet},
error::Error,
};
#[cfg(not(target_arch = "wasm32"))]
use std::fs::File;
use appendlist::AppendList;
use once_cell::sync::Lazy;
use serde_json::Value;
use url::Url;
use crate::{
compiler::CompileError,
draft::{latest, Draft},
util::split,
UrlPtr,
};
/// A trait for loading json from given `url`
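///
/// Implement this to load schemas from custom sources. A minimal sketch
/// (hypothetical in-memory loader, not part of this crate):
///
/// ```no_compile
/// struct MapLoader(std::collections::HashMap<String, serde_json::Value>);
///
/// impl UrlLoader for MapLoader {
///     fn load(&self, url: &str) -> Result<Value, Box<dyn Error>> {
///         self.0.get(url).cloned().ok_or_else(|| "unknown url".into())
///     }
/// }
/// ```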
pub trait UrlLoader {
/// Loads json from given absolute `url`.
fn load(&self, url: &str) -> Result<Value, Box<dyn Error>>;
}
// --
#[cfg(not(target_arch = "wasm32"))]
pub struct FileLoader;
#[cfg(not(target_arch = "wasm32"))]
impl UrlLoader for FileLoader {
fn load(&self, url: &str) -> Result<Value, Box<dyn Error>> {
let url = Url::parse(url)?;
let path = url.to_file_path().map_err(|_| "invalid file path")?;
let file = File::open(path)?;
Ok(serde_json::from_reader(file)?)
}
}
// --
#[derive(Default)]
pub struct SchemeUrlLoader {
loaders: HashMap<&'static str, Box<dyn UrlLoader>>,
}
impl SchemeUrlLoader {
pub fn new() -> Self {
Self::default()
}
/// Registers [`UrlLoader`] for given url `scheme`
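///
/// e.g. `loader.register("https", Box::new(MyHttpsLoader))` for a hypothetical `MyHttpsLoader`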
pub fn register(&mut self, scheme: &'static str, url_loader: Box<dyn UrlLoader>) {
self.loaders.insert(scheme, url_loader);
}
}
impl UrlLoader for SchemeUrlLoader {
fn load(&self, url: &str) -> Result<Value, Box<dyn Error>> {
let url = Url::parse(url)?;
let Some(loader) = self.loaders.get(url.scheme()) else {
return Err(CompileError::UnsupportedUrlScheme {
url: url.as_str().to_owned(),
}
.into());
};
loader.load(url.as_str())
}
}
// --
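// Caches loaded documents by url. `doc_list` is an `AppendList`, so previously
// returned `&Value` borrows remain valid while new documents are appended.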
pub(crate) struct DefaultUrlLoader {
doc_map: RefCell<HashMap<Url, usize>>,
doc_list: AppendList<Value>,
loader: Box<dyn UrlLoader>,
}
impl DefaultUrlLoader {
#[cfg_attr(target_arch = "wasm32", allow(unused_mut))]
pub fn new() -> Self {
let mut loader = SchemeUrlLoader::new();
#[cfg(not(target_arch = "wasm32"))]
loader.register("file", Box::new(FileLoader));
Self {
doc_map: Default::default(),
doc_list: AppendList::new(),
loader: Box::new(loader),
}
}
pub fn get_doc(&self, url: &Url) -> Option<&Value> {
self.doc_map
.borrow()
.get(url)
.and_then(|i| self.doc_list.get(*i))
}
pub fn add_doc(&self, url: Url, json: Value) {
if self.get_doc(&url).is_some() {
return;
}
self.doc_list.push(json);
self.doc_map
.borrow_mut()
.insert(url, self.doc_list.len() - 1);
}
pub fn use_loader(&mut self, loader: Box<dyn UrlLoader>) {
self.loader = loader;
}
pub(crate) fn load(&self, url: &Url) -> Result<&Value, CompileError> {
if let Some(doc) = self.get_doc(url) {
return Ok(doc);
}
// check in STD_METAFILES
let doc = if let Some(content) = load_std_meta(url.as_str()) {
serde_json::from_str::<Value>(content).map_err(|e| CompileError::LoadUrlError {
url: url.to_string(),
src: e.into(),
})?
} else {
self.loader
.load(url.as_str())
.map_err(|src| CompileError::LoadUrlError {
url: url.as_str().to_owned(),
src,
})?
};
self.add_doc(url.clone(), doc);
self.get_doc(url)
.ok_or(CompileError::Bug("doc must exist".into()))
}
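// Determines the draft for `doc` by following its `$schema` chain, loading
// referenced meta-schemas as needed and detecting cycles along the way.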
pub(crate) fn get_draft(
&self,
up: &UrlPtr,
doc: &Value,
default_draft: &'static Draft,
mut cycle: HashSet<Url>,
) -> Result<&'static Draft, CompileError> {
let Value::Object(obj) = &doc else {
return Ok(default_draft);
};
let Some(Value::String(sch)) = obj.get("$schema") else {
return Ok(default_draft);
};
if let Some(draft) = Draft::from_url(sch) {
return Ok(draft);
}
let (sch, _) = split(sch);
let sch = Url::parse(sch).map_err(|e| CompileError::InvalidMetaSchemaUrl {
url: up.to_string(),
src: e.into(),
})?;
if up.ptr.is_empty() && sch == up.url {
return Err(CompileError::UnsupportedDraft { url: sch.into() });
}
if !cycle.insert(sch.clone()) {
return Err(CompileError::MetaSchemaCycle { url: sch.into() });
}
let doc = self.load(&sch)?;
let up = UrlPtr {
url: sch,
ptr: "".into(),
};
self.get_draft(&up, doc, default_draft, cycle)
}
pub(crate) fn get_meta_vocabs(
&self,
doc: &Value,
draft: &'static Draft,
) -> Result<Option<Vec<String>>, CompileError> {
let Value::Object(obj) = &doc else {
return Ok(None);
};
let Some(Value::String(sch)) = obj.get("$schema") else {
return Ok(None);
};
if Draft::from_url(sch).is_some() {
return Ok(None);
}
let (sch, _) = split(sch);
let sch = Url::parse(sch).map_err(|e| CompileError::ParseUrlError {
url: sch.to_string(),
src: e.into(),
})?;
let doc = self.load(&sch)?;
draft.get_vocabs(&sch, doc)
}
}
pub(crate) static STD_METAFILES: Lazy<HashMap<String, &str>> = Lazy::new(|| {
let mut files = HashMap::new();
macro_rules! add {
($path:expr) => {
files.insert(
$path["metaschemas/".len()..].to_owned(),
include_str!($path),
);
};
}
add!("metaschemas/draft-04/schema");
add!("metaschemas/draft-06/schema");
add!("metaschemas/draft-07/schema");
add!("metaschemas/draft/2019-09/schema");
add!("metaschemas/draft/2019-09/meta/core");
add!("metaschemas/draft/2019-09/meta/applicator");
add!("metaschemas/draft/2019-09/meta/validation");
add!("metaschemas/draft/2019-09/meta/meta-data");
add!("metaschemas/draft/2019-09/meta/format");
add!("metaschemas/draft/2019-09/meta/content");
add!("metaschemas/draft/2020-12/schema");
add!("metaschemas/draft/2020-12/meta/core");
add!("metaschemas/draft/2020-12/meta/applicator");
add!("metaschemas/draft/2020-12/meta/unevaluated");
add!("metaschemas/draft/2020-12/meta/validation");
add!("metaschemas/draft/2020-12/meta/meta-data");
add!("metaschemas/draft/2020-12/meta/content");
add!("metaschemas/draft/2020-12/meta/format-annotation");
add!("metaschemas/draft/2020-12/meta/format-assertion");
files
});
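// Resolves well-known json-schema.org urls to the bundled metaschema sources
// above; the bare `schema` url maps to the latest supported draft.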
fn load_std_meta(url: &str) -> Option<&'static str> {
let meta = url
.strip_prefix("http://json-schema.org/")
.or_else(|| url.strip_prefix("https://json-schema.org/"));
if let Some(meta) = meta {
if meta == "schema" {
return load_std_meta(latest().url);
}
return STD_METAFILES.get(meta).cloned();
}
None
}

validator/src/metaschemas/draft-04/schema Normal file
@@ -0,0 +1,151 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"description": "Core schema meta-schema",
"definitions": {
"schemaArray": {
"type": "array",
"minItems": 1,
"items": { "$ref": "#" }
},
"positiveInteger": {
"type": "integer",
"minimum": 0
},
"positiveIntegerDefault0": {
"allOf": [ { "$ref": "#/definitions/positiveInteger" }, { "default": 0 } ]
},
"simpleTypes": {
"enum": [ "array", "boolean", "integer", "null", "number", "object", "string" ]
},
"stringArray": {
"type": "array",
"items": { "type": "string" },
"minItems": 1,
"uniqueItems": true
}
},
"type": "object",
"properties": {
"id": {
"type": "string",
"format": "uriref"
},
"$schema": {
"type": "string",
"format": "uri"
},
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"default": {},
"multipleOf": {
"type": "number",
"minimum": 0,
"exclusiveMinimum": true
},
"maximum": {
"type": "number"
},
"exclusiveMaximum": {
"type": "boolean",
"default": false
},
"minimum": {
"type": "number"
},
"exclusiveMinimum": {
"type": "boolean",
"default": false
},
"maxLength": { "$ref": "#/definitions/positiveInteger" },
"minLength": { "$ref": "#/definitions/positiveIntegerDefault0" },
"pattern": {
"type": "string",
"format": "regex"
},
"additionalItems": {
"anyOf": [
{ "type": "boolean" },
{ "$ref": "#" }
],
"default": {}
},
"items": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/schemaArray" }
],
"default": {}
},
"maxItems": { "$ref": "#/definitions/positiveInteger" },
"minItems": { "$ref": "#/definitions/positiveIntegerDefault0" },
"uniqueItems": {
"type": "boolean",
"default": false
},
"maxProperties": { "$ref": "#/definitions/positiveInteger" },
"minProperties": { "$ref": "#/definitions/positiveIntegerDefault0" },
"required": { "$ref": "#/definitions/stringArray" },
"additionalProperties": {
"anyOf": [
{ "type": "boolean" },
{ "$ref": "#" }
],
"default": {}
},
"definitions": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"properties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"patternProperties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"dependencies": {
"type": "object",
"additionalProperties": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/stringArray" }
]
}
},
"enum": {
"type": "array",
"minItems": 1,
"uniqueItems": true
},
"type": {
"anyOf": [
{ "$ref": "#/definitions/simpleTypes" },
{
"type": "array",
"items": { "$ref": "#/definitions/simpleTypes" },
"minItems": 1,
"uniqueItems": true
}
]
},
"allOf": { "$ref": "#/definitions/schemaArray" },
"anyOf": { "$ref": "#/definitions/schemaArray" },
"oneOf": { "$ref": "#/definitions/schemaArray" },
"not": { "$ref": "#" },
"format": { "type": "string" },
"$ref": { "type": "string" }
},
"dependencies": {
"exclusiveMaximum": [ "maximum" ],
"exclusiveMinimum": [ "minimum" ]
},
"default": {}
}

validator/src/metaschemas/draft-06/schema Normal file
@@ -0,0 +1,151 @@
{
"$schema": "http://json-schema.org/draft-06/schema#",
"$id": "http://json-schema.org/draft-06/schema#",
"title": "Core schema meta-schema",
"definitions": {
"schemaArray": {
"type": "array",
"minItems": 1,
"items": { "$ref": "#" }
},
"nonNegativeInteger": {
"type": "integer",
"minimum": 0
},
"nonNegativeIntegerDefault0": {
"allOf": [
{ "$ref": "#/definitions/nonNegativeInteger" },
{ "default": 0 }
]
},
"simpleTypes": {
"enum": [
"array",
"boolean",
"integer",
"null",
"number",
"object",
"string"
]
},
"stringArray": {
"type": "array",
"items": { "type": "string" },
"uniqueItems": true,
"default": []
}
},
"type": ["object", "boolean"],
"properties": {
"$id": {
"type": "string",
"format": "uri-reference"
},
"$schema": {
"type": "string",
"format": "uri"
},
"$ref": {
"type": "string",
"format": "uri-reference"
},
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"default": {},
"multipleOf": {
"type": "number",
"exclusiveMinimum": 0
},
"maximum": {
"type": "number"
},
"exclusiveMaximum": {
"type": "number"
},
"minimum": {
"type": "number"
},
"exclusiveMinimum": {
"type": "number"
},
"maxLength": { "$ref": "#/definitions/nonNegativeInteger" },
"minLength": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"pattern": {
"type": "string",
"format": "regex"
},
"additionalItems": { "$ref": "#" },
"items": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/schemaArray" }
],
"default": {}
},
"maxItems": { "$ref": "#/definitions/nonNegativeInteger" },
"minItems": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"uniqueItems": {
"type": "boolean",
"default": false
},
"contains": { "$ref": "#" },
"maxProperties": { "$ref": "#/definitions/nonNegativeInteger" },
"minProperties": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"required": { "$ref": "#/definitions/stringArray" },
"additionalProperties": { "$ref": "#" },
"definitions": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"properties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"patternProperties": {
"type": "object",
"regexProperties": true,
"additionalProperties": { "$ref": "#" },
"default": {}
},
"dependencies": {
"type": "object",
"additionalProperties": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/stringArray" }
]
}
},
"propertyNames": { "$ref": "#" },
"const": {},
"enum": {
"type": "array",
"minItems": 1,
"uniqueItems": true
},
"type": {
"anyOf": [
{ "$ref": "#/definitions/simpleTypes" },
{
"type": "array",
"items": { "$ref": "#/definitions/simpleTypes" },
"minItems": 1,
"uniqueItems": true
}
]
},
"format": { "type": "string" },
"allOf": { "$ref": "#/definitions/schemaArray" },
"anyOf": { "$ref": "#/definitions/schemaArray" },
"oneOf": { "$ref": "#/definitions/schemaArray" },
"not": { "$ref": "#" }
},
"default": {}
}

validator/src/metaschemas/draft-07/schema Normal file
@@ -0,0 +1,172 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "http://json-schema.org/draft-07/schema#",
"title": "Core schema meta-schema",
"definitions": {
"schemaArray": {
"type": "array",
"minItems": 1,
"items": { "$ref": "#" }
},
"nonNegativeInteger": {
"type": "integer",
"minimum": 0
},
"nonNegativeIntegerDefault0": {
"allOf": [
{ "$ref": "#/definitions/nonNegativeInteger" },
{ "default": 0 }
]
},
"simpleTypes": {
"enum": [
"array",
"boolean",
"integer",
"null",
"number",
"object",
"string"
]
},
"stringArray": {
"type": "array",
"items": { "type": "string" },
"uniqueItems": true,
"default": []
}
},
"type": ["object", "boolean"],
"properties": {
"$id": {
"type": "string",
"format": "uri-reference"
},
"$schema": {
"type": "string",
"format": "uri"
},
"$ref": {
"type": "string",
"format": "uri-reference"
},
"$comment": {
"type": "string"
},
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"default": true,
"readOnly": {
"type": "boolean",
"default": false
},
"writeOnly": {
"type": "boolean",
"default": false
},
"examples": {
"type": "array",
"items": true
},
"multipleOf": {
"type": "number",
"exclusiveMinimum": 0
},
"maximum": {
"type": "number"
},
"exclusiveMaximum": {
"type": "number"
},
"minimum": {
"type": "number"
},
"exclusiveMinimum": {
"type": "number"
},
"maxLength": { "$ref": "#/definitions/nonNegativeInteger" },
"minLength": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"pattern": {
"type": "string",
"format": "regex"
},
"additionalItems": { "$ref": "#" },
"items": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/schemaArray" }
],
"default": true
},
"maxItems": { "$ref": "#/definitions/nonNegativeInteger" },
"minItems": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"uniqueItems": {
"type": "boolean",
"default": false
},
"contains": { "$ref": "#" },
"maxProperties": { "$ref": "#/definitions/nonNegativeInteger" },
"minProperties": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"required": { "$ref": "#/definitions/stringArray" },
"additionalProperties": { "$ref": "#" },
"definitions": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"properties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"patternProperties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"propertyNames": { "format": "regex" },
"default": {}
},
"dependencies": {
"type": "object",
"additionalProperties": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/stringArray" }
]
}
},
"propertyNames": { "$ref": "#" },
"const": true,
"enum": {
"type": "array",
"items": true,
"minItems": 1,
"uniqueItems": true
},
"type": {
"anyOf": [
{ "$ref": "#/definitions/simpleTypes" },
{
"type": "array",
"items": { "$ref": "#/definitions/simpleTypes" },
"minItems": 1,
"uniqueItems": true
}
]
},
"format": { "type": "string" },
"contentMediaType": { "type": "string" },
"contentEncoding": { "type": "string" },
"if": { "$ref": "#" },
"then": { "$ref": "#" },
"else": { "$ref": "#" },
"allOf": { "$ref": "#/definitions/schemaArray" },
"anyOf": { "$ref": "#/definitions/schemaArray" },
"oneOf": { "$ref": "#/definitions/schemaArray" },
"not": { "$ref": "#" }
},
"default": true
}

validator/src/metaschemas/draft/2019-09/meta/applicator Normal file
@@ -0,0 +1,55 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://json-schema.org/draft/2019-09/meta/applicator",
"$vocabulary": {
"https://json-schema.org/draft/2019-09/vocab/applicator": true
},
"$recursiveAnchor": true,
"title": "Applicator vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"additionalItems": { "$recursiveRef": "#" },
"unevaluatedItems": { "$recursiveRef": "#" },
"items": {
"anyOf": [
{ "$recursiveRef": "#" },
{ "$ref": "#/$defs/schemaArray" }
]
},
"contains": { "$recursiveRef": "#" },
"additionalProperties": { "$recursiveRef": "#" },
"unevaluatedProperties": { "$recursiveRef": "#" },
"properties": {
"type": "object",
"additionalProperties": { "$recursiveRef": "#" },
"default": {}
},
"patternProperties": {
"type": "object",
"additionalProperties": { "$recursiveRef": "#" },
"propertyNames": { "format": "regex" },
"default": {}
},
"dependentSchemas": {
"type": "object",
"additionalProperties": {
"$recursiveRef": "#"
}
},
"propertyNames": { "$recursiveRef": "#" },
"if": { "$recursiveRef": "#" },
"then": { "$recursiveRef": "#" },
"else": { "$recursiveRef": "#" },
"allOf": { "$ref": "#/$defs/schemaArray" },
"anyOf": { "$ref": "#/$defs/schemaArray" },
"oneOf": { "$ref": "#/$defs/schemaArray" },
"not": { "$recursiveRef": "#" }
},
"$defs": {
"schemaArray": {
"type": "array",
"minItems": 1,
"items": { "$recursiveRef": "#" }
}
}
}

validator/src/metaschemas/draft/2019-09/meta/content Normal file
@@ -0,0 +1,15 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://json-schema.org/draft/2019-09/meta/content",
"$vocabulary": {
"https://json-schema.org/draft/2019-09/vocab/content": true
},
"$recursiveAnchor": true,
"title": "Content vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"contentMediaType": { "type": "string" },
"contentEncoding": { "type": "string" },
"contentSchema": { "$recursiveRef": "#" }
}
}

validator/src/metaschemas/draft/2019-09/meta/core Normal file
@@ -0,0 +1,56 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://json-schema.org/draft/2019-09/meta/core",
"$vocabulary": {
"https://json-schema.org/draft/2019-09/vocab/core": true
},
"$recursiveAnchor": true,
"title": "Core vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"$id": {
"type": "string",
"format": "uri-reference",
"$comment": "Non-empty fragments not allowed.",
"pattern": "^[^#]*#?$"
},
"$schema": {
"type": "string",
"format": "uri"
},
"$anchor": {
"type": "string",
"pattern": "^[A-Za-z][-A-Za-z0-9.:_]*$"
},
"$ref": {
"type": "string",
"format": "uri-reference"
},
"$recursiveRef": {
"type": "string",
"format": "uri-reference"
},
"$recursiveAnchor": {
"type": "boolean",
"default": false
},
"$vocabulary": {
"type": "object",
"propertyNames": {
"type": "string",
"format": "uri"
},
"additionalProperties": {
"type": "boolean"
}
},
"$comment": {
"type": "string"
},
"$defs": {
"type": "object",
"additionalProperties": { "$recursiveRef": "#" },
"default": {}
}
}
}

validator/src/metaschemas/draft/2019-09/meta/format Normal file
@@ -0,0 +1,13 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://json-schema.org/draft/2019-09/meta/format",
"$vocabulary": {
"https://json-schema.org/draft/2019-09/vocab/format": true
},
"$recursiveAnchor": true,
"title": "Format vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"format": { "type": "string" }
}
}

validator/src/metaschemas/draft/2019-09/meta/meta-data Normal file
@@ -0,0 +1,35 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://json-schema.org/draft/2019-09/meta/meta-data",
"$vocabulary": {
"https://json-schema.org/draft/2019-09/vocab/meta-data": true
},
"$recursiveAnchor": true,
"title": "Meta-data vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"default": true,
"deprecated": {
"type": "boolean",
"default": false
},
"readOnly": {
"type": "boolean",
"default": false
},
"writeOnly": {
"type": "boolean",
"default": false
},
"examples": {
"type": "array",
"items": true
}
}
}

validator/src/metaschemas/draft/2019-09/meta/validation Normal file
@@ -0,0 +1,97 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://json-schema.org/draft/2019-09/meta/validation",
"$vocabulary": {
"https://json-schema.org/draft/2019-09/vocab/validation": true
},
"$recursiveAnchor": true,
"title": "Validation vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"multipleOf": {
"type": "number",
"exclusiveMinimum": 0
},
"maximum": {
"type": "number"
},
"exclusiveMaximum": {
"type": "number"
},
"minimum": {
"type": "number"
},
"exclusiveMinimum": {
"type": "number"
},
"maxLength": { "$ref": "#/$defs/nonNegativeInteger" },
"minLength": { "$ref": "#/$defs/nonNegativeIntegerDefault0" },
"pattern": {
"type": "string",
"format": "regex"
},
"maxItems": { "$ref": "#/$defs/nonNegativeInteger" },
"minItems": { "$ref": "#/$defs/nonNegativeIntegerDefault0" },
"uniqueItems": {
"type": "boolean",
"default": false
},
"maxContains": { "$ref": "#/$defs/nonNegativeInteger" },
"minContains": {
"$ref": "#/$defs/nonNegativeInteger",
"default": 1
},
"maxProperties": { "$ref": "#/$defs/nonNegativeInteger" },
"minProperties": { "$ref": "#/$defs/nonNegativeIntegerDefault0" },
"required": { "$ref": "#/$defs/stringArray" },
"dependentRequired": {
"type": "object",
"additionalProperties": {
"$ref": "#/$defs/stringArray"
}
},
"const": true,
"enum": {
"type": "array",
"items": true
},
"type": {
"anyOf": [
{ "$ref": "#/$defs/simpleTypes" },
{
"type": "array",
"items": { "$ref": "#/$defs/simpleTypes" },
"minItems": 1,
"uniqueItems": true
}
]
}
},
"$defs": {
"nonNegativeInteger": {
"type": "integer",
"minimum": 0
},
"nonNegativeIntegerDefault0": {
"$ref": "#/$defs/nonNegativeInteger",
"default": 0
},
"simpleTypes": {
"enum": [
"array",
"boolean",
"integer",
"null",
"number",
"object",
"string"
]
},
"stringArray": {
"type": "array",
"items": { "type": "string" },
"uniqueItems": true,
"default": []
}
}
}

validator/src/metaschemas/draft/2019-09/schema Normal file
@@ -0,0 +1,41 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://json-schema.org/draft/2019-09/schema",
"$vocabulary": {
"https://json-schema.org/draft/2019-09/vocab/core": true,
"https://json-schema.org/draft/2019-09/vocab/applicator": true,
"https://json-schema.org/draft/2019-09/vocab/validation": true,
"https://json-schema.org/draft/2019-09/vocab/meta-data": true,
"https://json-schema.org/draft/2019-09/vocab/format": false,
"https://json-schema.org/draft/2019-09/vocab/content": true
},
"$recursiveAnchor": true,
"title": "Core and Validation specifications meta-schema",
"allOf": [
{"$ref": "meta/core"},
{"$ref": "meta/applicator"},
{"$ref": "meta/validation"},
{"$ref": "meta/meta-data"},
{"$ref": "meta/format"},
{"$ref": "meta/content"}
],
"type": ["object", "boolean"],
"properties": {
"definitions": {
"$comment": "While no longer an official keyword as it is replaced by $defs, this keyword is retained in the meta-schema to prevent incompatible extensions as it remains in common use.",
"type": "object",
"additionalProperties": { "$recursiveRef": "#" },
"default": {}
},
"dependencies": {
"$comment": "\"dependencies\" is no longer a keyword, but schema authors should avoid redefining it to facilitate a smooth transition to \"dependentSchemas\" and \"dependentRequired\"",
"type": "object",
"additionalProperties": {
"anyOf": [
{ "$recursiveRef": "#" },
{ "$ref": "meta/validation#/$defs/stringArray" }
]
}
}
}
}

validator/src/metaschemas/draft/2020-12/meta/applicator Normal file
@@ -0,0 +1,47 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/meta/applicator",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/applicator": true
},
"$dynamicAnchor": "meta",
"title": "Applicator vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"prefixItems": { "$ref": "#/$defs/schemaArray" },
"items": { "$dynamicRef": "#meta" },
"contains": { "$dynamicRef": "#meta" },
"additionalProperties": { "$dynamicRef": "#meta" },
"properties": {
"type": "object",
"additionalProperties": { "$dynamicRef": "#meta" },
"default": {}
},
"patternProperties": {
"type": "object",
"additionalProperties": { "$dynamicRef": "#meta" },
"propertyNames": { "format": "regex" },
"default": {}
},
"dependentSchemas": {
"type": "object",
"additionalProperties": { "$dynamicRef": "#meta" },
"default": {}
},
"propertyNames": { "$dynamicRef": "#meta" },
"if": { "$dynamicRef": "#meta" },
"then": { "$dynamicRef": "#meta" },
"else": { "$dynamicRef": "#meta" },
"allOf": { "$ref": "#/$defs/schemaArray" },
"anyOf": { "$ref": "#/$defs/schemaArray" },
"oneOf": { "$ref": "#/$defs/schemaArray" },
"not": { "$dynamicRef": "#meta" }
},
"$defs": {
"schemaArray": {
"type": "array",
"minItems": 1,
"items": { "$dynamicRef": "#meta" }
}
}
}

validator/src/metaschemas/draft/2020-12/meta/content Normal file
@@ -0,0 +1,15 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/meta/content",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/content": true
},
"$dynamicAnchor": "meta",
"title": "Content vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"contentEncoding": { "type": "string" },
"contentMediaType": { "type": "string" },
"contentSchema": { "$dynamicRef": "#meta" }
}
}

validator/src/metaschemas/draft/2020-12/meta/core Normal file
@@ -0,0 +1,50 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/meta/core",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/core": true
},
"$dynamicAnchor": "meta",
"title": "Core vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"$id": {
"$ref": "#/$defs/uriReferenceString",
"$comment": "Non-empty fragments not allowed.",
"pattern": "^[^#]*#?$"
},
"$schema": { "$ref": "#/$defs/uriString" },
"$ref": { "$ref": "#/$defs/uriReferenceString" },
"$anchor": { "$ref": "#/$defs/anchorString" },
"$dynamicRef": { "$ref": "#/$defs/uriReferenceString" },
"$dynamicAnchor": { "$ref": "#/$defs/anchorString" },
"$vocabulary": {
"type": "object",
"propertyNames": { "$ref": "#/$defs/uriString" },
"additionalProperties": {
"type": "boolean"
}
},
"$comment": {
"type": "string"
},
"$defs": {
"type": "object",
"additionalProperties": { "$dynamicRef": "#meta" }
}
},
"$defs": {
"anchorString": {
"type": "string",
"pattern": "^[A-Za-z_][-A-Za-z0-9._]*$"
},
"uriString": {
"type": "string",
"format": "uri"
},
"uriReferenceString": {
"type": "string",
"format": "uri-reference"
}
}
}

validator/src/metaschemas/draft/2020-12/meta/format-annotation Normal file
@@ -0,0 +1,13 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/meta/format-annotation",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/format-annotation": true
},
"$dynamicAnchor": "meta",
"title": "Format vocabulary meta-schema for annotation results",
"type": ["object", "boolean"],
"properties": {
"format": { "type": "string" }
}
}

validator/src/metaschemas/draft/2020-12/meta/format-assertion Normal file
@@ -0,0 +1,13 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/meta/format-assertion",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/format-assertion": true
},
"$dynamicAnchor": "meta",
"title": "Format vocabulary meta-schema for assertion results",
"type": ["object", "boolean"],
"properties": {
"format": { "type": "string" }
}
}

validator/src/metaschemas/draft/2020-12/meta/meta-data Normal file
@@ -0,0 +1,35 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/meta/meta-data",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/meta-data": true
},
"$dynamicAnchor": "meta",
"title": "Meta-data vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"default": true,
"deprecated": {
"type": "boolean",
"default": false
},
"readOnly": {
"type": "boolean",
"default": false
},
"writeOnly": {
"type": "boolean",
"default": false
},
"examples": {
"type": "array",
"items": true
}
}
}

validator/src/metaschemas/draft/2020-12/meta/unevaluated Normal file
@@ -0,0 +1,14 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/meta/unevaluated",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/unevaluated": true
},
"$dynamicAnchor": "meta",
"title": "Unevaluated applicator vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"unevaluatedItems": { "$dynamicRef": "#meta" },
"unevaluatedProperties": { "$dynamicRef": "#meta" }
}
}

validator/src/metaschemas/draft/2020-12/meta/validation Normal file
@@ -0,0 +1,97 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/meta/validation",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/validation": true
},
"$dynamicAnchor": "meta",
"title": "Validation vocabulary meta-schema",
"type": ["object", "boolean"],
"properties": {
"type": {
"anyOf": [
{ "$ref": "#/$defs/simpleTypes" },
{
"type": "array",
"items": { "$ref": "#/$defs/simpleTypes" },
"minItems": 1,
"uniqueItems": true
}
]
},
"const": true,
"enum": {
"type": "array",
"items": true
},
"multipleOf": {
"type": "number",
"exclusiveMinimum": 0
},
"maximum": {
"type": "number"
},
"exclusiveMaximum": {
"type": "number"
},
"minimum": {
"type": "number"
},
"exclusiveMinimum": {
"type": "number"
},
"maxLength": { "$ref": "#/$defs/nonNegativeInteger" },
"minLength": { "$ref": "#/$defs/nonNegativeIntegerDefault0" },
"pattern": {
"type": "string",
"format": "regex"
},
"maxItems": { "$ref": "#/$defs/nonNegativeInteger" },
"minItems": { "$ref": "#/$defs/nonNegativeIntegerDefault0" },
"uniqueItems": {
"type": "boolean",
"default": false
},
"maxContains": { "$ref": "#/$defs/nonNegativeInteger" },
"minContains": {
"$ref": "#/$defs/nonNegativeInteger",
"default": 1
},
"maxProperties": { "$ref": "#/$defs/nonNegativeInteger" },
"minProperties": { "$ref": "#/$defs/nonNegativeIntegerDefault0" },
"required": { "$ref": "#/$defs/stringArray" },
"dependentRequired": {
"type": "object",
"additionalProperties": {
"$ref": "#/$defs/stringArray"
}
}
},
"$defs": {
"nonNegativeInteger": {
"type": "integer",
"minimum": 0
},
"nonNegativeIntegerDefault0": {
"$ref": "#/$defs/nonNegativeInteger",
"default": 0
},
"simpleTypes": {
"enum": [
"array",
"boolean",
"integer",
"null",
"number",
"object",
"string"
]
},
"stringArray": {
"type": "array",
"items": { "type": "string" },
"uniqueItems": true,
"default": []
}
}
}

validator/src/metaschemas/draft/2020-12/schema Normal file
@@ -0,0 +1,57 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/schema",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/core": true,
"https://json-schema.org/draft/2020-12/vocab/applicator": true,
"https://json-schema.org/draft/2020-12/vocab/unevaluated": true,
"https://json-schema.org/draft/2020-12/vocab/validation": true,
"https://json-schema.org/draft/2020-12/vocab/meta-data": true,
"https://json-schema.org/draft/2020-12/vocab/format-annotation": true,
"https://json-schema.org/draft/2020-12/vocab/content": true
},
"$dynamicAnchor": "meta",
"title": "Core and Validation specifications meta-schema",
"allOf": [
{"$ref": "meta/core"},
{"$ref": "meta/applicator"},
{"$ref": "meta/unevaluated"},
{"$ref": "meta/validation"},
{"$ref": "meta/meta-data"},
{"$ref": "meta/format-annotation"},
{"$ref": "meta/content"}
],
"type": ["object", "boolean"],
"$comment": "This meta-schema also defines keywords that have appeared in previous drafts in order to prevent incompatible extensions as they remain in common use.",
"properties": {
"definitions": {
"$comment": "\"definitions\" has been replaced by \"$defs\".",
"type": "object",
"additionalProperties": { "$dynamicRef": "#meta" },
"deprecated": true,
"default": {}
},
"dependencies": {
"$comment": "\"dependencies\" has been split and replaced by \"dependentSchemas\" and \"dependentRequired\" in order to serve their differing semantics.",
"type": "object",
"additionalProperties": {
"anyOf": [
{ "$dynamicRef": "#meta" },
{ "$ref": "meta/validation#/$defs/stringArray" }
]
},
"deprecated": true,
"default": {}
},
"$recursiveAnchor": {
"$comment": "\"$recursiveAnchor\" has been replaced by \"$dynamicAnchor\".",
"$ref": "meta/core#/$defs/anchorString",
"deprecated": true
},
"$recursiveRef": {
"$comment": "\"$recursiveRef\" has been replaced by \"$dynamicRef\".",
"$ref": "meta/core#/$defs/uriReferenceString",
"deprecated": true
}
}
}

validator/src/output.rs Normal file
@@ -0,0 +1,622 @@
use std::{
borrow::Cow,
fmt::{Display, Formatter, Write},
};
use serde::{
ser::{SerializeMap, SerializeSeq},
Serialize,
};
use crate::{util::*, ErrorKind, InstanceLocation, ValidationError};
impl<'s> ValidationError<'s, '_> {
fn absolute_keyword_location(&self) -> AbsoluteKeywordLocation<'s> {
if let ErrorKind::Reference { url, .. } = &self.kind {
AbsoluteKeywordLocation {
schema_url: url,
keyword_path: None,
}
} else {
AbsoluteKeywordLocation {
schema_url: self.schema_url,
keyword_path: self.kind.keyword_path(),
}
}
}
fn skip(&self) -> bool {
self.causes.len() == 1 && matches!(self.kind, ErrorKind::Reference { .. })
}
/// The `Flag` output format, merely the boolean result.
pub fn flag_output(&self) -> FlagOutput {
FlagOutput { valid: false }
}
/// The `Basic` structure, a flat list of output units.
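///
/// Walks the error tree depth-first, flattening it into a single list of units.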
pub fn basic_output(&self) -> OutputUnit<'_, '_, '_> {
let mut outputs = vec![];
let mut in_ref = InRef::default();
let mut kw_loc = KeywordLocation::default();
for node in DfsIterator::new(self) {
match node {
DfsItem::Pre(e) => {
in_ref.pre(e);
kw_loc.pre(e);
if e.skip() || matches!(e.kind, ErrorKind::Schema { .. }) {
continue;
}
let absolute_keyword_location = if in_ref.get() {
Some(e.absolute_keyword_location())
} else {
None
};
outputs.push(OutputUnit {
valid: false,
keyword_location: kw_loc.get(e),
absolute_keyword_location,
instance_location: &e.instance_location,
error: OutputError::Leaf(&e.kind),
});
}
DfsItem::Post(e) => {
in_ref.post();
kw_loc.post();
if e.skip() || matches!(e.kind, ErrorKind::Schema { .. }) {
continue;
}
}
}
}
let error = if outputs.is_empty() {
OutputError::Leaf(&self.kind)
} else {
OutputError::Branch(outputs)
};
OutputUnit {
valid: false,
keyword_location: String::new(),
absolute_keyword_location: None,
instance_location: &self.instance_location,
error,
}
}
/// The `Detailed` structure, based on the schema.
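///
/// Mirrors the error tree: each unit carries its causes as nested `errors`.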
pub fn detailed_output(&self) -> OutputUnit<'_, '_, '_> {
let mut root = None;
let mut stack: Vec<OutputUnit> = vec![];
let mut in_ref = InRef::default();
let mut kw_loc = KeywordLocation::default();
for node in DfsIterator::new(self) {
match node {
DfsItem::Pre(e) => {
in_ref.pre(e);
kw_loc.pre(e);
if e.skip() {
continue;
}
let absolute_keyword_location = if in_ref.get() {
Some(e.absolute_keyword_location())
} else {
None
};
stack.push(OutputUnit {
valid: false,
keyword_location: kw_loc.get(e),
absolute_keyword_location,
instance_location: &e.instance_location,
error: OutputError::Leaf(&e.kind),
});
}
DfsItem::Post(e) => {
in_ref.post();
kw_loc.post();
if e.skip() {
continue;
}
let output = stack.pop().unwrap();
if let Some(parent) = stack.last_mut() {
match &mut parent.error {
OutputError::Leaf(_) => {
parent.error = OutputError::Branch(vec![output]);
}
OutputError::Branch(v) => v.push(output),
}
} else {
root.replace(output);
}
}
}
}
root.unwrap()
}
}
// Display --
impl Display for ValidationError<'_, '_> {
/// Formats the error hierarchy. Use the alternate form (`{:#}`) to also show the schema location.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut indent = Indent::default();
let mut sloc = SchemaLocation::default();
// let mut kw_loc = KeywordLocation::default();
for node in DfsIterator::new(self) {
match node {
DfsItem::Pre(e) => {
// kw_loc.pre(e);
if e.skip() {
continue;
}
indent.pre(f)?;
if f.alternate() {
sloc.pre(e);
}
if let ErrorKind::Schema { .. } = &e.kind {
write!(f, "jsonschema {}", e.kind)?;
} else {
write!(f, "at {}", quote(&e.instance_location.to_string()))?;
if f.alternate() {
write!(f, " [{}]", sloc)?;
// write!(f, " [{}]", kw_loc.get(e))?;
// write!(f, " [{}]", e.absolute_keyword_location())?;
}
write!(f, ": {}", e.kind)?;
}
}
DfsItem::Post(e) => {
// kw_loc.post();
if e.skip() {
continue;
}
indent.post();
sloc.post();
}
}
}
Ok(())
}
}
// DfsIterator --
struct DfsIterator<'a, 'v, 's> {
root: Option<&'a ValidationError<'v, 's>>,
stack: Vec<Frame<'a, 'v, 's>>,
}
impl<'a, 'v, 's> DfsIterator<'a, 'v, 's> {
fn new(err: &'a ValidationError<'v, 's>) -> Self {
DfsIterator {
root: Some(err),
stack: vec![],
}
}
}
impl<'a, 'v, 's> Iterator for DfsIterator<'a, 'v, 's> {
type Item = DfsItem<&'a ValidationError<'v, 's>>;
fn next(&mut self) -> Option<Self::Item> {
let Some(mut frame) = self.stack.pop() else {
if let Some(err) = self.root.take() {
self.stack.push(Frame::from(err));
return Some(DfsItem::Pre(err));
} else {
return None;
}
};
if frame.causes.is_empty() {
return Some(DfsItem::Post(frame.err));
}
let err = &frame.causes[0];
frame.causes = &frame.causes[1..];
self.stack.push(frame);
self.stack.push(Frame::from(err));
Some(DfsItem::Pre(err))
}
}
struct Frame<'a, 'v, 's> {
err: &'a ValidationError<'v, 's>,
causes: &'a [ValidationError<'v, 's>],
}
impl<'a, 'v, 's> Frame<'a, 'v, 's> {
fn from(err: &'a ValidationError<'v, 's>) -> Self {
Self {
err,
causes: &err.causes,
}
}
}
enum DfsItem<T> {
Pre(T),
Post(T),
}
// Indent --
#[derive(Default)]
struct Indent {
n: usize,
}
impl Indent {
fn pre(&mut self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
if self.n > 0 {
writeln!(f)?;
for _ in 0..self.n - 1 {
write!(f, " ")?;
}
write!(f, "- ")?;
}
self.n += 1;
Ok(())
}
fn post(&mut self) {
self.n -= 1;
}
}
// SchemaLocation
#[derive(Default)]
struct SchemaLocation<'a, 's, 'v> {
stack: Vec<&'a ValidationError<'s, 'v>>,
}
impl<'a, 's, 'v> SchemaLocation<'a, 's, 'v> {
fn pre(&mut self, e: &'a ValidationError<'s, 'v>) {
self.stack.push(e);
}
fn post(&mut self) {
self.stack.pop();
}
}
impl Display for SchemaLocation<'_, '_, '_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let mut iter = self.stack.iter().cloned();
let cur = iter.next_back().unwrap();
let cur: Cow<str> = match &cur.kind {
ErrorKind::Schema { url } => Cow::Borrowed(url),
ErrorKind::Reference { url, .. } => Cow::Borrowed(url),
_ => Cow::Owned(cur.absolute_keyword_location().to_string()),
};
let Some(prev) = iter.next_back() else {
return write!(f, "{cur}");
};
let p = match &prev.kind {
ErrorKind::Schema { url } => {
let (p, _) = split(url);
p
}
ErrorKind::Reference { url, .. } => {
let (p, _) = split(url);
p
}
_ => {
let (p, _) = split(prev.schema_url);
p
}
};
let (c, frag) = split(cur.as_ref());
if c == p {
write!(f, "S#{frag}")
} else {
write!(f, "{cur}")
}
}
}
// KeywordLocation --
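// Incrementally tracks the evaluation-path `keywordLocation` as the DFS enters
// and leaves errors, so each output unit can snapshot the current path.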
#[derive(Default)]
struct KeywordLocation<'a> {
loc: String,
stack: Vec<(&'a str, usize)>, // (schema_url, len)
}
impl<'a> KeywordLocation<'a> {
fn pre(&mut self, e: &'a ValidationError) {
let cur = match &e.kind {
ErrorKind::Schema { url } => url,
ErrorKind::Reference { url, .. } => url,
_ => e.schema_url,
};
if let Some((prev, _)) = self.stack.last() {
self.loc.push_str(&e.schema_url[prev.len()..]); // todo: url-decode
if let ErrorKind::Reference { kw, .. } = &e.kind {
self.loc.push('/');
self.loc.push_str(kw);
}
}
self.stack.push((cur, self.loc.len()));
}
fn post(&mut self) {
self.stack.pop();
if let Some((_, len)) = self.stack.last() {
self.loc.truncate(*len);
}
}
fn get(&mut self, cur: &'a ValidationError) -> String {
if let ErrorKind::Reference { .. } = &cur.kind {
self.loc.clone()
} else if let Some(kw_path) = &cur.kind.keyword_path() {
let len = self.loc.len();
self.loc.push('/');
write!(self.loc, "{}", kw_path).expect("write kw_path to String should not fail");
let loc = self.loc.clone();
self.loc.truncate(len);
loc
} else {
self.loc.clone()
}
}
}
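// Tracks whether the DFS is currently inside a `$ref`; an output unit reports
// an `absoluteKeywordLocation` only in that case.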
#[derive(Default)]
struct InRef {
stack: Vec<bool>,
}
impl InRef {
fn pre(&mut self, e: &ValidationError) {
let in_ref: bool = self.get() || matches!(e.kind, ErrorKind::Reference { .. });
self.stack.push(in_ref);
}
fn post(&mut self) {
self.stack.pop();
}
fn get(&self) -> bool {
self.stack.last().cloned().unwrap_or_default()
}
}
// output formats --
/// Simplest output format, merely the boolean result.
pub struct FlagOutput {
pub valid: bool,
}
impl Serialize for FlagOutput {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
let mut map = serializer.serialize_map(Some(1))?;
map.serialize_entry("valid", &self.valid)?;
map.end()
}
}
impl Display for FlagOutput {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write_json_to_fmt(f, self)
}
}
/// Single OutputUnit used in Basic/Detailed output formats.
pub struct OutputUnit<'e, 's, 'v> {
pub valid: bool,
pub keyword_location: String,
/// The absolute, dereferenced location of the validating keyword
pub absolute_keyword_location: Option<AbsoluteKeywordLocation<'s>>,
/// The location of the JSON value within the instance being validated
pub instance_location: &'e InstanceLocation<'v>,
pub error: OutputError<'e, 's, 'v>,
}
impl Serialize for OutputUnit<'_, '_, '_> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
let n = 4 + self.absolute_keyword_location.as_ref().map_or(0, |_| 1);
let mut map = serializer.serialize_map(Some(n))?;
map.serialize_entry("valid", &self.valid)?;
map.serialize_entry("keywordLocation", &self.keyword_location.to_string())?;
if let Some(s) = &self.absolute_keyword_location {
map.serialize_entry("absoluteKeywordLocation", &s.to_string())?;
}
map.serialize_entry("instanceLocation", &self.instance_location.to_string())?;
let pname = match self.error {
OutputError::Leaf(_) => "error",
OutputError::Branch(_) => "errors",
};
map.serialize_entry(pname, &self.error)?;
map.end()
}
}
impl Display for OutputUnit<'_, '_, '_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write_json_to_fmt(f, self)
}
}
/// Error of [`OutputUnit`].
pub enum OutputError<'e, 's, 'v> {
/// Single.
Leaf(&'e ErrorKind<'s, 'v>),
/// Nested.
Branch(Vec<OutputUnit<'e, 's, 'v>>),
}
impl Serialize for OutputError<'_, '_, '_> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
match self {
OutputError::Leaf(kind) => serializer.serialize_str(&kind.to_string()),
OutputError::Branch(units) => {
let mut seq = serializer.serialize_seq(Some(units.len()))?;
for unit in units {
seq.serialize_element(unit)?;
}
seq.end()
}
}
}
}
// AbsoluteKeywordLocation --
impl<'s> ErrorKind<'s, '_> {
pub fn keyword_path(&self) -> Option<KeywordPath<'s>> {
#[inline(always)]
fn kw(kw: &'static str) -> Option<KeywordPath<'static>> {
Some(KeywordPath {
keyword: kw,
token: None,
})
}
#[inline(always)]
fn kw_prop<'s>(kw: &'static str, prop: &'s str) -> Option<KeywordPath<'s>> {
Some(KeywordPath {
keyword: kw,
token: Some(SchemaToken::Prop(prop)),
})
}
use ErrorKind::*;
match self {
Group => None,
Schema { .. } => None,
ContentSchema => kw("contentSchema"),
PropertyName { .. } => kw("propertyNames"),
Reference { kw: kword, .. } => kw(kword),
RefCycle { .. } => None,
FalseSchema => None,
Type { .. } => kw("type"),
Enum { .. } => kw("enum"),
Const { .. } => kw("const"),
Format { .. } => kw("format"),
MinProperties { .. } => kw("minProperties"),
MaxProperties { .. } => kw("maxProperties"),
AdditionalProperties { .. } => kw("additionalProperties"),
Required { .. } => kw("required"),
Dependency { prop, .. } => kw_prop("dependencies", prop),
DependentRequired { prop, .. } => kw_prop("dependentRequired", prop),
MinItems { .. } => kw("minItems"),
MaxItems { .. } => kw("maxItems"),
Contains => kw("contains"),
MinContains { .. } => kw("minContains"),
MaxContains { .. } => kw("maxContains"),
UniqueItems { .. } => kw("uniqueItems"),
AdditionalItems { .. } => kw("additionalItems"),
MinLength { .. } => kw("minLength"),
MaxLength { .. } => kw("maxLength"),
Pattern { .. } => kw("pattern"),
ContentEncoding { .. } => kw("contentEncoding"),
ContentMediaType { .. } => kw("contentMediaType"),
Minimum { .. } => kw("minimum"),
Maximum { .. } => kw("maximum"),
ExclusiveMinimum { .. } => kw("exclusiveMinimum"),
ExclusiveMaximum { .. } => kw("exclusiveMaximum"),
MultipleOf { .. } => kw("multipleOf"),
Not => kw("not"),
AllOf => kw("allOf"),
AnyOf => kw("anyOf"),
OneOf(_) => kw("oneOf"),
}
}
}
/// The absolute, dereferenced location of the validating keyword
#[derive(Debug, Clone)]
pub struct AbsoluteKeywordLocation<'s> {
/// The absolute, dereferenced schema location.
pub schema_url: &'s str,
/// Location within the `schema_url`.
pub keyword_path: Option<KeywordPath<'s>>,
}
impl Display for AbsoluteKeywordLocation<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.schema_url.fmt(f)?;
if let Some(path) = &self.keyword_path {
f.write_str("/")?;
path.keyword.fmt(f)?;
if let Some(token) = &path.token {
f.write_str("/")?;
match token {
SchemaToken::Prop(p) => write!(f, "{}", escape(p))?, // todo: url-encode
SchemaToken::Item(i) => write!(f, "{i}")?,
}
}
}
Ok(())
}
}
#[derive(Debug, Clone)]
/// JsonPointer in schema.
pub struct KeywordPath<'s> {
/// The first token.
pub keyword: &'static str,
/// Optional token within the keyword.
pub token: Option<SchemaToken<'s>>,
}
impl Display for KeywordPath<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.keyword.fmt(f)?;
if let Some(token) = &self.token {
f.write_str("/")?;
token.fmt(f)?;
}
Ok(())
}
}
/// Token for schema.
#[derive(Debug, Clone)]
pub enum SchemaToken<'s> {
/// Token for property.
Prop(&'s str),
/// Token for array item.
Item(usize),
}
impl Display for SchemaToken<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
SchemaToken::Prop(p) => write!(f, "{}", escape(p)),
SchemaToken::Item(i) => write!(f, "{i}"),
}
}
}
// helpers --
fn write_json_to_fmt<T>(f: &mut std::fmt::Formatter, value: &T) -> Result<(), std::fmt::Error>
where
T: ?Sized + Serialize,
{
let s = if f.alternate() {
serde_json::to_string_pretty(value)
} else {
serde_json::to_string(value)
};
let s = s.map_err(|_| std::fmt::Error)?;
f.write_str(&s)
}

validator/src/root.rs Normal file
@@ -0,0 +1,128 @@
use std::collections::{HashMap, HashSet};
use crate::{compiler::CompileError, draft::*, util::*};
use serde_json::Value;
use url::Url;
pub(crate) struct Root {
pub(crate) draft: &'static Draft,
pub(crate) resources: HashMap<JsonPointer, Resource>, // ptr => _
pub(crate) url: Url,
pub(crate) meta_vocabs: Option<Vec<String>>,
}
impl Root {
pub(crate) fn has_vocab(&self, name: &str) -> bool {
if self.draft.version < 2019 || name == "core" {
return true;
}
if let Some(vocabs) = &self.meta_vocabs {
return vocabs.iter().any(|s| s == name);
}
self.draft.default_vocabs.contains(&name)
}
fn resolve_fragment_in(&self, frag: &Fragment, res: &Resource) -> Result<UrlPtr, CompileError> {
let ptr = match frag {
Fragment::Anchor(anchor) => {
let Some(ptr) = res.anchors.get(anchor) else {
return Err(CompileError::AnchorNotFound {
url: self.url.to_string(),
reference: UrlFrag::format(&res.id, frag.as_str()),
});
};
ptr.clone()
}
Fragment::JsonPointer(ptr) => res.ptr.concat(ptr),
};
Ok(UrlPtr {
url: self.url.clone(),
ptr,
})
}
pub(crate) fn resolve_fragment(&self, frag: &Fragment) -> Result<UrlPtr, CompileError> {
let res = self.resources.get("").ok_or(CompileError::Bug(
format!("no root resource found for {}", self.url).into(),
))?;
self.resolve_fragment_in(frag, res)
}
// resolves `UrlFrag` to `UrlPtr` from root.
// returns `None` if it is external.
pub(crate) fn resolve(&self, uf: &UrlFrag) -> Result<Option<UrlPtr>, CompileError> {
let res = {
if uf.url == self.url {
self.resources.get("").ok_or(CompileError::Bug(
format!("no root resource found for {}", self.url).into(),
))?
} else {
// look for resource with id==uf.url
let Some(res) = self.resources.values().find(|res| res.id == uf.url) else {
return Ok(None); // external url
};
res
}
};
self.resolve_fragment_in(&uf.frag, res).map(Some)
}
pub(crate) fn resource(&self, ptr: &JsonPointer) -> &Resource {
let mut ptr = ptr.as_str();
loop {
if let Some(res) = self.resources.get(ptr) {
return res;
}
let Some((prefix, _)) = ptr.rsplit_once('/') else {
break;
};
ptr = prefix;
}
self.resources.get("").expect("root resource should exist")
}
pub(crate) fn base_url(&self, ptr: &JsonPointer) -> &Url {
&self.resource(ptr).id
}
pub(crate) fn add_subschema(
&mut self,
doc: &Value,
ptr: &JsonPointer,
) -> Result<(), CompileError> {
let v = ptr.lookup(doc, &self.url)?;
let base_url = self.base_url(ptr).clone();
self.draft
.collect_resources(v, &base_url, ptr.clone(), &self.url, &mut self.resources)?;
// collect anchors
if !self.resources.contains_key(ptr) {
let res = self.resource(ptr);
if let Some(res) = self.resources.get_mut(&res.ptr.clone()) {
self.draft.collect_anchors(v, ptr, res, &self.url)?;
}
}
Ok(())
}
}
#[derive(Debug)]
pub(crate) struct Resource {
pub(crate) ptr: JsonPointer, // from root
pub(crate) id: Url,
pub(crate) anchors: HashMap<Anchor, JsonPointer>, // anchor => ptr
pub(crate) dynamic_anchors: HashSet<Anchor>,
}
impl Resource {
pub(crate) fn new(ptr: JsonPointer, id: Url) -> Self {
Self {
ptr,
id,
anchors: HashMap::new(),
dynamic_anchors: HashSet::new(),
}
}
}
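
A standalone sketch (hypothetical names, mirroring `Root::resource` above) of the prefix walk that finds the enclosing resource for a JSON pointer:

    // Walk "/a/b/c" -> "/a/b" -> "/a" -> "" until a registered resource matches.
    fn enclosing_resource<'a>(mut ptr: &'a str, resources: &[&str]) -> &'a str {
        loop {
            if resources.contains(&ptr) {
                return ptr;
            }
            let Some((prefix, _)) = ptr.rsplit_once('/') else {
                return ""; // fall back to the root resource
            };
            ptr = prefix;
        }
    }
    // enclosing_resource("/$defs/a/items", &["", "/$defs/a"]) == "/$defs/a"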

107
validator/src/roots.rs Normal file

@ -0,0 +1,107 @@
use std::collections::{HashMap, HashSet};
use crate::{compiler::CompileError, draft::*, loader::DefaultUrlLoader, root::Root, util::*};
use serde_json::Value;
use url::Url;
// --
pub(crate) struct Roots {
pub(crate) default_draft: &'static Draft,
map: HashMap<Url, Root>,
pub(crate) loader: DefaultUrlLoader,
}
impl Roots {
fn new() -> Self {
Self {
default_draft: latest(),
map: Default::default(),
loader: DefaultUrlLoader::new(),
}
}
}
impl Default for Roots {
fn default() -> Self {
Self::new()
}
}
impl Roots {
pub(crate) fn get(&self, url: &Url) -> Option<&Root> {
self.map.get(url)
}
pub(crate) fn resolve_fragment(&mut self, uf: UrlFrag) -> Result<UrlPtr, CompileError> {
self.or_load(uf.url.clone())?;
let Some(root) = self.map.get(&uf.url) else {
return Err(CompileError::Bug("or_load didn't add".into()));
};
root.resolve_fragment(&uf.frag)
}
pub(crate) fn ensure_subschema(&mut self, up: &UrlPtr) -> Result<(), CompileError> {
self.or_load(up.url.clone())?;
let Some(root) = self.map.get_mut(&up.url) else {
return Err(CompileError::Bug("or_load didn't add".into()));
};
if !root.draft.is_subschema(up.ptr.as_str()) {
let doc = self.loader.load(&root.url)?;
let v = up.ptr.lookup(doc, &up.url)?;
root.draft.validate(up, v)?;
root.add_subschema(doc, &up.ptr)?;
}
Ok(())
}
pub(crate) fn or_load(&mut self, url: Url) -> Result<(), CompileError> {
debug_assert!(url.fragment().is_none(), "trying to add root with fragment");
if self.map.contains_key(&url) {
return Ok(());
}
let doc = self.loader.load(&url)?;
let r = self.create_root(url.clone(), doc)?;
self.map.insert(url, r);
Ok(())
}
pub(crate) fn create_root(&self, url: Url, doc: &Value) -> Result<Root, CompileError> {
let draft = {
let up = UrlPtr {
url: url.clone(),
ptr: "".into(),
};
self.loader
.get_draft(&up, doc, self.default_draft, HashSet::new())?
};
let vocabs = self.loader.get_meta_vocabs(doc, draft)?;
let resources = {
let mut m = HashMap::default();
draft.collect_resources(doc, &url, "".into(), &url, &mut m)?;
m
};
if !matches!(url.host_str(), Some("json-schema.org")) {
draft.validate(
&UrlPtr {
url: url.clone(),
ptr: "".into(),
},
doc,
)?;
}
Ok(Root {
draft,
resources,
url: url.clone(),
meta_vocabs: vocabs,
})
}
pub(crate) fn insert(&mut self, roots: &mut HashMap<Url, Root>) {
self.map.extend(roots.drain());
}
}
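
`or_load` above is a memoizing loader: a URL is fetched and turned into a `Root` at most once. A minimal standalone sketch of the same pattern (types hypothetical):

    use std::collections::HashMap;

    fn or_load(cache: &mut HashMap<String, String>, url: &str) -> Result<(), String> {
        if cache.contains_key(url) {
            return Ok(()); // already loaded; reuse the cached entry
        }
        let doc = format!("<contents of {url}>"); // stands in for loader.load(url)?
        cache.insert(url.to_string(), doc);
        Ok(())
    }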

545
validator/src/util.rs Normal file

@ -0,0 +1,545 @@
use std::{
borrow::{Borrow, Cow},
fmt::Display,
hash::{Hash, Hasher},
str::FromStr,
};
use ahash::{AHashMap, AHasher};
use percent_encoding::{percent_decode_str, AsciiSet, CONTROLS};
use serde_json::Value;
use url::Url;
use crate::CompileError;
// --
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub(crate) struct JsonPointer(pub(crate) String);
impl JsonPointer {
pub(crate) fn escape(token: &str) -> Cow<'_, str> {
const SPECIAL: [char; 2] = ['~', '/'];
if token.contains(SPECIAL) {
token.replace('~', "~0").replace('/', "~1").into()
} else {
token.into()
}
}
pub(crate) fn unescape(mut tok: &str) -> Result<Cow<'_, str>, ()> {
let Some(mut tilde) = tok.find('~') else {
return Ok(Cow::Borrowed(tok));
};
let mut s = String::with_capacity(tok.len());
loop {
s.push_str(&tok[..tilde]);
tok = &tok[tilde + 1..];
match tok.chars().next() {
Some('1') => s.push('/'),
Some('0') => s.push('~'),
_ => return Err(()),
}
tok = &tok[1..];
let Some(i) = tok.find('~') else {
s.push_str(tok);
break;
};
tilde = i;
}
Ok(Cow::Owned(s))
}
pub(crate) fn lookup<'a>(
&self,
mut v: &'a Value,
v_url: &Url,
) -> Result<&'a Value, CompileError> {
for tok in self.0.split('/').skip(1) {
let Ok(tok) = Self::unescape(tok) else {
let loc = UrlFrag::format(v_url, self.as_str());
return Err(CompileError::InvalidJsonPointer(loc));
};
match v {
Value::Object(obj) => {
if let Some(pvalue) = obj.get(tok.as_ref()) {
v = pvalue;
continue;
}
}
Value::Array(arr) => {
if let Ok(i) = usize::from_str(tok.as_ref()) {
if let Some(item) = arr.get(i) {
v = item;
continue;
}
};
}
_ => {}
}
let loc = UrlFrag::format(v_url, self.as_str());
return Err(CompileError::JsonPointerNotFound(loc));
}
Ok(v)
}
pub(crate) fn as_str(&self) -> &str {
&self.0
}
pub(crate) fn is_empty(&self) -> bool {
self.0.is_empty()
}
pub(crate) fn concat(&self, next: &Self) -> Self {
JsonPointer(format!("{}{}", self.0, next.0))
}
pub(crate) fn append(&self, tok: &str) -> Self {
Self(format!("{}/{}", self, Self::escape(tok)))
}
pub(crate) fn append2(&self, tok1: &str, tok2: &str) -> Self {
Self(format!(
"{}/{}/{}",
self,
Self::escape(tok1),
Self::escape(tok2)
))
}
}
impl Display for JsonPointer {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
impl Borrow<str> for JsonPointer {
fn borrow(&self) -> &str {
&self.0
}
}
impl From<&str> for JsonPointer {
fn from(value: &str) -> Self {
Self(value.into())
}
}
// --
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub(crate) struct Anchor(pub(crate) String);
impl Display for Anchor {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
impl Borrow<str> for Anchor {
fn borrow(&self) -> &str {
&self.0
}
}
impl From<&str> for Anchor {
fn from(value: &str) -> Self {
Self(value.into())
}
}
// --
#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) enum Fragment {
Anchor(Anchor),
JsonPointer(JsonPointer),
}
impl Fragment {
pub(crate) fn split(s: &str) -> Result<(&str, Fragment), CompileError> {
let (u, frag) = split(s);
let frag = percent_decode_str(frag)
.decode_utf8()
.map_err(|src| CompileError::ParseUrlError {
url: s.to_string(),
src: src.into(),
})?
.to_string();
let frag = if frag.is_empty() || frag.starts_with('/') {
Fragment::JsonPointer(JsonPointer(frag))
} else {
Fragment::Anchor(Anchor(frag))
};
Ok((u, frag))
}
pub(crate) fn encode(frag: &str) -> String {
// https://url.spec.whatwg.org/#fragment-percent-encode-set
const FRAGMENT: &AsciiSet = &CONTROLS
.add(b'%')
.add(b' ')
.add(b'"')
.add(b'<')
.add(b'>')
.add(b'`');
percent_encoding::utf8_percent_encode(frag, FRAGMENT).to_string()
}
pub(crate) fn as_str(&self) -> &str {
match self {
Fragment::Anchor(s) => &s.0,
Fragment::JsonPointer(s) => &s.0,
}
}
}
// --
#[derive(Clone)]
pub(crate) struct UrlFrag {
pub(crate) url: Url,
pub(crate) frag: Fragment,
}
impl UrlFrag {
pub(crate) fn absolute(input: &str) -> Result<UrlFrag, CompileError> {
let (u, frag) = Fragment::split(input)?;
// note: windows drive letter is treated as url scheme by url parser
#[cfg(not(target_arch = "wasm32"))]
if std::env::consts::OS == "windows" && starts_with_windows_drive(u) {
let url = Url::from_file_path(u)
.map_err(|_| CompileError::Bug(format!("failed to convert {u} into url").into()))?;
return Ok(UrlFrag { url, frag });
}
match Url::parse(u) {
Ok(url) => Ok(UrlFrag { url, frag }),
#[cfg(not(target_arch = "wasm32"))]
Err(url::ParseError::RelativeUrlWithoutBase) => {
let p = std::path::absolute(u).map_err(|e| CompileError::ParseUrlError {
url: u.to_owned(),
src: e.into(),
})?;
let url = Url::from_file_path(p).map_err(|_| {
CompileError::Bug(format!("failed to convert {u} into url").into())
})?;
Ok(UrlFrag { url, frag })
}
Err(e) => Err(CompileError::ParseUrlError {
url: u.to_owned(),
src: e.into(),
}),
}
}
pub(crate) fn join(url: &Url, input: &str) -> Result<UrlFrag, CompileError> {
let (input, frag) = Fragment::split(input)?;
if input.is_empty() {
return Ok(UrlFrag {
url: url.clone(),
frag,
});
}
let url = url.join(input).map_err(|e| CompileError::ParseUrlError {
url: input.to_string(),
src: e.into(),
})?;
Ok(UrlFrag { url, frag })
}
pub(crate) fn format(url: &Url, frag: &str) -> String {
if frag.is_empty() {
url.to_string()
} else {
format!("{}#{}", url, Fragment::encode(frag))
}
}
}
impl Display for UrlFrag {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}#{}", self.url, Fragment::encode(self.frag.as_str()))
}
}
// --
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub(crate) struct UrlPtr {
pub(crate) url: Url,
pub(crate) ptr: JsonPointer,
}
impl UrlPtr {
pub(crate) fn lookup<'a>(&self, doc: &'a Value) -> Result<&'a Value, CompileError> {
self.ptr.lookup(doc, &self.url)
}
pub(crate) fn format(&self, tok: &str) -> String {
format!(
"{}#{}/{}",
self.url,
Fragment::encode(self.ptr.as_str()),
Fragment::encode(JsonPointer::escape(tok).as_ref()),
)
}
}
impl Display for UrlPtr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}#{}", self.url, Fragment::encode(self.ptr.as_str()))
}
}
// --
pub(crate) fn is_integer(v: &Value) -> bool {
match v {
Value::Number(n) => {
n.is_i64() || n.is_u64() || n.as_f64().filter(|n| n.fract() == 0.0).is_some()
}
_ => false,
}
}
#[cfg(not(target_arch = "wasm32"))]
fn starts_with_windows_drive(p: &str) -> bool {
p.chars().next().filter(char::is_ascii_uppercase).is_some() && p[1..].starts_with(":\\")
}
/// returns single-quoted string
pub(crate) fn quote<T>(s: &T) -> String
where
T: AsRef<str> + std::fmt::Debug + ?Sized,
{
let s = format!("{s:?}").replace(r#"\""#, "\"").replace('\'', r"\'");
format!("'{}'", &s[1..s.len() - 1])
}
pub(crate) fn join_iter<T>(iterable: T, sep: &str) -> String
where
T: IntoIterator,
T::Item: Display,
{
iterable
.into_iter()
.map(|e| e.to_string())
.collect::<Vec<_>>()
.join(sep)
}
pub(crate) fn escape(token: &str) -> Cow<'_, str> {
JsonPointer::escape(token)
}
pub(crate) fn split(url: &str) -> (&str, &str) {
if let Some(i) = url.find('#') {
(&url[..i], &url[i + 1..])
} else {
(url, "")
}
}
/// serde_json treats 0 and 0.0 as not equal, so we cannot simply use v1 == v2
pub(crate) fn equals(v1: &Value, v2: &Value) -> bool {
match (v1, v2) {
(Value::Null, Value::Null) => true,
(Value::Bool(b1), Value::Bool(b2)) => b1 == b2,
(Value::Number(n1), Value::Number(n2)) => {
if let (Some(n1), Some(n2)) = (n1.as_u64(), n2.as_u64()) {
return n1 == n2;
}
if let (Some(n1), Some(n2)) = (n1.as_i64(), n2.as_i64()) {
return n1 == n2;
}
if let (Some(n1), Some(n2)) = (n1.as_f64(), n2.as_f64()) {
return n1 == n2;
}
false
}
(Value::String(s1), Value::String(s2)) => s1 == s2,
(Value::Array(arr1), Value::Array(arr2)) => {
if arr1.len() != arr2.len() {
return false;
}
arr1.iter().zip(arr2).all(|(e1, e2)| equals(e1, e2))
}
(Value::Object(obj1), Value::Object(obj2)) => {
if obj1.len() != obj2.len() {
return false;
}
for (k1, v1) in obj1 {
if let Some(v2) = obj2.get(k1) {
if !equals(v1, v2) {
return false;
}
} else {
return false;
}
}
true
}
_ => false,
}
}
pub(crate) fn duplicates(arr: &Vec<Value>) -> Option<(usize, usize)> {
match arr.as_slice() {
[e0, e1] => {
if equals(e0, e1) {
return Some((0, 1));
}
}
[e0, e1, e2] => {
if equals(e0, e1) {
return Some((0, 1));
} else if equals(e0, e2) {
return Some((0, 2));
} else if equals(e1, e2) {
return Some((1, 2));
}
}
_ => {
let len = arr.len();
if len <= 20 {
for i in 0..len.saturating_sub(1) { // saturate so an empty array cannot underflow
for j in i + 1..len {
if equals(&arr[i], &arr[j]) {
return Some((i, j));
}
}
}
} else {
let mut seen = AHashMap::with_capacity(len);
for (i, item) in arr.iter().enumerate() {
if let Some(j) = seen.insert(HashedValue(item), i) {
return Some((j, i));
}
}
}
}
}
None
}
// HashedValue --
// Based on implementation proposed by Sven Marnach:
// https://stackoverflow.com/questions/60882381/what-is-the-fastest-correct-way-to-detect-that-there-are-no-duplicates-in-a-json
pub(crate) struct HashedValue<'a>(pub(crate) &'a Value);
impl PartialEq for HashedValue<'_> {
fn eq(&self, other: &Self) -> bool {
equals(self.0, other.0)
}
}
impl Eq for HashedValue<'_> {}
impl Hash for HashedValue<'_> {
fn hash<H: Hasher>(&self, state: &mut H) {
match self.0 {
Value::Null => state.write_u32(3_221_225_473), // chosen randomly
Value::Bool(ref b) => b.hash(state),
Value::Number(ref num) => {
if let Some(num) = num.as_f64() {
num.to_bits().hash(state);
} else if let Some(num) = num.as_u64() {
num.hash(state);
} else if let Some(num) = num.as_i64() {
num.hash(state);
}
}
Value::String(ref str) => str.hash(state),
Value::Array(ref arr) => {
for item in arr {
HashedValue(item).hash(state);
}
}
Value::Object(ref obj) => {
let mut hash = 0;
for (pname, pvalue) in obj {
// We have no way of building a new hasher of type `H`, so we
// hardcode using the default hasher of a hash map.
let mut hasher = AHasher::default();
pname.hash(&mut hasher);
HashedValue(pvalue).hash(&mut hasher);
hash ^= hasher.finish();
}
state.write_u64(hash);
}
}
}
}
#[cfg(test)]
mod tests {
use ahash::AHashMap;
use serde_json::json;
use super::*;
#[test]
fn test_quote() {
assert_eq!(quote(r#"abc"def'ghi"#), r#"'abc"def\'ghi'"#);
}
#[test]
fn test_fragment_split() {
let tests = [
("#", Fragment::JsonPointer("".into())),
("#/a/b", Fragment::JsonPointer("/a/b".into())),
("#abcd", Fragment::Anchor("abcd".into())),
("#%61%62%63%64", Fragment::Anchor("abcd".into())),
(
"#%2F%61%62%63%64%2fef",
Fragment::JsonPointer("/abcd/ef".into()),
), // '/' is encoded
("#abcd+ef", Fragment::Anchor("abcd+ef".into())), // '+' should not traslate to space
];
for test in tests {
let (_, got) = Fragment::split(test.0).unwrap();
assert_eq!(got, test.1, "Fragment::split({:?})", test.0);
}
}
#[test]
fn test_unescape() {
let tests = [
("bar~0", Some("bar~")),
("bar~1", Some("bar/")),
("bar~01", Some("bar~1")),
("bar~", None),
("bar~~", None),
];
for (tok, want) in tests {
let res = JsonPointer::unescape(tok).ok();
let got = res.as_ref().map(|c| c.as_ref());
assert_eq!(got, want, "unescape({:?})", tok)
}
}
#[test]
fn test_equals() {
let tests = [["1.0", "1"], ["-1.0", "-1"]];
for [a, b] in tests {
let a = serde_json::from_str(a).unwrap();
let b = serde_json::from_str(b).unwrap();
assert!(equals(&a, &b));
}
}
#[test]
fn test_hashed_value() {
let mut seen = AHashMap::with_capacity(10);
let (v1, v2) = (json!(2), json!(2.0));
assert!(equals(&v1, &v2));
assert!(seen.insert(HashedValue(&v1), 1).is_none());
assert!(seen.insert(HashedValue(&v2), 1).is_some());
}
}
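
A usage sketch of the `duplicates` helper above (crate-internal; data hypothetical). Because comparison goes through `equals`, 2 and 2.0 count as the same value:

    let arr = vec![json!(1), json!(2.0), json!(2)];
    assert_eq!(duplicates(&arr), Some((1, 2))); // 2.0 at index 1 duplicates 2 at index 2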

1221
validator/src/validator.rs Normal file

File diff suppressed because it is too large


@ -0,0 +1,21 @@
[
{
"description": "zero fraction",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"const": 2
},
"tests": [
{
"description": "with fraction",
"data": 2.0,
"valid": true
},
{
"description": "without fraction",
"data": 2,
"valid": true
}
]
}
]


@ -0,0 +1,26 @@
[
{
"description": "guard against infinite recursion",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$defs": {
"alice": {
"$anchor": "alice",
"allOf": [{"$ref": "#bob"}]
},
"bob": {
"$anchor": "bob",
"allOf": [{"$ref": "#alice"}]
}
},
"$ref": "#alice"
},
"tests": [
{
"description": "infinite recursion detected",
"data": {},
"valid": false
}
]
}
]


@ -0,0 +1,143 @@
[
{
"description": "validation of binary-encoded media type documents with schema",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"contentMediaType": "application/json",
"contentEncoding": "base64",
"contentSchema": { "required": ["foo"], "properties": { "foo": { "type": "string" } } }
},
"tests": [
{
"description": "a valid base64-encoded JSON document",
"data": "eyJmb28iOiAiYmFyIn0K",
"valid": true
},
{
"description": "another valid base64-encoded JSON document",
"data": "eyJib28iOiAyMCwgImZvbyI6ICJiYXoifQ==",
"valid": true
},
{
"description": "an invalid base64-encoded JSON document; validates false",
"data": "eyJib28iOiAyMH0=",
"valid": false
},
{
"description": "an empty object as a base64-encoded JSON document; validates false",
"data": "e30=",
"valid": false
},
{
"description": "an empty array as a base64-encoded JSON document",
"data": "W10=",
"valid": true
},
{
"description": "a validly-encoded invalid JSON document; validates false",
"data": "ezp9Cg==",
"valid": false
},
{
"description": "an invalid base64 string that is valid JSON; validates false",
"data": "{}",
"valid": false
},
{
"description": "ignores non-strings",
"data": 100,
"valid": true
}
]
},
{
"description": "contentSchema without contentMediaType",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"contentEncoding": "base64",
"contentSchema": { "required": ["foo"], "properties": { "foo": { "type": "string" } } }
},
"tests": [
{
"description": "a valid base64-encoded JSON document",
"data": "eyJmb28iOiAiYmFyIn0K",
"valid": true
},
{
"description": "another valid base64-encoded JSON document",
"data": "eyJib28iOiAyMCwgImZvbyI6ICJiYXoifQ==",
"valid": true
},
{
"description": "an invalid base64-encoded JSON document; validates true",
"data": "eyJib28iOiAyMH0=",
"valid": true
},
{
"description": "an empty object as a base64-encoded JSON document; validates true",
"data": "e30=",
"valid": true
},
{
"description": "an empty array as a base64-encoded JSON document",
"data": "W10=",
"valid": true
},
{
"description": "a validly-encoded invalid JSON document; validates true",
"data": "ezp9Cg==",
"valid": true
},
{
"description": "an invalid base64 string that is valid JSON; validates false",
"data": "{}",
"valid": false
},
{
"description": "ignores non-strings",
"data": 100,
"valid": true
}
]
},
{
"description": "contentSchema without contentEncoding",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"contentMediaType": "application/json",
"contentSchema": { "required": ["foo"], "properties": { "foo": { "type": "string" } } }
},
"tests": [
{
"description": "a valid JSON document",
"data": "{\"foo\": \"bar\"}",
"valid": true
},
{
"description": "another valid base64-encoded JSON document",
"data": "{\"boo\": 20, \"foo\": \"baz\"}",
"valid": true
},
{
"description": "an empty object; validates false",
"data": "{}",
"valid": false
},
{
"description": "an empty array; validates false",
"data": "[]",
"valid": true
},
{
"description": "invalid JSON document; validates false",
"data": "[}",
"valid": false
},
{
"description": "ignores non-strings",
"data": 100,
"valid": true
}
]
}
]


@ -0,0 +1,16 @@
[
{
"description": "validation of date strings",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"format": "date"
},
"tests": [
{
"description": "contains alphabets",
"data": "yyyy-mm-dd",
"valid": false
}
]
}
]


@ -0,0 +1,16 @@
[
{
"description": "validation of duration strings",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"format": "duration"
},
"tests": [
{
"description": "more than one T",
"data": "PT1MT1S",
"valid": false
}
]
}
]


@ -0,0 +1,31 @@
[
{
"description": "validation of duration strings",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"format": "email"
},
"tests": [
{
"description": "non printable character",
"data": "a\tb@gmail.com",
"valid": false
},
{
"description": "tab ok if quoted",
"data": "\"a\tb\"@gmail.com",
"valid": true
},
{
"description": "quote inside quoted",
"data": "\"a\"b\"@gmail.com",
"valid": false
},
{
"description": "backslash inside quoted",
"data": "\"a\\b\"@gmail.com",
"valid": false
}
]
}
]


@ -0,0 +1,23 @@
[
{
"description": "validation of time strings",
"schema": { "format": "time" },
"tests": [
{
"description": "contains alphabets",
"data": "ab:cd:efZ",
"valid": false
},
{
"description": "no digit in second fraction",
"data": "23:20:50.Z",
"valid": false
},
{
"description": "alphabets in offset",
"data": "08:30:06+ab:cd",
"valid": false
}
]
}
]


@ -0,0 +1,26 @@
[
{
"description": "special characters",
"schema": {
"properties": {
"a%20b/c": { "type": "number" }
}
},
"tests": [
{
"description": "valid",
"data": {
"a%20b/c": 1
},
"valid": true
},
{
"description": "invalid",
"data": {
"a%20b/c": "hello"
},
"valid": false
}
]
}
]
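
The property name above exercises JSON Pointer escaping: per `JsonPointer::escape` in util.rs, '/' becomes "~1" while '%' and digits pass through untouched. A minimal check (crate-internal, hypothetical):

    assert_eq!(JsonPointer::escape("a%20b/c"), "a%20b~1c");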


@ -0,0 +1,74 @@
[
{
"description": "percent-encoded json-pointer",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$defs": {
"a b": {"type": "number"}
},
"$ref": "#/$defs/a%20b"
},
"tests": [
{
"description": "match",
"data": 1,
"valid": true
},
{
"description": "mismatch",
"data": "foobar",
"valid": false
}
]
},
{
"description": "precent in resource ptr",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$defs": {
"a%20b": {
"$id": "http://temp.com/ab",
"type": "number"
}
},
"$ref": "http://temp.com/ab"
},
"tests": [
{
"description": "match",
"data": 1,
"valid": true
},
{
"description": "mismatch",
"data": "foobar",
"valid": false
}
]
},
{
"description": "precent in anchor ptr",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$defs": {
"a%20b": {
"$anchor": "abcd",
"type": "number"
}
},
"$ref": "#abcd"
},
"tests": [
{
"description": "match",
"data": 1,
"valid": true
},
{
"description": "mismatch",
"data": "foobar",
"valid": false
}
]
}
]


@ -0,0 +1,57 @@
[
{
"description": "unevaluatedProperties with a failing $ref",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$defs": {
"child": {
"type": "object",
"properties": {
"prop2": { "type": "string" }
},
"unevaluatedProperties": false
}
},
"type": "object",
"properties": {
"prop1": { "type": "string" },
"child_schema": { "$ref": "#/$defs/child" }
},
"unevaluatedProperties": false
},
"tests": [
{
"description": "unevaluated property in child should fail validation",
"data": {
"prop1": "value1",
"child_schema": {
"prop2": "value2",
"extra_prop_in_child": "this should fail"
}
},
"valid": false
},
{
"description": "a valid instance should pass",
"data": {
"prop1": "value1",
"child_schema": {
"prop2": "value2"
}
},
"valid": true
},
{
"description": "unevaluated property in parent should fail",
"data": {
"prop1": "value1",
"child_schema": {
"prop2": "value2"
},
"extra_prop_in_parent": "this should fail"
},
"valid": false
}
]
}
]


@ -0,0 +1,21 @@
[
{
"description": "zero fraction",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"uniqueItems": true
},
"tests": [
{
"description": "with fraction",
"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, 2.0],
"valid": false
},
{
"description": "without fraction",
"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, 2],
"valid": false
}
]
}
]


@ -0,0 +1,27 @@
[
{
"description": "percent in dependencies",
"schema": {
"dependencies": {
"a%20b": { "required": ["x"] }
}
},
"tests": [
{
"description": "valid",
"data": {
"a%20b": null,
"x": 1
},
"valid": true
},
{
"description": "invalid",
"data": {
"a%20b": null
},
"valid": false
}
]
}
]


@ -0,0 +1,50 @@
[
{
"description": "skip then when if is false",
"schema": {
"if": false,
"then": {
"$ref": "blah/blah.json"
},
"else": {
"type": "number"
}
},
"tests": [
{
"description": "number is valid",
"data": 0,
"valid": true
},
{
"description": "string is invalid",
"data": "hello",
"valid": false
}
]
},
{
"description": "skip else when if is true",
"schema": {
"if": true,
"then": {
"type": "number"
},
"else": {
"$ref": "blah/blah.json"
}
},
"tests": [
{
"description": "number is valid",
"data": 0,
"valid": true
},
{
"description": "string is invalid",
"data": "hello",
"valid": false
}
]
}
]


@ -0,0 +1,98 @@
[
{
"description": "validation of period",
"schema": { "format": "period" },
"tests": [
{
"description": "all string formats ignore integers",
"data": 12,
"valid": true
},
{
"description": "all string formats ignore floats",
"data": 13.7,
"valid": true
},
{
"description": "all string formats ignore objects",
"data": {},
"valid": true
},
{
"description": "all string formats ignore arrays",
"data": [],
"valid": true
},
{
"description": "all string formats ignore booleans",
"data": false,
"valid": true
},
{
"description": "all string formats ignore nulls",
"data": null,
"valid": true
},
{
"description": "both-explicit",
"data": "1963-06-19T08:30:06Z/1963-06-19T08:30:07Z",
"valid": true
},
{
"description": "start-explicit",
"data": "1963-06-19T08:30:06Z/P4DT12H30M5S",
"valid": true
},
{
"description": "end-explicit",
"data": "P4DT12H30M5S/1963-06-19T08:30:06Z",
"valid": true
},
{
"description": "none-explicit",
"data": "P4DT12H30M5S/P4DT12H30M5S",
"valid": false
},
{
"description": "just date",
"data": "1963-06-19T08:30:06Z",
"valid": false
},
{
"description": "just duration",
"data": "P4DT12H30M5S",
"valid": false
},
{
"description": "more than two",
"data": "1963-06-19T08:30:06Z/1963-06-19T08:30:07Z/1963-06-19T08:30:07Z",
"valid": false
},
{
"description": "separated by space",
"data": "1963-06-19T08:30:06Z 1963-06-19T08:30:07Z",
"valid": false
},
{
"description": "separated by hyphen",
"data": "1963-06-19T08:30:06Z-1963-06-19T08:30:07Z",
"valid": false
},
{
"description": "invalid components",
"data": "foo/bar",
"valid": false
},
{
"description": "emtpy components",
"data": "/",
"valid": false
},
{
"description": "empty string",
"data": "",
"valid": false
}
]
}
]


@ -0,0 +1,87 @@
use std::error::Error;
use boon::{Compiler, Schemas};
use serde_json::json;
#[test]
fn test_metaschema_resource() -> Result<(), Box<dyn Error>> {
let main_schema = json!({
"$schema": "http://tmp.com/meta.json",
"type": "number"
});
let meta_schema = json!({
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/applicator": true,
"https://json-schema.org/draft/2020-12/vocab/core": true
},
"allOf": [
{ "$ref": "https://json-schema.org/draft/2020-12/meta/applicator" },
{ "$ref": "https://json-schema.org/draft/2020-12/meta/core" }
]
});
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
compiler.add_resource("schema.json", main_schema)?;
compiler.add_resource("http://tmp.com/meta.json", meta_schema)?;
compiler.compile("schema.json", &mut schemas)?;
Ok(())
}
#[test]
fn test_compile_anchor() -> Result<(), Box<dyn Error>> {
let schema = json!({
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$defs": {
"x": {
"$anchor": "a1",
"type": "number"
}
}
});
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
compiler.add_resource("schema.json", schema)?;
let sch_index1 = compiler.compile("schema.json#a1", &mut schemas)?;
let sch_index2 = compiler.compile("schema.json#/$defs/x", &mut schemas)?;
assert_eq!(sch_index1, sch_index2);
Ok(())
}
#[test]
fn test_compile_nonstd() -> Result<(), Box<dyn Error>> {
let schema = json!({
"components": {
"schemas": {
"foo" : {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$defs": {
"x": {
"$anchor": "a",
"type": "number"
},
"y": {
"$id": "http://temp.com/y",
"type": "string"
}
},
"oneOf": [
{ "$ref": "#a" },
{ "$ref": "http://temp.com/y" }
]
}
}
}
});
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
compiler.add_resource("schema.json", schema)?;
compiler.compile("schema.json#/components/schemas/foo", &mut schemas)?;
Ok(())
}


@ -0,0 +1,33 @@
{
"remotes": {
"http://localhost:1234/draft2020-12/locationIndependentIdentifier.json": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$defs": {
"refToInteger": {
"$ref": "#foo"
},
"A": {
"$anchor": "foo",
"type": "integer"
}
}
}
},
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"type": "object",
"properties": {
"a": {
"$ref": "http://localhost:1234/draft2020-12/locationIndependentIdentifier.json#foo"
},
"b": {
"$ref": "http://localhost:1234/draft2020-12/locationIndependentIdentifier.json#foo"
}
}
},
"data": {
"a": 1,
"b": "hello"
},
"valid": false
}

41
validator/tests/debug.rs Normal file

@ -0,0 +1,41 @@
use std::{error::Error, fs::File};
use boon::{Compiler, Schemas, UrlLoader};
use serde_json::{Map, Value};
#[test]
fn test_debug() -> Result<(), Box<dyn Error>> {
let test: Value = serde_json::from_reader(File::open("tests/debug.json")?)?;
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
compiler.enable_format_assertions();
compiler.enable_content_assertions();
let remotes = Remotes(test["remotes"].as_object().unwrap().clone());
compiler.use_loader(Box::new(remotes));
let url = "http://debug.com/schema.json";
compiler.add_resource(url, test["schema"].clone())?;
let sch = compiler.compile(url, &mut schemas)?;
let result = schemas.validate(&test["data"], sch, None);
if let Err(e) = &result {
for line in format!("{e}").lines() {
println!(" {line}");
}
for line in format!("{e:#}").lines() {
println!(" {line}");
}
println!("{:#}", e.detailed_output());
}
assert_eq!(result.is_ok(), test["valid"].as_bool().unwrap());
Ok(())
}
struct Remotes(Map<String, Value>);
impl UrlLoader for Remotes {
fn load(&self, url: &str) -> Result<Value, Box<dyn Error>> {
if let Some(v) = self.0.get(url) {
return Ok(v.clone());
}
Err("remote not found")?
}
}

230
validator/tests/examples.rs Normal file

@ -0,0 +1,230 @@
use std::{error::Error, fs::File};
use boon::{Compiler, Decoder, FileLoader, Format, MediaType, Schemas, SchemeUrlLoader, UrlLoader};
use serde::de::IgnoredAny;
use serde_json::{json, Value};
use url::Url;
#[test]
fn example_from_files() -> Result<(), Box<dyn Error>> {
let schema_file = "tests/examples/schema.json";
let instance: Value = serde_json::from_reader(File::open("tests/examples/instance.json")?)?;
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
let sch_index = compiler.compile(schema_file, &mut schemas)?;
let result = schemas.validate(&instance, sch_index, None);
assert!(result.is_ok());
Ok(())
}
/**
This example shows how to load JSON schemas from strings.
The schema url used plays an important role in resolving
schema references.
You can see that `cat.json` is resolved internally to
another string schema, whereas `dog.json` is resolved
to a local file.
*/
#[test]
fn example_from_strings() -> Result<(), Box<dyn Error>> {
let cat_schema: Value = json!({
"type": "object",
"properties": {
"speak": { "const": "meow" }
},
"required": ["speak"]
});
let pet_schema: Value = json!({
"oneOf": [
{ "$ref": "dog.json" },
{ "$ref": "cat.json" }
]
});
let instance: Value = json!({"speak": "bow"});
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
compiler.add_resource("tests/examples/pet.json", pet_schema)?;
compiler.add_resource("tests/examples/cat.json", cat_schema)?;
let sch_index = compiler.compile("tests/examples/pet.json", &mut schemas)?;
let result = schemas.validate(&instance, sch_index, None);
assert!(result.is_ok());
Ok(())
}
#[test]
#[ignore]
fn example_from_https() -> Result<(), Box<dyn Error>> {
let schema_url = "https://json-schema.org/learn/examples/geographical-location.schema.json";
let instance: Value = json!({"latitude": 48.858093, "longitude": 2.294694});
struct HttpUrlLoader;
impl UrlLoader for HttpUrlLoader {
fn load(&self, url: &str) -> Result<Value, Box<dyn Error>> {
let reader = ureq::get(url).call()?.into_reader();
Ok(serde_json::from_reader(reader)?)
}
}
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
let mut loader = SchemeUrlLoader::new();
loader.register("file", Box::new(FileLoader));
loader.register("http", Box::new(HttpUrlLoader));
loader.register("https", Box::new(HttpUrlLoader));
compiler.use_loader(Box::new(loader));
let sch_index = compiler.compile(schema_url, &mut schemas)?;
let result = schemas.validate(&instance, sch_index, None);
assert!(result.is_ok());
Ok(())
}
#[test]
fn example_from_yaml_files() -> Result<(), Box<dyn Error>> {
let schema_file = "tests/examples/schema.yml";
let instance: Value = serde_yaml::from_reader(File::open("tests/examples/instance.yml")?)?;
struct FileUrlLoader;
impl UrlLoader for FileUrlLoader {
fn load(&self, url: &str) -> Result<Value, Box<dyn Error>> {
let url = Url::parse(url)?;
let path = url.to_file_path().map_err(|_| "invalid file path")?;
let file = File::open(&path)?;
if path
.extension()
.filter(|&ext| ext == "yaml" || ext == "yml")
.is_some()
{
Ok(serde_yaml::from_reader(file)?)
} else {
Ok(serde_json::from_reader(file)?)
}
}
}
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
let mut loader = SchemeUrlLoader::new();
loader.register("file", Box::new(FileUrlLoader));
compiler.use_loader(Box::new(loader));
let sch_index = compiler.compile(schema_file, &mut schemas)?;
let result = schemas.validate(&instance, sch_index, None);
assert!(result.is_ok());
Ok(())
}
#[test]
fn example_custom_format() -> Result<(), Box<dyn Error>> {
let schema_url = "http://tmp/schema.json";
let schema: Value = json!({"type": "string", "format": "palindrome"});
let instance: Value = json!("step on no pets");
fn is_palindrome(v: &Value) -> Result<(), Box<dyn Error>> {
let Value::String(s) = v else {
return Ok(()); // applicable only on strings
};
let mut chars = s.chars();
while let (Some(c1), Some(c2)) = (chars.next(), chars.next_back()) {
if c1 != c2 {
Err("char mismatch")?;
}
}
Ok(())
}
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
compiler.enable_format_assertions(); // in draft2020-12 format assertions are not enabled by default
compiler.register_format(Format {
name: "palindrome",
func: is_palindrome,
});
compiler.add_resource(schema_url, schema)?;
let sch_index = compiler.compile(schema_url, &mut schemas)?;
let result = schemas.validate(&instance, sch_index, None);
assert!(result.is_ok());
Ok(())
}
#[test]
fn example_custom_content_encoding() -> Result<(), Box<dyn Error>> {
let schema_url = "http://tmp/schema.json";
let schema: Value = json!({"type": "string", "contentEncoding": "hex"});
let instance: Value = json!("aBcdxyz");
fn decode(b: u8) -> Result<u8, Box<dyn Error>> {
match b {
b'0'..=b'9' => Ok(b - b'0'),
b'a'..=b'f' => Ok(b - b'a' + 10),
b'A'..=b'F' => Ok(b - b'A' + 10),
_ => Err("decode_hex: non-hex char")?,
}
}
fn decode_hex(s: &str) -> Result<Vec<u8>, Box<dyn Error>> {
if s.len() % 2 != 0 {
Err("decode_hex: odd length")?;
}
let mut bytes = s.bytes();
let mut out = Vec::with_capacity(s.len() / 2);
for _ in 0..s.len() / 2 { // one output byte per pair of hex digits
if let (Some(b1), Some(b2)) = (bytes.next(), bytes.next()) {
out.push(decode(b1)? << 4 | decode(b2)?);
} else {
Err("decode_hex: non-ascii char")?;
}
}
Ok(out)
}
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
compiler.enable_content_assertions(); // content assertions are not enabled by default
compiler.register_content_encoding(Decoder {
name: "hex",
func: decode_hex,
});
compiler.add_resource(schema_url, schema)?;
let sch_index = compiler.compile(schema_url, &mut schemas)?;
let result = schemas.validate(&instance, sch_index, None);
assert!(result.is_err());
Ok(())
}
#[test]
fn example_custom_content_media_type() -> Result<(), Box<dyn Error>> {
let schema_url = "http://tmp/schema.json";
let schema: Value = json!({"type": "string", "contentMediaType": "application/yaml"});
let instance: Value = json!("name:foobar");
fn check_yaml(bytes: &[u8], deserialize: bool) -> Result<Option<Value>, Box<dyn Error>> {
if deserialize {
return Ok(Some(serde_yaml::from_slice(bytes)?));
}
serde_yaml::from_slice::<IgnoredAny>(bytes)?;
Ok(None)
}
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
compiler.enable_content_assertions(); // content assertions are not enabled by default
compiler.register_content_media_type(MediaType {
name: "application/yaml",
json_compatible: true,
func: check_yaml,
});
compiler.add_resource(schema_url, schema)?;
let sch_index = compiler.compile(schema_url, &mut schemas)?;
let result = schemas.validate(&instance, sch_index, None);
assert!(result.is_ok());
Ok(())
}


@ -0,0 +1,7 @@
{
"type": "object",
"properties": {
"speak": { "const": "bow" }
},
"required": ["speak"]
}


@ -0,0 +1,4 @@
{
"firstName": "Santhosh Kumar",
"lastName": "Tekuri"
}


@ -0,0 +1,2 @@
firstName: Santhosh Kumar
lastName: Tekuri


@ -0,0 +1,12 @@
{
"type": "object",
"properties": {
"firstName": {
"type": "string"
},
"lastName": {
"type": "string"
}
},
"required": ["firstName", "lastName"]
}


@ -0,0 +1,12 @@
{
"type": "object",
"properties": {
"firstName": {
"type": "string"
},
"lastName": {
"type": "string"
}
},
"required": ["firstName", "lastName"]
}


@ -0,0 +1,9 @@
type: object
properties:
firstName:
type: string
lastName:
type: string
required:
- firstName
- lastName


@ -0,0 +1,44 @@
use std::fs;
use boon::{CompileError, Compiler, Schemas};
fn test(path: &str) -> Result<(), CompileError> {
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
compiler.compile(path, &mut schemas)?;
Ok(())
}
#[test]
fn test_absolute() -> Result<(), CompileError> {
let path = fs::canonicalize("tests/examples/schema.json").unwrap();
test(path.to_string_lossy().as_ref())
}
#[test]
fn test_relative_slash() -> Result<(), CompileError> {
test("tests/examples/schema.json")
}
#[test]
#[cfg(windows)]
fn test_relative_backslash() -> Result<(), CompileError> {
test("tests\\examples\\schema.json")
}
#[test]
fn test_absolute_space() -> Result<(), CompileError> {
let path = fs::canonicalize("tests/examples/sample schema.json").unwrap();
test(path.to_string_lossy().as_ref())
}
#[test]
fn test_relative_slash_space() -> Result<(), CompileError> {
test("tests/examples/sample schema.json")
}
#[test]
#[cfg(windows)]
fn test_relative_backslash_space() -> Result<(), CompileError> {
test("tests\\examples\\sample schema.json")
}


@ -0,0 +1,244 @@
[
{
"description": "InvalidJsonPointer",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$ref": "#/a~0b~~cd"
},
"errors": [
"InvalidJsonPointer(\"http://fake.com/schema.json#/a~0b~~cd\")"
]
},
{
"description": "UnsupportedUrlScheme",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$ref": "ftp://mars.com/schema.json"
},
"errors": [
"UnsupportedUrlScheme { url: \"ftp://mars.com/schema.json\" }"
]
},
{
"description": "ValidationError",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"patternProperties": {
"^(abc]": {
"type": "string"
}
}
},
"errors": [
"ValidationError { url: \"http://fake.com/schema.json#\""
]
},
{
"description": "ValidationError-nonsubschema",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"dummy": {
"type": 1
},
"$ref": "#/dummy"
},
"errors": [
"ValidationError { url: \"http://fake.com/schema.json#/dummy\""
]
},
{
"description": "JsonPointerNotFound-obj",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$ref": "#/$defs/something"
},
"errors": [
"JsonPointerNotFound(\"http://fake.com/schema.json#/$defs/something\")"
]
},
{
"description": "JsonPointerNotFound-arr-pos",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$ref": "#/dummy/0",
"dummy": []
},
"errors": [
"JsonPointerNotFound(\"http://fake.com/schema.json#/dummy/0\")"
]
},
{
"description": "JsonPointerNotFound-arr-neg",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$ref": "#/dummy/-1",
"dummy": []
},
"errors": [
"JsonPointerNotFound(\"http://fake.com/schema.json#/dummy/-1\")"
]
},
{
"description": "JsonPointerNotFound-primitive",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$ref": "#/$schema/something"
},
"errors": [
"JsonPointerNotFound(\"http://fake.com/schema.json#/$schema/something\")"
]
},
{
"description": "InvalidRegex",
"schema": {
"$schema": "https://json-schema.org/draft-04/schema",
"patternProperties": {
"^(abc]": {
"type": "string"
}
}
},
"errors": [
"InvalidRegex { url: \"http://fake.com/schema.json#/patternProperties\", regex: \"^(abc]\", "
]
},
{
"description": "DuplicateId",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$defs": {
"a": {
"$id": "http://a.com/b",
"$defs": {
"b": {
"$id": "a.json"
},
"c": {
"$id": "a.json"
}
}
}
}
},
"errors": [
"DuplicateId { url: \"http://fake.com/schema.json\", id: \"http://a.com/a.json\", ",
"\"/$defs/a/$defs/b\"",
"\"/$defs/a/$defs/c\""
]
},
{
"description": "DuplicateAnchor",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$defs": {
"a": {
"$id": "http://a.com/b",
"$defs": {
"b": {
"$anchor": "a1"
},
"c": {
"$anchor": "a1"
}
}
}
}
},
"errors": [
"DuplicateAnchor { anchor: \"a1\", url: \"http://fake.com/schema.json\", ",
"\"/$defs/a/$defs/b\"",
"\"/$defs/a/$defs/c\""
]
},
{
"description": "UnsupportedDraft",
"remotes": {
"http://remotes/a.json": {
"$schema": "http://remotes/b.json"
},
"http://remotes/b.json": {
"$schema": "http://remotes/b.json"
}
},
"schema": {
"$schema": "http://remotes/a.json"
},
"errors": [
"UnsupportedDraft { url: \"http://remotes/b.json\" }"
]
},
{
"description": "MetaSchemaCycle",
"remotes": {
"http://remotes/a.json": {
"$schema": "http://remotes/b.json"
},
"http://remotes/b.json": {
"$schema": "http://remotes/a.json"
}
},
"schema": {
"$schema": "http://remotes/a.json"
},
"errors": [
"MetaSchemaCycle { url: \"http://remotes/a.json\" }"
]
},
{
"description": "AnchorNotFound-local",
"schema": {
"$ref": "sample.json#abcd",
"$defs": {
"a": {
"$id": "sample.json"
}
}
},
"errors": [
"AnchorNotFound { url: \"http://fake.com/schema.json\", reference: \"http://fake.com/sample.json#abcd\" }"
]
},
{
"description": "AnchorNotFound-remote",
"remotes": {
"http://remotes/a.json": {}
},
"schema": {
"$ref": "http://remotes/a.json#abcd"
},
"errors": [
"AnchorNotFound { url: \"http://remotes/a.json\", reference: \"http://remotes/a.json#abcd\" }"
]
},
{
"description": "UnsupportedVocabulary-required",
"remotes": {
"http://remotes/a.json": {
"$schema": "https://json-schema.org/draft/2020-12/schema#",
"$vocabulary": {
"https://json-schema.org/draft/2019-09/vocab/format": true
}
}
},
"schema": {
"$schema": "http://remotes/a.json"
},
"errors": [
"UnsupportedVocabulary { url: \"http://remotes/a.json\", vocabulary: \"https://json-schema.org/draft/2019-09/vocab/format\" }"
]
},
{
"description": "UnsupportedVocabulary-optioanl",
"remotes": {
"http://remotes/a.json": {
"$schema": "https://json-schema.org/draft/2020-12/schema#",
"$vocabulary": {
"https://json-schema.org/draft/2019-09/vocab/format": false
}
}
},
"schema": {
"$schema": "http://remotes/a.json"
}
}
]


@ -0,0 +1,67 @@
use std::{collections::HashMap, error::Error, fs::File};
use boon::{CompileError, Compiler, Schemas, UrlLoader};
use serde::Deserialize;
use serde_json::Value;
#[derive(Debug, Deserialize)]
struct Test {
description: String,
remotes: Option<HashMap<String, Value>>,
schema: Value,
errors: Option<Vec<String>>,
}
#[test]
fn test_invalid_schemas() -> Result<(), Box<dyn Error>> {
let file = File::open("tests/invalid-schemas.json")?;
let tests: Vec<Test> = serde_json::from_reader(file)?;
for test in tests {
println!("{}", test.description);
match compile(&test) {
Ok(_) => {
if test.errors.is_some() {
Err("want compilation to fail")?
}
}
Err(e) => {
println!(" {e}");
let error = format!("{e:?}");
let Some(errors) = &test.errors else {
Err("want compilation to succeed")?
};
for want in errors {
if !error.contains(want) {
println!(" got {error}");
println!(" want {want}");
panic!("error mismatch");
}
}
}
}
}
Ok(())
}
fn compile(test: &Test) -> Result<(), CompileError> {
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
let url = "http://fake.com/schema.json";
if let Some(remotes) = &test.remotes {
compiler.use_loader(Box::new(Remotes(remotes.clone())));
}
compiler.add_resource(url, test.schema.clone())?;
compiler.compile(url, &mut schemas)?;
Ok(())
}
struct Remotes(HashMap<String, Value>);
impl UrlLoader for Remotes {
fn load(&self, url: &str) -> Result<Value, Box<dyn Error>> {
if let Some(v) = self.0.get(url) {
return Ok(v.clone());
}
Err("remote not found")?
}
}

122
validator/tests/output.rs Normal file

@ -0,0 +1,122 @@
use std::{env, error::Error, fs::File, path::Path};
use boon::{Compiler, Draft, Schemas};
use serde::{Deserialize, Serialize};
use serde_json::Value;
#[test]
fn test_suites() -> Result<(), Box<dyn Error>> {
if let Ok(suite) = env::var("TEST_SUITE") {
test_suite(&suite)?;
} else {
test_suite("tests/JSON-Schema-Test-Suite")?;
test_suite("tests/Extra-Suite")?;
}
Ok(())
}
fn test_suite(suite: &str) -> Result<(), Box<dyn Error>> {
test_folder(suite, "draft2019-09", Draft::V2019_09)?;
test_folder(suite, "draft2020-12", Draft::V2020_12)?;
Ok(())
}
fn test_folder(suite: &str, folder: &str, draft: Draft) -> Result<(), Box<dyn Error>> {
let output_schema_url = format!(
"https://json-schema.org/draft/{}/output/schema",
folder.strip_prefix("draft").unwrap()
);
let prefix = Path::new(suite).join("output-tests");
let folder = prefix.join(folder);
let content = folder.join("content");
if !content.is_dir() {
return Ok(());
}
let output_schema: Value =
serde_json::from_reader(File::open(folder.join("output-schema.json"))?)?;
for entry in content.read_dir()? {
let entry = entry?;
if !entry.file_type()?.is_file() {
continue;
};
let entry_path = entry.path();
println!("{}", entry_path.strip_prefix(&prefix)?.to_str().unwrap());
let groups: Vec<Group> = serde_json::from_reader(File::open(entry_path)?)?;
for group in groups {
println!(" {}", group.description);
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
compiler.set_default_draft(draft);
let schema_url = "http://output-tests/schema";
compiler.add_resource(schema_url, group.schema)?;
let sch = compiler.compile(schema_url, &mut schemas)?;
for test in group.tests {
println!(" {}", test.description);
match schemas.validate(&test.data, sch, None) {
Ok(_) => println!(" validation success"),
Err(e) => {
if let Some(sch) = test.output.basic {
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
compiler.set_default_draft(draft);
compiler.add_resource(&output_schema_url, output_schema.clone())?;
let schema_url = "http://output-tests/schema";
compiler.add_resource(schema_url, sch)?;
let sch = compiler.compile(schema_url, &mut schemas)?;
let basic: Value = serde_json::from_str(&e.basic_output().to_string())?;
let result = schemas.validate(&basic, sch, None);
if let Err(e) = result {
println!("{basic:#}\n");
for line in format!("{e}").lines() {
println!(" {line}");
}
panic!("basic output did not match");
}
}
if let Some(sch) = test.output.detailed {
let mut schemas = Schemas::new();
let mut compiler = Compiler::new();
compiler.set_default_draft(draft);
compiler.add_resource(&output_schema_url, output_schema.clone())?;
let schema_url = "http://output-tests/schema";
compiler.add_resource(schema_url, sch)?;
let sch = compiler.compile(schema_url, &mut schemas)?;
let detailed: Value =
serde_json::from_str(&e.detailed_output().to_string())?;
let result = schemas.validate(&detailed, sch, None);
if let Err(e) = result {
println!("{detailed:#}\n");
for line in format!("{e}").lines() {
println!(" {line}");
}
panic!("detailed output did not match");
}
}
}
}
}
}
}
Ok(())
}
#[derive(Debug, Serialize, Deserialize)]
struct Group {
description: String,
schema: Value,
tests: Vec<Test>,
}
#[derive(Debug, Serialize, Deserialize)]
struct Test {
description: String,
data: Value,
output: Output,
}
#[derive(Debug, Serialize, Deserialize)]
struct Output {
basic: Option<Value>,
detailed: Option<Value>,
}

120
validator/tests/suite.rs Normal file

@ -0,0 +1,120 @@
use std::{env, error::Error, ffi::OsStr, fs::File, path::Path};
use boon::{Compiler, Draft, Schemas, UrlLoader};
use serde::{Deserialize, Serialize};
use serde_json::Value;
static SKIP: [&str; 2] = [
"zeroTerminatedFloats.json", // only draft4: this behavior is changed in later drafts
"float-overflow.json",
];
#[derive(Debug, Serialize, Deserialize)]
struct Group {
description: String,
schema: Value,
tests: Vec<Test>,
}
#[derive(Debug, Serialize, Deserialize)]
struct Test {
description: String,
data: Value,
valid: bool,
}
#[test]
fn test_suites() -> Result<(), Box<dyn Error>> {
if let Ok(suite) = env::var("TEST_SUITE") {
test_suite(&suite)?;
} else {
test_suite("tests/JSON-Schema-Test-Suite")?;
test_suite("tests/Extra-Test-Suite")?;
}
Ok(())
}
fn test_suite(suite: &str) -> Result<(), Box<dyn Error>> {
if !Path::new(suite).exists() {
Err(format!("test suite {suite} does not exist"))?;
}
test_dir(suite, "draft4", Draft::V4)?;
test_dir(suite, "draft6", Draft::V6)?;
test_dir(suite, "draft7", Draft::V7)?;
test_dir(suite, "draft2019-09", Draft::V2019_09)?;
test_dir(suite, "draft2020-12", Draft::V2020_12)?;
Ok(())
}
fn test_dir(suite: &str, path: &str, draft: Draft) -> Result<(), Box<dyn Error>> {
let prefix = Path::new(suite).join("tests");
let dir = prefix.join(path);
if !dir.is_dir() {
return Ok(());
}
for entry in dir.read_dir()? {
let entry = entry?;
let file_type = entry.file_type()?;
let tmp_entry_path = entry.path();
let entry_path = tmp_entry_path.strip_prefix(&prefix)?.to_str().unwrap();
if file_type.is_file() {
if !SKIP.iter().any(|n| OsStr::new(n) == entry.file_name()) {
test_file(suite, entry_path, draft)?;
}
} else if file_type.is_dir() {
test_dir(suite, entry_path, draft)?;
}
}
Ok(())
}
fn test_file(suite: &str, path: &str, draft: Draft) -> Result<(), Box<dyn Error>> {
println!("FILE: {path}");
let path = Path::new(suite).join("tests").join(path);
let optional = path.components().any(|comp| comp.as_os_str() == "optional");
let file = File::open(path)?;
let url = "http://testsuite.com/schema.json";
let groups: Vec<Group> = serde_json::from_reader(file)?;
for group in groups {
println!("{}", group.description);
let mut schemas = Schemas::default();
let mut compiler = Compiler::default();
compiler.set_default_draft(draft);
if optional {
compiler.enable_format_assertions();
compiler.enable_content_assertions();
}
compiler.use_loader(Box::new(RemotesLoader(suite.to_owned())));
compiler.add_resource(url, group.schema)?;
let sch_index = compiler.compile(url, &mut schemas)?;
for test in group.tests {
println!(" {}", test.description);
let result = schemas.validate(&test.data, sch_index, None);
if let Err(e) = &result {
for line in format!("{e}").lines() {
println!(" {line}");
}
for line in format!("{e:#}").lines() {
println!(" {line}");
}
}
assert_eq!(result.is_ok(), test.valid);
}
}
Ok(())
}
struct RemotesLoader(String);
impl UrlLoader for RemotesLoader {
fn load(&self, url: &str) -> Result<Value, Box<dyn std::error::Error>> {
// remotes folder --
if let Some(path) = url.strip_prefix("http://localhost:1234/") {
let path = Path::new(&self.0).join("remotes").join(path);
let file = File::open(path)?;
let json: Value = serde_json::from_reader(file)?;
return Ok(json);
}
Err("no internet")?
}
}
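
Both suite runners honor the `TEST_SUITE` environment variable seen above, so a local checkout of the official test suite can be substituted; a usage sketch (path hypothetical):

    TEST_SUITE=../JSON-Schema-Test-Suite cargo test --test suite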


@ -1 +1 @@
1.0.10
1.0.40