fixing ordering checkpoint
This commit is contained in:
128
scripts/format_fixtures.py
Normal file
128
scripts/format_fixtures.py
Normal file
@ -0,0 +1,128 @@
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
import os
|
||||
|
||||
def format_sql(sql_str):
    """Split a single-line SQL string into indented lines.

    Given a single-line SQL string from the test runner, break it before
    selected keywords and ``jsonb_build_object`` keys, then indent each
    resulting line according to parenthesis nesting, following the rules
    seen in the jspg project fixtures.

    Args:
        sql_str: The SQL text to format.

    Returns:
        A list of formatted lines (not a joined string). An empty or
        whitespace-only input yields an empty list.
    """
    # 1. Normalize spaces around operators so the later splits are easier.
    #    The actual SQL doesn't have spaces around =, >, <, etc.
    sql_str = re.sub(
        r'([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)=([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)',
        r'\1.\2 = \3.\4',
        sql_str,
    )
    sql_str = re.sub(
        r"([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)='([a-zA-Z0-9_]+)'",
        r"\1.\2 = '\3'",
        sql_str,
    )
    sql_str = re.sub(
        r"([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)>([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)",
        r"\1.\2 > \3.\4",
        sql_str,
    )
    # Guarantee a space in front of every "AND " so the " AND " break below
    # also fires when AND directly follows the previous token.
    sql_str = sql_str.replace("AND ", " AND ")

    # 2. Insert line breaks. Whitespace around the inserted newlines is
    #    irrelevant because every line is stripped afterwards.
    s = sql_str

    # Break before certain keywords.
    s = s.replace("(SELECT COALESCE", "\n(SELECT COALESCE")
    s = s.replace("FROM ", "\nFROM ")
    s = s.replace("JOIN ", "\nJOIN ")
    s = s.replace("WHERE ", "\nWHERE\n ")
    s = s.replace(" AND ", "\n AND ")

    # Break before keys in jsonb_build_object that are followed by a
    # subquery, i.e. text shaped like: ,'key_name',(SELECT
    s = re.sub(r",('([^']+)')\s*,\s*\(SELECT", r",\n\1,\n(SELECT", s)

    # Also break scalar keys in jsonb_build_object.
    s = re.sub(r",('([^']+)')\s*,", r",\n\1, ", s)
    s = s.replace("jsonb_build_object('", "jsonb_build_object(\n'")

    # CASE statements.
    s = s.replace("CASE WHEN", "CASE\nWHEN")
    s = s.replace("THEN(", "THEN\n(")
    s = s.replace("ELSE NULL END", "\nELSE NULL END")
    s = s.replace(" WHEN ", "\nWHEN ")

    lines = [l.strip() for l in s.split('\n') if l.strip()]

    # 3. Compute indentation from parenthesis nesting and leading keywords.
    formatted_lines = []
    current_indent = 0  # measured in spaces; nesting moves it in steps of 2

    for raw_line in lines:
        # Leading ')' characters close nesting levels before the line is
        # emitted, so peel them off and dedent first.
        line = raw_line.lstrip(')')
        close_paren_count = len(raw_line) - len(line)

        if close_paren_count > 0:
            current_indent = max(0, current_indent - 2 * close_paren_count)
            if not line:
                # The line was nothing but closing parens: emit them as-is.
                formatted_lines.append(
                    " " * current_indent + ")" * close_paren_count
                )
                continue
            # Otherwise the parens are re-attached when the line is built.

        # FROM / JOIN / WHERE keep the parent indent; continuation keywords
        # are nudged right.
        # NOTE(review): these prefixes are a single space while nesting is
        # tracked in steps of two - confirm the intended width.
        if line.startswith(("FROM ", "JOIN ", "WHERE")):
            pass
        elif line.startswith(("AND ", "WHEN ", "ELSE ")):
            line = " " + line

        # Key/value pairs inside jsonb_build_object get 2 extra spaces.
        if line.startswith("'") and "jsonb_build_object" not in line:
            line = "  " + line

        if line.startswith("(SELECT jsonb_build_object"):
            line = " " + line

        formatted_line = (
            " " * current_indent + ")" * close_paren_count + line
        )

        # Only a net surplus of '(' on this line deepens the indent; a net
        # surplus of ')' that is not at the start of a line is ignored.
        # NOTE(review): that means trailing ')' never dedents - confirm this
        # matches the fixture layout.
        open_paren_count = line.count('(') - line.count(')')
        current_indent += max(0, open_paren_count * 2)

        formatted_lines.append(formatted_line)

    return formatted_lines
|
||||
|
||||
def format_sql_regex(sql_str):
    """Tokenise *sql_str* on SQL keywords; unfinished placeholder.

    The string is split on a fixed set of keywords and punctuation, with the
    delimiters kept as tokens, but nothing is done with the result yet.

    Args:
        sql_str: The SQL text to tokenise.

    Returns:
        None. No formatted output is produced.
    """
    # The actual jspg parser output might be tricky, so this was meant to be a
    # simpler heuristic formatter based exactly on the user's provided output
    # format. It requires custom tokenizing because of nested SELECTs.
    _tokens = re.split(
        r"(\(SELECT COALESCE|\(SELECT jsonb_build_object|FROM|JOIN|WHERE|AND|CASE|WHEN|THEN|ELSE NULL END|\n|,\s*')",
        sql_str,
    )
    # The tokens are intentionally discarded: formatting was never implemented.
    return None
|
||||
|
||||
# We will actually just run `cargo test -- --nocapture` to grab the actual SQLs
|
||||
# and do some string replacements.
|
||||
# Given the complexity, let's build a dedicated node-based formatter in python.
|
||||
@ -1,6 +1,6 @@
|
||||
use crate::database::Database;
|
||||
use std::sync::Arc;
|
||||
use indexmap::IndexMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
pub struct Compiler<'a> {
|
||||
pub db: &'a Database,
|
||||
@ -378,10 +378,7 @@ impl<'a> Compiler<'a> {
|
||||
return Ok(("NULL".to_string(), "string".to_string()));
|
||||
}
|
||||
|
||||
case_statements.sort();
|
||||
|
||||
let sql = format!("CASE {} ELSE NULL END", case_statements.join(" "));
|
||||
|
||||
Ok((sql, "object".to_string()))
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user