# Reconstructed from patch ce9c9baac98677fbd0ba21168bd799528b50ca0c
# ("fixing ordering checkpoint", Alex Groleau, Thu, 14 May 2026 03:26:03 -0400),
# which adds this file as scripts/format_fixtures.py.
import json
import os
import re
import subprocess

# The SQL emitted by the test runner has no spaces around comparison
# operators; these patterns add them for the `table.column` forms it produces.
_COLUMN_EQ_COLUMN = re.compile(r"([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)=([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)")
_COLUMN_EQ_LITERAL = re.compile(r"([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)='([a-zA-Z0-9_]+)'")
_COLUMN_GT_COLUMN = re.compile(r"([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)>([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)")

# An `AND ` keyword that is glued to the preceding token (e.g. `)AND `).
# The look-behind skips occurrences already preceded by a space and, more
# importantly, words that merely end in "AND" (e.g. `BRAND `).
_UNSPACED_AND = re.compile(r"(?<![A-Za-z0-9_ ])AND ")

# `,'key',(SELECT` -> key whose value is a subquery inside jsonb_build_object.
_KEY_BEFORE_SUBQUERY = re.compile(r",('([^']+)')\s*,\s*\(SELECT")
# `,'key',` -> key whose value is a scalar inside jsonb_build_object.
_KEY_BEFORE_SCALAR = re.compile(r",('([^']+)')\s*,")

# Lines starting with these keep the current indent unchanged.
_PARENT_INDENT_PREFIXES = ("FROM ", "JOIN ", "WHERE")
# Lines starting with these are nudged one extra space to the right.
_NUDGED_PREFIXES = ("AND ", "WHEN ", "ELSE ")


def _normalize_operator_spacing(sql_str):
    """Return *sql_str* with spaces around `=`/`>` comparisons and before `AND`."""
    sql_str = _COLUMN_EQ_COLUMN.sub(r"\1.\2 = \3.\4", sql_str)
    sql_str = _COLUMN_EQ_LITERAL.sub(r"\1.\2 = '\3'", sql_str)
    sql_str = _COLUMN_GT_COLUMN.sub(r"\1.\2 > \3.\4", sql_str)
    return _UNSPACED_AND.sub(" AND ", sql_str)


def _insert_line_breaks(s):
    """Return *s* with newlines inserted before the constructs that start a line.

    The order of the replacements matters: the subquery-key pattern must run
    before the more general scalar-key pattern, and ` WHEN ` is handled last so
    that the `WHEN` already split off from `CASE WHEN` is not split again.
    """
    # Break before certain keywords.
    s = s.replace("(SELECT COALESCE", "\n(SELECT COALESCE")
    s = s.replace("FROM ", "\nFROM ")
    s = s.replace("JOIN ", "\nJOIN ")
    s = s.replace("WHERE ", "\nWHERE\n ")
    s = s.replace(" AND ", "\n AND ")

    # Break before keys in jsonb_build_object: first those followed by a
    # subquery, then the remaining scalar keys.
    s = _KEY_BEFORE_SUBQUERY.sub(r",\n\1,\n(SELECT", s)
    s = _KEY_BEFORE_SCALAR.sub(r",\n\1, ", s)
    s = s.replace("jsonb_build_object('", "jsonb_build_object(\n'")

    # CASE statements.
    s = s.replace("CASE WHEN", "CASE\nWHEN")
    s = s.replace("THEN(", "THEN\n(")
    s = s.replace("ELSE NULL END", "\nELSE NULL END")
    s = s.replace(" WHEN ", "\nWHEN ")
    return s


def _indent_lines(lines):
    """Return *lines* indented according to parenthesis depth and keywords.

    Leading `)` characters on a line reduce the indent by two columns each
    before the line is emitted. After a line is emitted, the indent grows by
    two columns per net opening parenthesis on it; a net-closing line does not
    reduce the indent at that point (only leading `)` do).
    """
    formatted_lines = []
    current_indent = 0

    for line in lines:
        # Peel off leading closing parens; they dedent before the line prints.
        body = line.lstrip(")")
        close_paren_count = len(line) - len(body)

        if close_paren_count:
            current_indent = max(0, current_indent - 2 * close_paren_count)
            if not body:
                # The line consisted only of closing parens.
                formatted_lines.append(" " * current_indent + ")" * close_paren_count)
                continue

        if body.startswith(_PARENT_INDENT_PREFIXES):
            pass  # Keep parent indent.
        elif body.startswith(_NUDGED_PREFIXES):
            body = " " + body

        # Key/value pairs inside jsonb_build_object get extra indentation.
        if body.startswith("'") and "jsonb_build_object" not in body:
            body = " " + body

        if body.startswith("(SELECT jsonb_build_object"):
            body = " " + body

        formatted_lines.append(" " * current_indent + ")" * close_paren_count + body)

        # Only net-opening lines change the indent here.
        open_paren_count = body.count("(") - body.count(")")
        current_indent += max(0, open_paren_count * 2)

    return formatted_lines


def format_sql(sql_str):
    """Format a single-line SQL string from the test runner into indented lines.

    The formatting follows the rules seen in the jspg project fixtures: spaces
    are added around `=`/`>` comparisons, line breaks are inserted before
    selected keywords and jsonb_build_object keys, and each resulting line is
    indented by parenthesis depth.

    Args:
        sql_str: The SQL text to format.

    Returns:
        A list of formatted lines (not a joined string). Blank lines are
        dropped, so an empty input yields an empty list.
    """
    s = _insert_line_breaks(_normalize_operator_spacing(sql_str))
    lines = [stripped for stripped in (raw.strip() for raw in s.split("\n")) if stripped]
    return _indent_lines(lines)


def format_sql_regex(sql_str):
    """Unfinished tokenizer-based formatter; currently always returns None.

    The intent is a simpler heuristic based on the provided output format,
    which needs custom tokenizing because of nested SELECTs. Only the token
    split exists so far and its result is not used yet.
    """
    tokens = re.split(r"(\(SELECT COALESCE|\(SELECT jsonb_build_object|FROM|JOIN|WHERE|AND|CASE|WHEN|THEN|ELSE NULL END|\n|,\s*')", sql_str)

    return None


# We will actually just run `cargo test -- --nocapture` to grab the actual SQLs
# and do some string replacements.
# Given the complexity, let's build a dedicated node-based formatter in python.
diff --git a/src/queryer/compiler.rs b/src/queryer/compiler.rs index 17eec70..bd18a75 100644 --- a/src/queryer/compiler.rs +++ b/src/queryer/compiler.rs @@ -1,6 +1,6 @@ use crate::database::Database; -use std::sync::Arc; use indexmap::IndexMap; +use std::sync::Arc; pub struct Compiler<'a> { pub db: &'a Database, @@ -378,10 +378,7 @@ impl<'a> Compiler<'a> { return Ok(("NULL".to_string(), "string".to_string())); } - case_statements.sort(); - let sql = format!("CASE {} ELSE NULL END", case_statements.join(" ")); - Ok((sql, "object".to_string())) }