import json
import os
import re
import subprocess


def _insert_line_breaks(sql):
    """Return *sql* with newlines inserted before the constructs that start a new line.

    The replacements run in a fixed order; later ones operate on the output
    of earlier ones, so the order must not be changed.
    """
    s = sql

    # Break before certain keywords.
    s = s.replace("(SELECT COALESCE", "\n(SELECT COALESCE")
    s = s.replace("FROM ", "\nFROM ")
    s = s.replace("JOIN ", "\nJOIN ")
    s = s.replace("WHERE ", "\nWHERE\n ")
    s = s.replace(" AND ", "\n AND ")

    # Break before keys in jsonb_build_object that are followed by a
    # subquery, i.e. text shaped like: ,'key_name',(SELECT
    s = re.sub(r",('([^']+)')\s*,\s*\(SELECT", r",\n\1,\n(SELECT", s)
    # Also break before scalar keys in jsonb_build_object.
    s = re.sub(r",('([^']+)')\s*,", r",\n\1, ", s)
    s = s.replace("jsonb_build_object('", "jsonb_build_object(\n'")

    # CASE statements.
    s = s.replace("CASE WHEN", "CASE\nWHEN")
    s = s.replace("THEN(", "THEN\n(")
    s = s.replace("ELSE NULL END", "\nELSE NULL END")
    s = s.replace(" WHEN ", "\nWHEN ")
    return s


def _indent_lines(lines):
    """Indent already-stripped *lines* by parenthesis depth and leading keyword.

    Depth grows by two spaces for every net unclosed "(" on a line and only
    shrinks when a line *starts* with ")"; a net-negative line that does not
    start with ")" leaves the depth unchanged.
    """
    formatted_lines = []
    depth = 0
    for raw in lines:
        # Leading ")" characters close scopes before this line is emitted.
        line = raw.lstrip(')')
        closers = len(raw) - len(line)
        if closers:
            depth = max(0, depth - 2 * closers)
            if not line:
                # The line consisted only of closing parens.
                formatted_lines.append(" " * depth + ")" * closers)
                continue

        # FROM / JOIN / WHERE keep the parent indent; AND / WHEN / ELSE get
        # one extra leading space.
        # NOTE(review): the one-space paddings are reproduced exactly as they
        # appear in the collapsed source; confirm the intended width.
        if line.startswith(("FROM ", "JOIN ", "WHERE")):
            pass
        elif line.startswith(("AND ", "WHEN ", "ELSE ")):
            line = " " + line

        # Quoted keys inside jsonb_build_object get extra padding.
        if line.startswith("'") and "jsonb_build_object" not in line:
            line = " " + line
        if line.startswith("(SELECT jsonb_build_object"):
            line = " " + line

        formatted_lines.append(" " * depth + ")" * closers + line)

        # Only a net surplus of "(" deepens the indent for following lines.
        depth += max(0, (line.count('(') - line.count(')')) * 2)
    return formatted_lines


def format_sql(sql_str):
    """Split a single-line SQL string into indented lines.

    Spaces are first normalized around ``=`` / ``>`` comparisons between
    qualified names, then line breaks are inserted before selected keywords
    and jsonb_build_object keys, and finally each line is indented.

    Args:
        sql_str: The SQL text to format.

    Returns:
        A list of formatted lines (not a joined string). Empty input yields
        an empty list.
    """
    # The incoming SQL has no spaces around =, > etc.; add them so the
    # keyword-based splitting below sees consistent text.
    sql_str = re.sub(r'([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)=([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)', r'\1.\2 = \3.\4', sql_str)
    sql_str = re.sub(r"([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)='([a-zA-Z0-9_]+)'", r"\1.\2 = '\3'", sql_str)
    sql_str = re.sub(r"([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)>([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)", r"\1.\2 > \3.\4", sql_str)
    sql_str = sql_str.replace("AND ", " AND ")
    # NOTE(review): as written this replacement is a no-op; the search string
    # may have lost whitespace. Kept verbatim -- confirm the intended text.
    sql_str = sql_str.replace("WHERE NOT", "WHERE NOT")

    broken = _insert_line_breaks(sql_str)
    lines = [part.strip() for part in broken.split('\n') if part.strip()]
    return _indent_lines(lines)


def format_sql_regex(sql_str):
    """Unfinished tokenizer-based formatter; currently always returns None.

    The SQL is split on the keywords the formatter cares about, but the
    tokens are not used yet.
    """
    # Custom tokenizing is needed because of nested SELECTs.
    _tokens = re.split(r"(\(SELECT COALESCE|\(SELECT jsonb_build_object|FROM|JOIN|WHERE|AND|CASE|WHEN|THEN|ELSE NULL END|\n|,\s*')", sql_str)
    return None


# We will actually just run `cargo test -- --nocapture` to grab the actual SQLs
# and do some string replacements.
# Given the complexity, let's build a dedicated node-based formatter in python.