fixing ordering checkpoint

2026-05-14 03:26:03 -04:00
parent 3034406706
commit ce9c9baac9
2 changed files with 129 additions and 4 deletions
--- a/scripts/format_fixtures.py
+++ b/scripts/format_fixtures.py
@ -0,0 +1,128 @@
+import json
+import re
+import subprocess
+import os
+
+def format_sql(sql_str):
+    """
+    Split a single-line SQL string from the test runner into indented lines.
+
+    The layout follows heuristics modelled on the jspg project fixtures:
+    line breaks before FROM / JOIN / WHERE / AND, one key per line inside
+    jsonb_build_object, CASE branches on their own lines, and two spaces of
+    indentation for every parenthesis a line leaves open.
+
+    Args:
+        sql_str: SQL text, expected to be on a single line; operators may be
+            written without surrounding spaces (e.g. ``a.b=c.d``).
+
+    Returns:
+        A list of formatted lines, in order; the lines are not joined into a
+        single string.
+
+    Note:
+        The rewrite is purely textual, so keywords and parentheses that occur
+        inside quoted string literals are treated like any others.
+
+    Example:
+        >>> format_sql("SELECT a FROM t WHERE a.x='y' AND a.z>t.z")
+        ['SELECT a', 'FROM t', 'WHERE', "a.x = 'y'", '  AND a.z > t.z']
+    """
+    # Keywords are only rewritten when they are not the tail of a longer
+    # identifier, so a name such as "BRAND " is not split into "BR AND ".
+    # (A plain str.replace would also match inside such names.)
+    not_ident = r"(?<![A-Za-z0-9_])"
+
+    # 1. Normalize spaces around operators, which makes splitting easier.
+    # The actual SQL doesn't have spaces around =, >, <, etc.
+    sql_str = re.sub(r'([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)=([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)', r'\1.\2 = \3.\4', sql_str)
+    sql_str = re.sub(r"([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)='([a-zA-Z0-9_]+)'", r"\1.\2 = '\3'", sql_str)
+    sql_str = re.sub(r"([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)>([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)", r"\1.\2 > \3.\4", sql_str)
+    sql_str = re.sub(not_ident + r"AND ", " AND ", sql_str)
+
+    # 2. Pre-process for line breaks; indentation is fixed up afterwards.
+    s = sql_str
+
+    # Break before certain keywords
+    s = s.replace("(SELECT COALESCE", "\n(SELECT COALESCE")
+    s = re.sub(not_ident + r"FROM ", "\nFROM ", s)
+    s = re.sub(not_ident + r"JOIN ", "\nJOIN ", s)
+    s = re.sub(not_ident + r"WHERE ", "\nWHERE\n  ", s)
+    s = s.replace(" AND ", "\n  AND ")
+
+    # Break before keys in jsonb_build_object that are followed by a subquery,
+    # i.e. text of the form: ,'key_name',(SELECT
+    s = re.sub(r",('([^']+)')\s*,\s*\(SELECT", r",\n\1,\n(SELECT", s)
+
+    # Also break scalar keys in jsonb_build_object
+    s = re.sub(r",('([^']+)')\s*,", r",\n\1, ", s)
+    s = s.replace("jsonb_build_object('", "jsonb_build_object(\n'")
+
+    # CASE statements
+    s = s.replace("CASE WHEN", "CASE\nWHEN")
+    s = s.replace("THEN(", "THEN\n(")
+    s = s.replace("ELSE NULL END", "\nELSE NULL END")
+    s = s.replace(" WHEN ", "\nWHEN ")
+
+    # Drop blank lines and surrounding whitespace; indentation is recomputed
+    # from scratch in the pass below.
+    lines = [l.strip() for l in s.split('\n') if l.strip()]
+
+    # 3. Compute indentation from parenthesis matching and keywords.
+    formatted_lines = []
+    current_indent = 0
+
+    for line in lines:
+        # Leading ")" characters close scopes opened on earlier lines, so
+        # they dedent before this line is emitted.
+        close_paren_count = 0
+        while line.startswith(')'):
+            close_paren_count += 1
+            line = line[1:]
+
+        if close_paren_count > 0:
+            current_indent = max(0, current_indent - 2 * close_paren_count)
+            if not line:
+                # The line held nothing but closing parens: emit them alone.
+                formatted_lines.append(" " * current_indent + ")" * close_paren_count)
+                continue
+            # Otherwise the parens are re-attached when the line is built below.
+
+        # Continuation keywords get two extra spaces; FROM / JOIN / WHERE
+        # stay at the indentation of the enclosing scope.
+        if line.startswith(("AND ", "WHEN ", "ELSE ")):
+            line = "  " + line
+
+        # A key/value pair inside jsonb_build_object gets two extra spaces.
+        if line.startswith("'") and "jsonb_build_object" not in line:
+            line = "  " + line
+
+        # A subquery that builds an object is pushed in by two spaces as well.
+        if line.startswith("(SELECT jsonb_build_object"):
+            line = "  " + line
+
+        formatted_line = (" " * current_indent) + (")" * close_paren_count) + line
+
+        # Parens left open by this line indent the lines that follow.
+        # NOTE(review): a net-negative count is ignored here, so a ")" that is
+        # not at the start of a line never dedents -- confirm this is intended.
+        open_paren_count = line.count('(') - line.count(')')
+        current_indent += max(0, open_paren_count * 2)
+
+        formatted_lines.append(formatted_line)
+
+    return formatted_lines
+
+def format_sql_regex(sql_str):
+    """Tokenise *sql_str* on structural SQL keywords; unfinished, returns None."""
+    # Nested SELECTs need custom tokenising, so the splitter keeps every
+    # delimiter (capturing group) alongside the text between delimiters.
+    splitter = r"(\(SELECT COALESCE|\(SELECT jsonb_build_object|FROM|JOIN|WHERE|AND|CASE|WHEN|THEN|ELSE NULL END|\n|,\s*')"
+    tokens = re.split(splitter, sql_str)
+
+    # Placeholder: the tokens are not consumed yet.
+    return None
+
+# Plan: run `cargo test -- --nocapture` to capture the actual SQL strings and
+# apply string replacements to them.
+# Given the complexity, the next step is a dedicated node-based formatter in Python.
--- a/src/queryer/compiler.rs
+++ b/src/queryer/compiler.rs
@ -1,6 +1,6 @@
 use crate::database::Database;
-use std::sync::Arc;
 use indexmap::IndexMap;
+use std::sync::Arc;

 pub struct Compiler<'a> {
  pub db: &'a Database,
@ -378,10 +378,7 @@ impl<'a> Compiler<'a> {
      return Ok(("NULL".to_string(), "string".to_string()));
    }

-    case_statements.sort();
-
    let sql = format!("CASE {} ELSE NULL END", case_statements.join(" "));
-
    Ok((sql, "object".to_string()))
  }