From 15255129c6b75a0e86a6516a48f7a5a149d7c41d Mon Sep 17 00:00:00 2001
From: tkaunlaky-e6 <tanay@e6x.io>
Date: Mon, 2 Mar 2026 14:46:23 +0530
Subject: [PATCH 1/3] Preserve French and EU language characters in
 normalize_unicode_spaces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The isascii() check replaced every non-ASCII character with a space,
which corrupted accented letters such as é, ç and ü (e.g.
"Téléchargement" became "T l chargement"). normalize_unicode_spaces
now uses unicodedata.category() to replace only actual Unicode
whitespace/separators (categories Zs, Zl, Zp) and U+FFFD with a space,
and leaves all other non-ASCII characters, including the accented
letters used in French, German and other languages, unchanged.
---
 apis/utils/helpers.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/apis/utils/helpers.py b/apis/utils/helpers.py
index fcbc511c73..7dbfcf7ea7 100644
--- a/apis/utils/helpers.py
+++ b/apis/utils/helpers.py
@@ -10,7 +10,6 @@
 from sqlglot import exp, parse_one
 import typing as t
 from sqlglot.dialects.e6 import E6
-from curses.ascii import isascii
 
 FUNCTIONS_FILE = os.path.join(os.path.dirname(__file__), "supported_functions_in_all_dialects.json")
 logger = logging.getLogger(__name__)
@@ -564,15 +563,13 @@ def normalize_unicode_spaces(sql: str) -> str:
                 in_quote = ch
                 out_chars.append(ch)
             else:
-                # Normalize replacement-char
-                if not isascii(ch):
+                # Normalize only Unicode whitespace/separators and U+FFFD,
+                # preserve all other non-ASCII chars (French, German, EU languages, etc.)
+                cat = unicodedata.category(ch)
+                if ch == "\uFFFD" or cat in ("Zs", "Zl", "Zp") or (ch.isspace() and ch not in "\r\n"):
                     out_chars.append(" ")
                 else:
-                    cat = unicodedata.category(ch)
-                    if (cat in ("Zs", "Zl", "Zp")) or (ch.isspace() and ch not in "\r\n"):
-                        out_chars.append(" ")
-                    else:
-                        out_chars.append(ch)
+                    out_chars.append(ch)
         i += 1
 
     return "".join(out_chars)

From 7cb6b75f2ca2bb0a970f3edf78270d151e887e55 Mon Sep 17 00:00:00 2001
From: tkaunlaky-e6 <tanay@e6x.io>
Date: Mon, 2 Mar 2026 14:51:15 +0530
Subject: [PATCH 2/3] Apply ruff-format linter fixes

---
 apis/utils/helpers.py |  6 +++++-
 sqlglot/transforms.py | 12 ++++++------
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/apis/utils/helpers.py b/apis/utils/helpers.py
index 7dbfcf7ea7..c43e43b23b 100644
--- a/apis/utils/helpers.py
+++ b/apis/utils/helpers.py
@@ -566,7 +566,11 @@ def normalize_unicode_spaces(sql: str) -> str:
                 # Normalize only Unicode whitespace/separators and U+FFFD,
                 # preserve all other non-ASCII chars (French, German, EU languages, etc.)
                 cat = unicodedata.category(ch)
-                if ch == "\uFFFD" or cat in ("Zs", "Zl", "Zp") or (ch.isspace() and ch not in "\r\n"):
+                if (
+                    ch == "\ufffd"
+                    or cat in ("Zs", "Zl", "Zp")
+                    or (ch.isspace() and ch not in "\r\n")
+                ):
                     out_chars.append(" ")
                 else:
                     out_chars.append(ch)
diff --git a/sqlglot/transforms.py b/sqlglot/transforms.py
index 8a83023056..8b787250f1 100644
--- a/sqlglot/transforms.py
+++ b/sqlglot/transforms.py
@@ -936,9 +936,9 @@ def eliminate_join_marks(expression: exp.Expression) -> exp.Expression:
             if not left_join_table:
                 continue
 
-            assert not (
-                len(left_join_table) > 1
-            ), "Cannot combine JOIN predicates from different tables"
+            assert not (len(left_join_table) > 1), (
+                "Cannot combine JOIN predicates from different tables"
+            )
 
             for col in join_cols:
                 col.set("join_mark", False)
@@ -968,9 +968,9 @@ def eliminate_join_marks(expression: exp.Expression) -> exp.Expression:
 
         if query_from.alias_or_name in new_joins:
             only_old_joins = old_joins.keys() - new_joins.keys()
-            assert (
-                len(only_old_joins) >= 1
-            ), "Cannot determine which table to use in the new FROM clause"
+            assert len(only_old_joins) >= 1, (
+                "Cannot determine which table to use in the new FROM clause"
+            )
 
             new_from_name = list(only_old_joins)[0]
             query.set("from", exp.From(this=old_joins[new_from_name].this))

From 19590a4d8c083b7a79f1d61f45347d57acf8c590 Mon Sep 17 00:00:00 2001
From: tkaunlaky-e6 <tanay@e6x.io>
Date: Mon, 2 Mar 2026 19:05:00 +0530
Subject: [PATCH 3/3] Fix ruff-format for CI (ruff 0.7.2 assert formatting)

---
 sqlglot/transforms.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/sqlglot/transforms.py b/sqlglot/transforms.py
index 8b787250f1..8a83023056 100644
--- a/sqlglot/transforms.py
+++ b/sqlglot/transforms.py
@@ -936,9 +936,9 @@ def eliminate_join_marks(expression: exp.Expression) -> exp.Expression:
             if not left_join_table:
                 continue
 
-            assert not (len(left_join_table) > 1), (
-                "Cannot combine JOIN predicates from different tables"
-            )
+            assert not (
+                len(left_join_table) > 1
+            ), "Cannot combine JOIN predicates from different tables"
 
             for col in join_cols:
                 col.set("join_mark", False)
@@ -968,9 +968,9 @@ def eliminate_join_marks(expression: exp.Expression) -> exp.Expression:
 
         if query_from.alias_or_name in new_joins:
             only_old_joins = old_joins.keys() - new_joins.keys()
-            assert len(only_old_joins) >= 1, (
-                "Cannot determine which table to use in the new FROM clause"
-            )
+            assert (
+                len(only_old_joins) >= 1
+            ), "Cannot determine which table to use in the new FROM clause"
 
             new_from_name = list(only_old_joins)[0]
             query.set("from", exp.From(this=old_joins[new_from_name].this))