Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
b68e9e1
TRANSLATE to REPLACE function mapping
NiranjGaurav Jul 22, 2025
7a7f372
Add FIND_IN_SET function mapping from Databricks to E6
Jul 22, 2025
2b0b695
Merge branch 'learning_collabrative' of https://github.com/tkaunlaky-…
Jul 22, 2025
06c9a76
TRANSLATE to REPLACE function mapping as well as TYPEOF function mapp…
NiranjGaurav Jul 22, 2025
a606981
TRANSLATE to REPLACE function mapping as well as TYPEOF function mapp…
NiranjGaurav Jul 22, 2025
a22853a
Zepto saturday, sunday issue sorted
NiranjGaurav Jul 23, 2025
30b253a
Zepto saturday, sunday issue sorted
NiranjGaurav Jul 23, 2025
bb45aab
P1 - SATURDAY & SUNDAY keyword issue
NiranjGaurav Jul 24, 2025
a668190
P1 - SATURDAY & SUNDAY keyword issue
NiranjGaurav Jul 24, 2025
e7c28c7
P1 - SATURDAY & SUNDAY keyword issue
NiranjGaurav Jul 24, 2025
a5c33d2
P1 - SATURDAY & SUNDAY keyword issue
NiranjGaurav Jul 24, 2025
ff9a07c
P1 - SATURDAY & SUNDAY keyword issue
NiranjGaurav Jul 24, 2025
e3f2891
P1 - SATURDAY & SUNDAY keyword issue
NiranjGaurav Jul 24, 2025
220284d
Merge branch 'main' into learning_collabrative
NiranjGaurav Jul 26, 2025
81624dc
Add FIND_IN_SET function mapping from Databricks to E6
Jul 22, 2025
93981f3
TRANSLATE to REPLACE function mapping
NiranjGaurav Jul 22, 2025
b181dd3
TRANSLATE to REPLACE function mapping as well as TYPEOF function mapp…
NiranjGaurav Jul 22, 2025
32adb0a
TRANSLATE to REPLACE function mapping as well as TYPEOF function mapp…
NiranjGaurav Jul 22, 2025
7d0795f
Zepto saturday, sunday issue sorted
NiranjGaurav Jul 23, 2025
adc0236
Zepto saturday, sunday issue sorted
NiranjGaurav Jul 23, 2025
5d1b252
P1 - SATURDAY & SUNDAY keyword issue
NiranjGaurav Jul 24, 2025
759ed2f
P1 - SATURDAY & SUNDAY keyword issue
NiranjGaurav Jul 24, 2025
7dd5859
P1 - SATURDAY & SUNDAY keyword issue
NiranjGaurav Jul 24, 2025
ca613b7
P1 - SATURDAY & SUNDAY keyword issue
NiranjGaurav Jul 24, 2025
8ed6d43
P1 - SATURDAY & SUNDAY keyword issue
NiranjGaurav Jul 24, 2025
e8dc9df
P1 - SATURDAY & SUNDAY keyword issue
NiranjGaurav Jul 24, 2025
1d03bc6
P1 - SATURDAY & SUNDAY keyword issue
NiranjGaurav Jul 31, 2025
64bc5bb
[FIX]: Added WIDTH_BUCKET, RAND, CORR, COVAR_POP, URL_DECODE to suppo…
Adithyak-0926 Jul 23, 2025
59b97a4
[FIX]: Quick fix for moengage in not_sql
Adithyak-0926 Jul 23, 2025
4d289ad
Zepto saturday, sunday issue sorted
NiranjGaurav Jul 23, 2025
8b9fa90
SPACE function (databricks) to REPEAT(' ', n) in e6
NiranjGaurav Jul 21, 2025
c1be1e8
SPACE function (databricks) to REPEAT(' ', n) in e6
NiranjGaurav Jul 23, 2025
c15bc7a
SPACE function (databricks) to REPEAT(' ', n) in e6
NiranjGaurav Jul 22, 2025
8168c8c
SPACE function (databricks) to REPEAT(' ', n) in e6
NiranjGaurav Jul 23, 2025
679e440
TIMEDIFF to TIMESTAMP_DIFF
NiranjGaurav Jul 21, 2025
26a7107
SPACE function (databricks) to REPEAT(' ', n) in e6
NiranjGaurav Jul 23, 2025
207e6ca
SPACE function (databricks) to REPEAT(' ', n) in e6
NiranjGaurav Jul 23, 2025
2e6ec88
Interval issue sorted
NiranjGaurav Jul 23, 2025
6133d6f
Interval issue sorted
NiranjGaurav Jul 23, 2025
68fc41b
P1 - SATURDAY & SUNDAY keyword issue
NiranjGaurav Jul 24, 2025
10b51aa
P1 - SATURDAY & SUNDAY keyword issue
NiranjGaurav Jul 24, 2025
ea3380d
Map TIMESTAMP_SECONDS to FROM_UNIXTIME
Jul 18, 2025
09cd13d
Refactor TIMESTAMP_SECONDS to use UnixToTime with scale parameter
Jul 22, 2025
ce3601a
Ran make check
Jul 23, 2025
188c04a
Add TIMESTAMP_SECONDS to Spark parser
Jul 24, 2025
4bd6acb
[FIX]: Ran make check
Adithyak-0926 Jul 24, 2025
b74c8a1
JSON issue
NiranjGaurav Jul 25, 2025
8cb7344
JSON issue
NiranjGaurav Jul 25, 2025
bf7689b
JSON issue
NiranjGaurav Jul 25, 2025
b78de56
P1 - SATURDAY & SUNDAY keyword issue
NiranjGaurav Jul 31, 2025
c920949
P1 - SATURDAY & SUNDAY keyword issue
NiranjGaurav Jul 31, 2025
6d85489
Ran make check and removed the comments added in the databricks parse…
NiranjGaurav Jul 31, 2025
ce058f4
Merge remote-tracking branch 'origin/learning_collabrative' into lear…
NiranjGaurav Jul 31, 2025
cb0a212
Rebase issues solved and ran make check
NiranjGaurav Jul 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions apis/utils/supported_functions_in_all_dialects.json
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,7 @@
"DISTINCT",
"STDDEV",
"FILTER_ARRAY",
"FIND_IN_SET",
"TIMESTAMP",
"REGEXP_CONTAINS",
"CASE",
Expand Down Expand Up @@ -785,13 +786,15 @@
"LAST_DAY_OF_MONTH",
"FORMAT_DATETIME",
"COUNT_IF",
"WIDTH_BUCKET",
"RAND",
"CORR",
"COVAR_POP",
"URL_DECODE",
"TRANSFORM",
"ARRAY_INTERSECT",
"TYPEOF",
"TIMEDIFF",
"INTERVAL"
],
"databricks": [
"ABS",
Expand Down
4 changes: 4 additions & 0 deletions sqlglot/dialects/databricks.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ class Parser(Spark.Parser):
"DATE_ADD": build_date_delta(exp.DateAdd),
"DATEDIFF": build_date_delta(exp.DateDiff),
"DATE_DIFF": build_date_delta(exp.DateDiff),
"FIND_IN_SET": exp.FindInSet.from_arg_list,
"GETDATE": exp.CurrentTimestamp.from_arg_list,
"GET_JSON_OBJECT": _build_json_extract,
"TO_DATE": build_formatted_time(exp.TsOrDsToDate, "databricks"),
Expand All @@ -115,6 +116,9 @@ class Parser(Spark.Parser):
"TIMEDIFF": lambda args: exp.TimestampDiff(
unit=seq_get(args, 0), this=seq_get(args, 1), expression=seq_get(args, 2)
),
"TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(
this=seq_get(args, 0), scale=exp.Literal.string("seconds")
),
}

FACTOR = {
Expand Down
5 changes: 5 additions & 0 deletions sqlglot/dialects/e6.py
Original file line number Diff line number Diff line change
Expand Up @@ -1494,6 +1494,7 @@ def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
),
"TRUNC": date_trunc_to_time,
"TRIM": lambda self: self._parse_trim(),
"TYPEOF": lambda args: exp.TypeOf(this=seq_get(args, 0)),
"UNNEST": lambda args: exp.Explode(this=seq_get(args, 0)),
# TODO:: I have removed the _parse_unnest_sql, was it really required
# It was added due to some requirements before but those were asked to remove afterwards so it should not matter now
Expand Down Expand Up @@ -2188,12 +2189,16 @@ def split_sql(self, expression: exp.Split | exp.RegexpSplit):
TRANSFORMS = {
**generator.Generator.TRANSFORMS,
exp.Anonymous: anonymous_sql,
exp.FindInSet: lambda self, e: self.func(
"ARRAY_POSITION", e.this, self.func("SPLIT", e.expression, exp.Literal.string(","))
),
exp.AnyValue: rename_func("ARBITRARY"),
exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
exp.ArgMax: rename_func("MAX_BY"),
exp.ArgMin: rename_func("MIN_BY"),
exp.Array: array_sql,
exp.TypeOf: rename_func("TYPEOF"),
exp.ArrayAgg: rename_func("ARRAY_AGG"),
exp.ArrayConcat: rename_func("ARRAY_CONCAT"),
exp.ArrayIntersect: rename_func("ARRAY_INTERSECT"),
Expand Down
1 change: 1 addition & 0 deletions sqlglot/dialects/spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ class Parser(Spark2.Parser):
"TIMESTAMPDIFF": build_date_delta(exp.TimestampDiff),
"DATEDIFF": _build_datediff,
"DATE_DIFF": _build_datediff,
"TYPEOF": lambda args: exp.TypeOf(this=seq_get(args, 0)),
"TIMESTAMP_LTZ": _build_as_cast("TIMESTAMP_LTZ"),
"TIMESTAMP_NTZ": _build_as_cast("TIMESTAMP_NTZ"),
"TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(
Expand Down
19 changes: 19 additions & 0 deletions sqlglot/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5495,6 +5495,10 @@ class ToArray(Func):
pass


class TypeOf(Func):
arg_types = {"this": True}


# https://materialize.com/docs/sql/types/list/
class List(Func):
arg_types = {"expressions": False}
Expand Down Expand Up @@ -6858,6 +6862,21 @@ class StrPosition(Func):
}


class FindInSet(Func):
"""
FIND_IN_SET function that returns the position of a string within a comma-separated list of strings.

Returns:
The position (1-based) of searchExpr in sourceExpr, or 0 if not found or if searchExpr contains a comma.

Args:
this: The string to search for (searchExpr)
expression: The comma-separated list of strings to search in (sourceExpr)
"""

arg_types = {"this": True, "expression": True}


class StrToDate(Func):
arg_types = {"this": True, "format": False, "safe": False}

Expand Down
24 changes: 12 additions & 12 deletions sqlglot/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -888,9 +888,9 @@ def eliminate_join_marks(expression: exp.Expression) -> exp.Expression:
continue

predicate = column.find_ancestor(exp.Predicate, exp.Select)
assert isinstance(predicate, exp.Binary), (
"Columns can only be marked with (+) when involved in a binary operation"
)
assert isinstance(
predicate, exp.Binary
), "Columns can only be marked with (+) when involved in a binary operation"

predicate_parent = predicate.parent
join_predicate = predicate.pop()
Expand All @@ -902,9 +902,9 @@ def eliminate_join_marks(expression: exp.Expression) -> exp.Expression:
c for c in join_predicate.right.find_all(exp.Column) if c.args.get("join_mark")
]

assert not (left_columns and right_columns), (
"The (+) marker cannot appear in both sides of a binary predicate"
)
assert not (
left_columns and right_columns
), "The (+) marker cannot appear in both sides of a binary predicate"

marked_column_tables = set()
for col in left_columns or right_columns:
Expand All @@ -914,9 +914,9 @@ def eliminate_join_marks(expression: exp.Expression) -> exp.Expression:
col.set("join_mark", False)
marked_column_tables.add(table)

assert len(marked_column_tables) == 1, (
"Columns of only a single table can be marked with (+) in a given binary predicate"
)
assert (
len(marked_column_tables) == 1
), "Columns of only a single table can be marked with (+) in a given binary predicate"

# Add predicate if join already copied, or add join if it is new
join_this = old_joins.get(col.table, query_from).this
Expand All @@ -938,9 +938,9 @@ def eliminate_join_marks(expression: exp.Expression) -> exp.Expression:
only_old_join_sources = old_joins.keys() - new_joins.keys()

if query_from.alias_or_name in new_joins:
assert len(only_old_join_sources) >= 1, (
"Cannot determine which table to use in the new FROM clause"
)
assert (
len(only_old_join_sources) >= 1
), "Cannot determine which table to use in the new FROM clause"

new_from_name = list(only_old_join_sources)[0]
query.set("from", exp.From(this=old_joins.pop(new_from_name).this))
Expand Down
32 changes: 32 additions & 0 deletions tests/dialects/test_e6.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ def test_E6(self):
},
)

self.validate_all(
"SELECT TYPEOF('hello')",
read={
"databricks": "SELECT TYPEOF('hello');",
"spark": "SELECT TYPEOF('hello');",
"spark2": "SELECT TYPEOF('hello');",
"snowflake": "SELECT TYPEOF('hello');",
},
)
self.validate_all(
"SELECT ARRAY_INTERSECT(ARRAY[1, 2, 3], ARRAY[1, 3, 3, 5])",
read={
Expand Down Expand Up @@ -694,6 +703,29 @@ def test_E6(self):
},
)

# FIND_IN_SET function tests - Databricks to E6 transpilation
self.validate_all(
"SELECT ARRAY_POSITION('ab', SPLIT('abc,b,ab,c,def', ','))",
read={
"databricks": "SELECT FIND_IN_SET('ab', 'abc,b,ab,c,def')",
},
)

self.validate_all(
"SELECT ARRAY_POSITION('test', SPLIT('hello,world,test', ','))",
read={
"databricks": "SELECT FIND_IN_SET('test', 'hello,world,test')",
},
)

# Test FIND_IN_SET with column references
self.validate_all(
"SELECT ARRAY_POSITION(search_col, SPLIT(list_col, ',')) FROM table1",
read={
"databricks": "SELECT FIND_IN_SET(search_col, list_col) FROM table1",
},
)

def test_regex(self):
self.validate_all(
"REGEXP_REPLACE('abcd', 'ab', '')",
Expand Down