Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/left-semi-join.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
-- A data set of six distinct (a, b) rows: every a in {1, 2, 3} paired with every b in {1, 2}.
-- No row is repeated. It is the individual column values that repeat across rows,
-- so a join predicate on a single column has several candidate matches.
-- The trailing alias names the inline VALUES relation and supplies the column names a and b.
CREATE OR REPLACE TEMPORARY VIEW duplicateColumnValueData AS SELECT * FROM VALUES
(1, 1),
(1, 2),
(2, 1),
(2, 2),
(3, 1),
(3, 2)
as duplicateRowData(a, b);

-- left semi join with only a non-equi predicate (>=, i.e. greater than or equal)
-- x.a >= y.a + 2 can hold only for x.a = 3 (against y.a = 1), so the two a = 3 rows
-- of x are expected. Each is returned once, with only the columns of x (a, b).
SELECT *
FROM duplicateColumnValueData x LEFT SEMI JOIN duplicateColumnValueData y
ON x.a >= y.a + 2;

-- left semi join with an equality and a greater-than-or-equal predicate #1
-- Both predicates compare like-named columns (b with b, a with a).
-- Only x.a = 3 satisfies x.a >= y.a + 2, and each a = 3 row of x finds the y row
-- (1, same b), so the two a = 3 rows of x are expected.
SELECT *
FROM duplicateColumnValueData x LEFT SEMI JOIN duplicateColumnValueData y
ON x.b = y.b and x.a >= y.a + 2;

-- left semi join with an equality and a greater-than-or-equal predicate #2
-- Here the predicates cross columns: x.b is matched to y.a, and x.a is compared to y.b.
-- For every x row there is a y row with y.a = x.b and y.b = 1, so the condition
-- reduces to x.a >= 2 and the four rows of x with a in {2, 3} are expected.
SELECT *
FROM duplicateColumnValueData x LEFT SEMI JOIN duplicateColumnValueData y
ON x.b = y.a and x.a >= y.b + 1;
38 changes: 38 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/using-join.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
-- ut1: three rows of string columns (c1, c2, c3).
-- The c1 values "r1c1" and "r2c1" also occur in ut2, while "r3c1x" occurs only in ut1.
-- The c3 values carry a "t1" prefix so the originating side is visible in join output.
create temporary view ut1 as select * from values
("r1c1", "r1c2", "t1r1c3"),
("r2c1", "r2c2", "t1r2c3"),
("r3c1x", "r3c2", "t1r3c3")
as ut1(c1, c2, c3);

-- ut2: same shape as ut1, with c3 values prefixed "t2".
-- The c1 values "r1c1" and "r2c1" match ut1, while "r3c1y" has no counterpart in ut1.
create temporary view ut2 as select * from values
("r1c1", "r1c2", "t2r1c3"),
("r2c1", "r2c2", "t2r2c3"),
("r3c1y", "r3c2", "t2r3c3")
as ut2(c1, c2, c3);

-- ut3: like ut2 (c3 values prefixed "t3"), except that c1 of the first row is NULL.
-- The NULL is cast to String so that c1 is a string column, as in ut1 and ut2.
-- Used to check how a NULL in the USING column behaves in a join.
create temporary view ut3 as select * from values
(CAST(null as String), "r1c2", "t3r1c3"),
("r2c1", "r2c2", "t3r2c3"),
("r3c1y", "r3c2", "t3r3c3")
as ut3(c1, c2, c3);

-- inner join with one using column
-- c1 is expected once in the output, followed by the remaining columns of ut1 and then of ut2.
SELECT * FROM ut1 join ut2 using (c1);

-- inner join with two using columns
-- c1 and c2 are each expected once, followed by c3 of ut1 and c3 of ut2.
SELECT * FROM ut1 join ut2 using (c1, c2);

-- left outer join with one using column.
-- The unmatched ut1 row ("r3c1x") is kept, with NULL for the columns of ut2.
SELECT * FROM ut1 left join ut2 using (c1);

-- right outer join with one using column.
-- The unmatched ut2 row ("r3c1y") is kept, with NULL for the columns of ut1,
-- and the single c1 output column carries the value from ut2.
SELECT * FROM ut1 right join ut2 using (c1);

-- full outer join with one using column.
-- Unmatched rows from both sides ("r3c1x" and "r3c1y") are kept.
SELECT * FROM ut1 full outer join ut2 using (c1);

-- full outer join with null value in join column.
-- The NULL c1 row of ut3 matches no ut1 row, and "r1c1" of ut1 has no partner in ut3,
-- so both are expected as unmatched rows.
SELECT * FROM ut1 full outer join ut3 using (c1);

-- self join with using columns.
-- Every ut1 row matches itself on c1, so all three rows are expected.
SELECT * FROM ut1 join ut1 using (c1);
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 4


-- !query 0
CREATE OR REPLACE TEMPORARY VIEW duplicateColumnValueData AS SELECT * FROM VALUES
(1, 1),
(1, 2),
(2, 1),
(2, 2),
(3, 1),
(3, 2)
as duplicateRowData(a, b)
-- !query 0 schema
struct<>
-- !query 0 output



-- !query 1
SELECT *
FROM duplicateColumnValueData x LEFT SEMI JOIN duplicateColumnValueData y
ON x.a >= y.a + 2
-- !query 1 schema
struct<a:int,b:int>
-- !query 1 output
3 1
3 2


-- !query 2
SELECT *
FROM duplicateColumnValueData x LEFT SEMI JOIN duplicateColumnValueData y
ON x.b = y.b and x.a >= y.a + 2
-- !query 2 schema
struct<a:int,b:int>
-- !query 2 output
3 1
3 2


-- !query 3
SELECT *
FROM duplicateColumnValueData x LEFT SEMI JOIN duplicateColumnValueData y
ON x.b = y.a and x.a >= y.b + 1
-- !query 3 schema
struct<a:int,b:int>
-- !query 3 output
2 1
2 2
3 1
3 2
109 changes: 109 additions & 0 deletions sql/core/src/test/resources/sql-tests/results/using-join.sql.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 10


-- !query 0
create temporary view ut1 as select * from values
("r1c1", "r1c2", "t1r1c3"),
("r2c1", "r2c2", "t1r2c3"),
("r3c1x", "r3c2", "t1r3c3")
as ut1(c1, c2, c3)
-- !query 0 schema
struct<>
-- !query 0 output



-- !query 1
create temporary view ut2 as select * from values
("r1c1", "r1c2", "t2r1c3"),
("r2c1", "r2c2", "t2r2c3"),
("r3c1y", "r3c2", "t2r3c3")
as ut2(c1, c2, c3)
-- !query 1 schema
struct<>
-- !query 1 output



-- !query 2
create temporary view ut3 as select * from values
(CAST(null as String), "r1c2", "t3r1c3"),
("r2c1", "r2c2", "t3r2c3"),
("r3c1y", "r3c2", "t3r3c3")
as ut3(c1, c2, c3)
-- !query 2 schema
struct<>
-- !query 2 output



-- !query 3
SELECT * FROM ut1 join ut2 using (c1)
-- !query 3 schema
struct<c1:string,c2:string,c3:string,c2:string,c3:string>
-- !query 3 output
r1c1 r1c2 t1r1c3 r1c2 t2r1c3
r2c1 r2c2 t1r2c3 r2c2 t2r2c3


-- !query 4
SELECT * FROM ut1 join ut2 using (c1, c2)
-- !query 4 schema
struct<c1:string,c2:string,c3:string,c3:string>
-- !query 4 output
r1c1 r1c2 t1r1c3 t2r1c3
r2c1 r2c2 t1r2c3 t2r2c3


-- !query 5
SELECT * FROM ut1 left join ut2 using (c1)
-- !query 5 schema
struct<c1:string,c2:string,c3:string,c2:string,c3:string>
-- !query 5 output
r1c1 r1c2 t1r1c3 r1c2 t2r1c3
r2c1 r2c2 t1r2c3 r2c2 t2r2c3
r3c1x r3c2 t1r3c3 NULL NULL


-- !query 6
SELECT * FROM ut1 right join ut2 using (c1)
-- !query 6 schema
struct<c1:string,c2:string,c3:string,c2:string,c3:string>
-- !query 6 output
r1c1 r1c2 t1r1c3 r1c2 t2r1c3
r2c1 r2c2 t1r2c3 r2c2 t2r2c3
r3c1y NULL NULL r3c2 t2r3c3


-- !query 7
SELECT * FROM ut1 full outer join ut2 using (c1)
-- !query 7 schema
struct<c1:string,c2:string,c3:string,c2:string,c3:string>
-- !query 7 output
r1c1 r1c2 t1r1c3 r1c2 t2r1c3
r2c1 r2c2 t1r2c3 r2c2 t2r2c3
r3c1x r3c2 t1r3c3 NULL NULL
r3c1y NULL NULL r3c2 t2r3c3


-- !query 8
SELECT * FROM ut1 full outer join ut3 using (c1)
-- !query 8 schema
struct<c1:string,c2:string,c3:string,c2:string,c3:string>
-- !query 8 output
NULL NULL NULL r1c2 t3r1c3
r1c1 r1c2 t1r1c3 NULL NULL
r2c1 r2c2 t1r2c3 r2c2 t3r2c3
r3c1x r3c2 t1r3c3 NULL NULL
r3c1y NULL NULL r3c2 t3r3c3


-- !query 9
SELECT * FROM ut1 join ut1 using (c1)
-- !query 9 schema
struct<c1:string,c2:string,c3:string,c2:string,c3:string>
-- !query 9 output
r1c1 r1c2 t1r1c3 r1c2 t1r1c3
r2c1 r2c2 t1r2c3 r2c2 t1r2c3
r3c1x r3c2 t1r3c3 r3c2 t1r3c3
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.plans.{Inner, LeftOuter, RightOuter}
import org.apache.spark.sql.catalyst.plans.logical.Join
import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec
import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSQLContext

class DataFrameJoinSuite extends QueryTest with SharedSQLContext {
Expand Down Expand Up @@ -226,6 +227,42 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext {
)
}

test("cartesian product join") {
  // Runs with SQLConf.CROSS_JOINS_ENABLED set to "true" for the duration of the block.
  withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
    // Joining testData3 with itself without a condition should pair every row with
    // every row. NOTE(review): the expected rows imply testData3 holds (1, null) and
    // (2, 2); confirm against the fixture definition.
    val expectedRows = Seq(
      Row(1, null, 1, null),
      Row(1, null, 2, 2),
      Row(2, 2, 1, null),
      Row(2, 2, 2, 2))
    checkAnswer(testData3.join(testData3), expectedRows)
  }
}

test("SortMergeJoin returns wrong results when using UnsafeRows") {
  // Regression test for https://issues.apache.org/jira/browse/SPARK-10737.
  // The bug is triggered when Tungsten is enabled and several SortMergeJoin
  // operators are executed in the same task.
  // NOTE(review): the threshold of "1" presumably keeps these joins from being
  // planned as broadcast joins; confirm.
  withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "1") {
    val base = (1 to 50).map(i => (s"str_$i", i)).toDF("i", "j")

    // First join: base joined with its own "i" column via a USING-style join.
    val firstJoin = base.join(base.select(base("i")), "i").select(base("i"), base("j"))

    // Second join: the first result joined with a renamed copy of itself.
    val renamed = firstJoin.withColumnRenamed("i", "i1").withColumnRenamed("j", "j1")
    val secondJoin = firstJoin
      .join(renamed, firstJoin("i") === renamed("i1"))
      .withColumn("diff", $"j" - $"j1")
      .select(firstJoin("i"), firstJoin("j"), $"diff")

    // Each row is matched with its own copy, so "diff" must be 0 for every row.
    checkAnswer(secondJoin, base.withColumn("diff", lit(0)))
  }
}

test("SPARK-16991: Full outer join followed by inner join produces wrong results") {
val a = Seq((1, 2), (2, 3)).toDF("a", "b")
val b = Seq((2, 5), (3, 4)).toDF("a", "c")
Expand Down
Loading