@@ -17,6 +17,7 @@

package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.errors
import org.apache.spark.sql.catalyst.expressions._
@@ -69,8 +70,64 @@ case class UnresolvedAttribute(nameParts: Seq[String]) extends Attribute with Un
}

object UnresolvedAttribute {
  /**
   * Creates an [[UnresolvedAttribute]], parsing segments separated by dots ('.').
   */
  def apply(name: String): UnresolvedAttribute = new UnresolvedAttribute(name.split("\\."))

  /**
   * Creates an [[UnresolvedAttribute]] from a single quoted string (for example, using backticks
   * in HiveQL). Since the string is considered quoted, no processing is done on the name.
   */
  def quoted(name: String): UnresolvedAttribute = new UnresolvedAttribute(Seq(name))

  /**
   * Creates an [[UnresolvedAttribute]] from a string in an embedded language. In this case
   * we treat it as a quoted identifier, except for '.', which must be further quoted using
   * backticks if it is part of a column name.
   */
  def quotedString(name: String): UnresolvedAttribute =
    new UnresolvedAttribute(parseAttributeName(name))

  /**
   * Splits an attribute name on dots while honoring backtick quoting.
   * Backticks must appear in pairs, and a quoted string must form a complete name part,
   * which means `ab..c`e.f is not allowed.
   * Escape characters are not supported yet, so a backtick cannot be used inside a name part.
   */
  def parseAttributeName(name: String): Seq[String] = {
    def e = new AnalysisException(s"syntax error in attribute name: $name")
    val nameParts = scala.collection.mutable.ArrayBuffer.empty[String]
    val tmp = scala.collection.mutable.ArrayBuffer.empty[Char]
    var inBacktick = false
    var i = 0
    while (i < name.length) {
      val char = name(i)
      if (inBacktick) {
        if (char == '`') {
          inBacktick = false
          if (i + 1 < name.length && name(i + 1) != '.') throw e
        } else {
          tmp += char
        }
      } else {
        if (char == '`') {
          if (tmp.nonEmpty) throw e
          inBacktick = true
        } else if (char == '.') {
          if (name(i - 1) == '.' || i == name.length - 1) throw e
          nameParts += tmp.mkString
          tmp.clear()
        } else {
          tmp += char
        }
      }
      i += 1
    }
    if (inBacktick) throw e
    nameParts += tmp.mkString
    nameParts.toSeq
  }
}
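For reference, here is a small illustrative sketch (not part of this diff) of the name parts the three factories above produce, following the rules documented for parseAttributeName:

import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute

// apply splits on every dot and leaves backticks in place.
assert(UnresolvedAttribute("a.`b`").nameParts == Seq("a", "`b`"))
// quoted performs no parsing; the whole string is one name part.
assert(UnresolvedAttribute.quoted("a.b").nameParts == Seq("a.b"))
// quotedString splits on dots but keeps a backtick-quoted section as a single part.
assert(UnresolvedAttribute.quotedString("a.`name.with.dot`").nameParts == Seq("a", "name.with.dot"))
// Malformed names such as `ab..c`e.f make parseAttributeName throw an AnalysisException.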

case class UnresolvedFunction(
@@ -130,47 +130,7 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging {
  def resolveQuoted(
      name: String,
      resolver: Resolver): Option[NamedExpression] = {
    resolve(parseAttributeName(name), output, resolver)
  }

  /**
   * Internal method, used to split attribute name by dot with backticks rule.
   * Backticks must appear in pairs, and the quoted string must be a complete name part,
   * which means `ab..c`e.f is not allowed.
   * Escape character is not supported now, so we can't use backtick inside name part.
   */
  private def parseAttributeName(name: String): Seq[String] = {
    val e = new AnalysisException(s"syntax error in attribute name: $name")
    val nameParts = scala.collection.mutable.ArrayBuffer.empty[String]
    val tmp = scala.collection.mutable.ArrayBuffer.empty[Char]
    var inBacktick = false
    var i = 0
    while (i < name.length) {
      val char = name(i)
      if (inBacktick) {
        if (char == '`') {
          inBacktick = false
          if (i + 1 < name.length && name(i + 1) != '.') throw e
        } else {
          tmp += char
        }
      } else {
        if (char == '`') {
          if (tmp.nonEmpty) throw e
          inBacktick = true
        } else if (char == '.') {
          if (name(i - 1) == '.' || i == name.length - 1) throw e
          nameParts += tmp.mkString
          tmp.clear()
        } else {
          tmp += char
        }
      }
      i += 1
    }
    if (inBacktick) throw e
    nameParts += tmp.mkString
    nameParts.toSeq
    resolve(UnresolvedAttribute.parseAttributeName(name), output, resolver)
  }
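A minimal sketch (not from this patch) of the behaviour resolveQuoted gains from the shared parser; the LocalRelation plan and its dotted column name below are hypothetical:

import org.apache.spark.sql.catalyst.analysis.caseInsensitiveResolution
import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
import org.apache.spark.sql.types.IntegerType

// Hypothetical single-column plan whose column name itself contains dots.
val plan = LocalRelation(AttributeReference("name.with.dot", IntegerType)())

// The backticks keep the dotted name as one part, so the attribute resolves
// instead of being interpreted as a chain of nested fields.
assert(plan.resolveQuoted("`name.with.dot`", caseInsensitiveResolution).isDefined)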

/**
2 changes: 1 addition & 1 deletion sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -54,7 +54,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
  def this(name: String) = this(name match {
    case "*" => UnresolvedStar(None)
    case _ if name.endsWith(".*") => UnresolvedStar(Some(name.substring(0, name.length - 2)))
    case _ => UnresolvedAttribute(name)
    case _ => UnresolvedAttribute.quotedString(name)
  })
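As a rough illustration (not part of the change itself) of what the revised constructor builds for different name strings, assuming the Catalyst classes referenced above:

import org.apache.spark.sql.Column

val star   = new Column("*")                  // expr: UnresolvedStar(None)
val prefix = new Column("a.*")                // expr: UnresolvedStar(Some("a"))
val dotted = new Column("a.`name.with.dot`")  // expr: UnresolvedAttribute with name parts Seq("a", "name.with.dot")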

  /** Creates a column based on the given expression. */
@@ -340,7 +340,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   */
  implicit class StringToColumn(val sc: StringContext) {
    def $(args: Any*): ColumnName = {
      new ColumnName(sc.s(args : _*))
      new ColumnName(sc.s(args: _*))
    }
  }

@@ -32,6 +32,74 @@ class ColumnExpressionSuite extends QueryTest with SQLTestUtils {

  override def sqlContext(): SQLContext = ctx

  test("column names with space") {
    val df = Seq((1, "a")).toDF("name with space", "name.with.dot")

    checkAnswer(
      df.select(df("name with space")),
      Row(1) :: Nil)

    checkAnswer(
      df.select($"name with space"),
      Row(1) :: Nil)

    checkAnswer(
      df.select(col("name with space")),
      Row(1) :: Nil)

    checkAnswer(
      df.select("name with space"),
      Row(1) :: Nil)

    checkAnswer(
      df.select(expr("`name with space`")),
      Row(1) :: Nil)
  }

  test("column names with dot") {
    val df = Seq((1, "a")).toDF("name with space", "name.with.dot").as("a")

    checkAnswer(
      df.select(df("`name.with.dot`")),
      Row("a") :: Nil)

    checkAnswer(
      df.select($"`name.with.dot`"),
      Row("a") :: Nil)

    checkAnswer(
      df.select(col("`name.with.dot`")),
      Row("a") :: Nil)

    checkAnswer(
      df.select("`name.with.dot`"),
      Row("a") :: Nil)

    checkAnswer(
      df.select(expr("`name.with.dot`")),
      Row("a") :: Nil)

    checkAnswer(
      df.select(df("a.`name.with.dot`")),
      Row("a") :: Nil)

    checkAnswer(
      df.select($"a.`name.with.dot`"),
      Row("a") :: Nil)

    checkAnswer(
      df.select(col("a.`name.with.dot`")),
      Row("a") :: Nil)

    checkAnswer(
      df.select("a.`name.with.dot`"),
      Row("a") :: Nil)

    checkAnswer(
      df.select(expr("a.`name.with.dot`")),
      Row("a") :: Nil)
  }

  test("alias") {
    val df = Seq((1, Seq(1, 2, 3))).toDF("a", "intList")
    assert(df.select(df("a").as("b")).columns.head === "b")