Spark SQL 中使用 DataFrame 和 Dataset 读取文本文件
package ml.test
import org.apache.spark.sql.functions.split
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}
import org.slf4j.LoggerFactory
/**
 * Demo that reads a comma-separated text file with Spark SQL twice -- once as
 * a `DataFrame` (`spark.read.text`) and once as a `Dataset[String]`
 * (`spark.read.textFile`) -- and splits every line into `name`, `age` and
 * `addr` columns, printing each intermediate result with `show()`.
 */
object TestSpark {
  private val logger = LoggerFactory.getLogger(TestSpark.getClass)

  /** Input file used when no path is supplied on the command line. */
  private val path: String = "D:\\table.txt"

  /**
   * Entry point.
   *
   * @param args optional; when present, `args(0)` overrides the default input path
   */
  def main(args: Array[String]): Unit = {
    val inputPath: String = args.headOption.getOrElse(path)

    logger.info("创建spark连接")
    val spark: SparkSession = SparkSession.builder()
      .appName("WordCount")
      .master("local[*]")
      .getOrCreate()

    // Always release the session (and its SparkContext), even if a read or
    // show() below throws.
    try {
      spark.sparkContext.setLogLevel("ERROR")
      import spark.implicits._

      // Splits `column` on "," and projects the first three items as
      // name / age / addr. `select` keeps only the listed columns, so the
      // temporary "_tmp" array column needs no explicit drop afterwards.
      def selectFields(lines: Dataset[_], column: String): DataFrame =
        lines
          .withColumn("_tmp", split(lines.col(column), ","))
          .select(
            $"_tmp".getItem(0).as("name"),
            $"_tmp".getItem(1).as("age"),
            $"_tmp".getItem(2).as("addr"))

      // --- DataFrame variant: one string column, renamed to "XXX" ---
      // Note: every line of the file becomes a data row, including a header
      // line if the file has one.
      val frame: DataFrame = spark.read.text(inputPath).toDF("XXX")
      frame.show()

      // Projection form: only the three derived columns remain.
      selectFields(frame, "XXX").show()

      // withColumn form: the original "XXX" column is kept next to the derived
      // ones, so here the temporary column has to be dropped explicitly.
      frame
        .withColumn("_tmp", split($"XXX", ","))
        .withColumn("name", $"_tmp".getItem(0))
        .withColumn("age", $"_tmp".getItem(1))
        .withColumn("addr", $"_tmp".getItem(2))
        .drop("_tmp")
        .show()

      println("=========================================")

      // --- Dataset variant: textFile yields Dataset[String] with column "value" ---
      val ds: Dataset[String] = spark.read.textFile(inputPath)
      ds.show()
      selectFields(ds, "value").show()
    } finally {
      spark.stop()
    }
  }
}
+-------------------+
|                XXX|
+-------------------+
|      name,age,addr|
|       tom,17,china|
|    lili,18,america|
|zhangsan,100,canada|
+-------------------+
+--------+---+-------+
|    name|age|   addr|
+--------+---+-------+
|    name|age|   addr|
|     tom| 17|  china|
|    lili| 18|america|
|zhangsan|100| canada|
+--------+---+-------+
+-------------------+--------+---+-------+
|                XXX|    name|age|   addr|
+-------------------+--------+---+-------+
|      name,age,addr|    name|age|   addr|
|       tom,17,china|     tom| 17|  china|
|    lili,18,america|    lili| 18|america|
|zhangsan,100,canada|zhangsan|100| canada|
+-------------------+--------+---+-------+
=========================================
+-------------------+
|              value|
+-------------------+
|      name,age,addr|
|       tom,17,china|
|    lili,18,america|
|zhangsan,100,canada|
+-------------------+
+--------+---+-------+
|    name|age|   addr|
+--------+---+-------+
|    name|age|   addr|
|     tom| 17|  china|
|    lili| 18|america|
|zhangsan|100| canada|
+--------+---+-------+