1.启动kafka:
]# ./bin/kafka-server-start.sh config/server.properties
2.查看topic list
./bin/kafka-topics.sh --list --zookeeper localhost:2181
3.streaming代码:
package com.badou.streaming
import org.apache.hadoop.hbase._
import org.apache.hadoop.hbase.client._
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka.KafkaUtils
object HbaseHandler {
  /**
   * Writes a single cell to the fixed HBase table
   * "sparkstream_kafkahbase_table", column family "info".
   *
   * @param row    row key of the cell
   * @param column qualifier within the "info" column family
   * @param value  cell value
   *
   * NOTE(review): this opens a fresh HBase connection per record, which is
   * expensive; for real workloads hoist the HTable (or a Connection pool)
   * out to partition scope in the caller.
   */
  def insert(row: String, column: String, value: String) {
    val tableName = "sparkstream_kafkahbase_table" // target table name
    val hbaseConf = HBaseConfiguration.create()
    hbaseConf.set("hbase.zookeeper.quorum", "master,slave1,slave2")
    hbaseConf.set("hbase.zookeeper.property.clientPort", "2181")
    hbaseConf.set("hbase.defaults.for.version.skip", "true")
    val hTable = new HTable(hbaseConf, tableName)
    try {
      // Disable auto-flush and enlarge the client write buffer so puts are
      // batched; flushCommits() below pushes the buffered mutation out.
      hTable.setAutoFlush(false, false)
      hTable.setWriteBufferSize(3 * 1024 * 1024)
      val thePut = new Put(row.getBytes)
      thePut.add("info".getBytes, column.getBytes, value.getBytes)
      hTable.put(thePut)
      hTable.flushCommits()
    } finally {
      // Fix: the original leaked the HTable — close it even when put/flush throws.
      hTable.close()
    }
  }
}
object kafkaStreamHbase {
  /**
   * Spark Streaming entry point: consumes '|'-delimited records
   * (key|column|value) from Kafka topic "topic_1013" via the ZooKeeper-based
   * receiver API and writes each record to HBase through HbaseHandler.insert.
   */
  def main(args: Array[String]) {
    val zkQuorum = "master:2181,slave1:2181,slave2:2181"
    val group = "group_1"
    val topics = "topic_1013"
    val numThreads = 1
    // Fix: removed unused `var output` (dead local, never read).

    val sparkConf = new SparkConf().setAppName("kafkaStreamHbase").setMaster("local[2]")
    val ssc = new StreamingContext(sparkConf, Seconds(10)) // 10-second micro-batches
    ssc.checkpoint("hdfs://master:9000/hdfs_checkpoint")

    // Map of topic -> receiver thread count, as required by createStream.
    val topicpMap = topics.split(",").map((_, numThreads.toInt)).toMap
    // _._2 drops the Kafka message key, keeping only the payload string.
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicpMap).map(_._2)
    // lines.print()
    // lines.saveAsTextFiles("hdfs://master:9000/stream_out/sparkstreaming_hbasetest.log")

    val line = lines.flatMap(_.split("\n"))
    val words = line.map(_.split("\\|"))
    words.foreachRDD(rdd => {
      rdd.foreachPartition(partitionOfRecords => {
        partitionOfRecords.foreach(pair => {
          // Fix: guard against malformed records — the original indexed
          // pair(0..2) unconditionally and would crash the batch on any
          // line with fewer than three '|'-separated fields.
          if (pair.length >= 3) {
            val key = pair(0)
            val col = pair(1)
            val value = pair(2)
            println(key + "_" + col + " : " + value)
            HbaseHandler.insert(key, col, value)
          } else {
            println("skip malformed record: " + pair.mkString("|"))
          }
        })
      })
    })

    ssc.start()
    ssc.awaitTermination()
  }
}
4.启动hbase
创建表
5.测试
启动producer
./bin/kafka-console-producer.sh --broker-list localhost:9092 --topic topic_1013
idea运行代码
结果