说明(流处理)
- 读取 Kafka 流数据,每 5 秒统计一次词频
- 将统计结果写入 Redis
代码示例(Maven pom.xml 与 Scala 主程序)
<?xml version="1.0" encoding="UTF-8"?>
<!-- Build for the Flink streaming word-count example that follows:
     Kafka source -> 5s windowed count -> Redis sink. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.test</groupId>
<artifactId>learn_flink</artifactId>
<version>1.0-SNAPSHOT</version>
<name>Flink learn</name>
<!-- "Flink learning" -->
<description>Flink学习</description>
<developers>
<developer>
<id>lvxw</id>
<name>吕学文</name>
<email>[email protected]</email>
</developer>
</developers>
<dependencies>
<!-- Logging backend (log4j 1.x is EOL; NOTE(review): consider slf4j + log4j2). -->
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<!-- Flink batch + streaming Scala APIs, Scala 2.11 build. NOTE(review): on a
     real cluster these are usually <scope>provided</scope> so they are not
     shaded into the job jar — confirm the deployment model. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_2.11</artifactId>
<version>1.4.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>1.4.2</version>
</dependency>
<!-- Kafka 0.8 consumer connector; matches the FlinkKafkaConsumer08 used below. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka-0.8_2.11</artifactId>
<version>1.4.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-filesystem_2.11</artifactId>
<version>1.4.2</version>
</dependency>
<!-- Transitive requirements of the Kafka 0.8 client (metrics, ZkClient). -->
<dependency>
<groupId>com.yammer.metrics</groupId>
<artifactId>metrics-core</artifactId>
<version>2.2.0</version>
</dependency>
<dependency>
<groupId>com.101tec</groupId>
<artifactId>zkclient</artifactId>
<version>0.10</version>
</dependency>
<!-- Hadoop client libs for HDFS access (HADOOP_USER_NAME is set in the job). -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.4.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.4.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.4.1</version>
</dependency>
<!-- Kafka broker/client library, 0.8 line to match the connector above. -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.11</artifactId>
<version>0.8.2.1</version>
</dependency>
<!-- Scala 2.11 runtime + compiler, matching the _2.11 artifact suffixes. -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.11.12</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-compiler</artifactId>
<version>2.11.12</version>
</dependency>
<!-- Redis sink (Bahir connector) and its Jedis/commons-pool2 runtime deps. -->
<dependency>
<groupId>org.apache.bahir</groupId>
<artifactId>flink-connector-redis_2.11</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
<version>2.9.0</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-pool2</artifactId>
<version>2.4.2</version>
</dependency>
</dependencies>
<properties>
<!-- Sonar analysis language; the sources in this project are Scala (was "java"). -->
<sonar.language>scala</sonar.language>
<!-- Flink 1.4.x requires Java 8 (was 1.7; the compiler plugin even hard-coded 1.6). -->
<java-version>1.8</java-version>
<!-- FIX: the duplicate <src.dir>/<test.src.dir> entries were removed. Maven lets
     the LAST definition win, so the build previously compiled src/main/java and
     silently ignored the Scala sources under src/main/scala. -->
<src.dir>src/main/scala</src.dir>
<!-- NOTE(review): non-standard name — Maven convention is src/main/resources;
     confirm this directory actually exists before renaming. -->
<src.res.dir>src/main/resource</src.res.dir>
<test.src.dir>src/test/scala</test.src.dir>
<test.res.dir>src/test/resource</test.res.dir>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- Name of the thin jar produced by maven-jar-plugin and its output dir. -->
<jarfile.name>Flink</jarfile.name>
<jar.out.dir>jar</jar.out.dir>
<maven.build.timestamp.format>yyyyMMdd-HHmmss</maven.build.timestamp.format>
</properties>
<build>
<!-- ${src.dir} is both the compile source dir and (below) a resource dir, so the
     .scala files themselves get packaged; dir values come from <properties>. -->
<sourceDirectory>${src.dir}</sourceDirectory>
<resources>
<resource>
<directory>${src.dir}</directory>
</resource>
<resource>
<directory>${src.res.dir}</directory>
<includes>
<!-- FIX: dropped the redundant <include>*</include>; "**/*" already matches
     files at every depth, including the top level. -->
<include>**/*</include>
</includes>
<filtering>true</filtering>
</resource>
</resources>
<testSourceDirectory>${test.src.dir}</testSourceDirectory>
<testResources>
<testResource>
<directory>${test.src.dir}</directory>
</testResource>
<!-- FIX: ${test.res.dir} was declared in <properties> but never used here. -->
<testResource>
<directory>${test.res.dir}</directory>
</testResource>
</testResources>
<plugins>
<!-- Shade the runtime connector/client jars into the job jar so the cluster
     classpath does not need them; Flink core stays out of the include list. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.1.0</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<artifactSet>
<includes>
<include>org.apache.flink:flink-connector-kafka-*</include>
<include>org.apache.flink:flink-connector-filesystem_2.11</include>
<include>org.apache.commons:commons-pool2</include>
<include>redis.clients:jedis</include>
<include>org.apache.kafka:*</include>
<include>com.yammer.metrics:*</include>
<include>com.101tec:*</include>
<include>org.apache.bahir:*</include>
</includes>
</artifactSet>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<!-- FIX: was hard-coded to 1.6, contradicting the <java-version> property;
     reference the property so the Java level is defined in one place. -->
<source>${java-version}</source>
<target>${java-version}</target>
</configuration>
</plugin>
<!-- Thin jar, named/placed via properties (used alongside the shaded jar). -->
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>2.4</version>
<configuration>
<finalName>${jarfile.name}</finalName>
<outputDirectory>${jar.out.dir}</outputDirectory>
</configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.3</version>
<executions>
<execution>
<id>Build deploy package</id>
<phase>package</phase>
<configuration>
<appendAssemblyId>false</appendAssemblyId>
<tarLongFileMode>gnu</tarLongFileMode>
<!-- FIX: removed ${project.activeProfiles[0].id} — this POM declares no
     <profiles>, so the expression could never resolve and leaked literally
     into the artifact name. -->
<finalName>${project.artifactId}-${project.version}-${maven.build.timestamp}</finalName>
<descriptors>
<descriptor>assembly.xml</descriptor>
</descriptors>
</configuration>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Compile Scala first (process-resources phase) so mixed Scala/Java
     projects resolve cross-references. -->
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.2.1</version>
<executions>
<execution>
<id>scala-compile-first</id>
<phase>process-resources</phase>
<goals>
<goal>add-source</goal>
<goal>compile</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
package com.test
import java.text.SimpleDateFormat
import java.util.Properties
import java.util.Date
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08
import org.apache.flink.streaming.connectors.redis.RedisSink
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig
import org.apache.flink.streaming.connectors.redis.common.mapper.{RedisCommand, RedisCommandDescription, RedisMapper}
import org.apache.flink.util.Collector
/**
 * Streaming word count: reads lines from a Kafka 0.8 topic, counts words in
 * 5-second tumbling windows, and RPUSHes "yyyyMMddHHmm -> count" entries to
 * Redis under the timestamp key.
 */
object WordCountKafka5Seconds {

  /** Maps a (key, value) pair to a Redis RPUSH: key = list name, value = element. */
  class RedisExampleMapper extends RedisMapper[(String, String)] {
    override def getCommandDescription: RedisCommandDescription =
      new RedisCommandDescription(RedisCommand.RPUSH)
    override def getKeyFromData(data: (String, String)): String = data._1
    override def getValueFromData(data: (String, String)): String = data._2
  }

  val ZOOKEEPER_HOST = "artemis-02:2181,artemis-03:2181,artemis-04:2181/microlens/artemis/kafka"
  // NOTE(review): the same broker appears three times; should this be
  // artemis-02/03/04:9092 like ZOOKEEPER_HOST? Confirm before deploying.
  val KAFKA_BROKER = "artemis-02:9092,artemis-02:9092,artemis-02:9092"
  val TRANSACTION_GROUP = "transaction"
  val TOPIC = "test-flink"

  /**
   * Current wall-clock time formatted as yyyyMMddHHmm.
   * FIX: builds a fresh SimpleDateFormat per call — the former shared `sdf`
   * field is not thread-safe, and parallel window subtasks in one TaskManager
   * JVM would share it.
   */
  private def timestamp: String = new SimpleDateFormat("yyyyMMddHHmm").format(new Date())

  def main(args: Array[String]): Unit = {
    System.setProperty("HADOOP_USER_NAME", "hadoop")

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Checkpoint every 5s with exactly-once semantics.
    env.enableCheckpointing(5000)
    env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)

    val kafkaProps = new Properties()
    kafkaProps.setProperty("zookeeper.connect", ZOOKEEPER_HOST)
    kafkaProps.setProperty("bootstrap.servers", KAFKA_BROKER)
    kafkaProps.setProperty("group.id", TRANSACTION_GROUP)

    val kafkaConsumer = new FlinkKafkaConsumer08[String](TOPIC, new SimpleStringSchema(), kafkaProps)
    kafkaConsumer.setStartFromLatest()

    val result = env
      .addSource(kafkaConsumer)
      .flatMap { line => line.split(" ") }
      .map((_, 1))
      .keyBy(0)
      // FIX: was Time.minutes(1L) — the stated requirement (and this object's
      // name) is a count every 5 seconds.
      .timeWindow(Time.seconds(5L))
      .apply { (key, window, values, out: Collector[Map[String, Int]]) =>
        val time = timestamp + ":"
        // After keyBy(0) every tuple in this window shares one word, so the
        // original inner `for ... if k == key` filter was a no-op; a plain
        // per-group sum is equivalent and O(n).
        val counts = values
          .groupBy(_._1)
          .map { case (word, pairs) => (time + word, pairs.map(_._2).sum) }
        out.collect(counts)
      }
      .flatMap(x => x)
      .map { x =>
        // FIX: limit 2 keeps words that themselves contain ':' intact;
        // kv(0) = timestamp, kv(1) = word.
        val kv = x._1.split(":", 2)
        (kv(0), kv(1) + "->" + x._2)
      }

    val conf = new FlinkJedisPoolConfig.Builder().setHost("artemis-02").setPort(6379).build()
    result.addSink(new RedisSink[(String, String)](conf, new RedisExampleMapper()))
    env.execute()
  }
}