// After two streams are connected, each still keeps its own element type. To process
// them uniformly, a user-defined function can map both sides to a common output
// format, turning them into a single stream that is easy to handle downstream.
import org.apache.flink.streaming.api.functions.co.CoMapFunction
import org.apache.flink.streaming.api.scala._
object ConnectStreamExample {
  // Pipeline shape: DataStream -> ConnectedStreams -> DataStream
  //
  // Demonstrates `connect`: the two streams keep their own element types, and a
  // shared CoMapFunction maps both sides to one output type so downstream
  // operators see a single unified stream.
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1) // single parallel instance so printed output order is easy to follow

    val stream1: DataStream[(Int, String)] = env.fromElements(
      (1, "a"),
      (2, "b")
    )
    val stream2: DataStream[(Int, Int)] = env.fromElements(
      (1, 1),
      (1, 2)
    )

    // Conceptually similar to: select * from A inner join B on A.id = B.id
    // NOTE: connect only co-partitions the two streams by key — it does not
    // perform the join itself; join logic would live in the co-function.
    // `conn` and `conn1` are equivalent ways of keying both sides.
    val conn: ConnectedStreams[(Int, String), (Int, Int)] = stream1
      .keyBy(_._1)
      .connect(stream2.keyBy(_._1)) // the keys are not required to match
    val conn1: ConnectedStreams[(Int, String), (Int, Int)] = stream1
      .connect(stream2)
      // Key-selector form; the positional keyBy(0, 0) is deprecated in Flink's API.
      .keyBy(_._1, _._1)

    // Unify both element types into String via the CoMapFunction below.
    val outStream: DataStream[String] = conn.map(new MyCoMapFunction)
    outStream.print()
    env.execute()
  }

  /** Maps each side of the connected stream to a common String output:
    * map1 handles (Int, String) elements, map2 handles (Int, Int) elements.
    */
  class MyCoMapFunction extends CoMapFunction[(Int, String), (Int, Int), String] {
    override def map1(value: (Int, String)): String = value._2 + " from map1"
    override def map2(value: (Int, Int)): String = value._2.toString + " from map2"
  }
}