[GAL-504] 优化File Chunk Combiner性能及功能
This commit is contained in:
@@ -3,12 +3,12 @@ package com.zdjizhi;
|
||||
import com.zdjizhi.config.Configs;
|
||||
import com.zdjizhi.function.*;
|
||||
import com.zdjizhi.pojo.*;
|
||||
import com.zdjizhi.sink.HBaseSink;
|
||||
import com.zdjizhi.sink.HosSink;
|
||||
import com.zdjizhi.kafka.KafkaConsumer;
|
||||
import com.zdjizhi.trigger.LastChunkOrNoDataInTimeTrigger;
|
||||
import com.zdjizhi.trigger.MultipleTrigger;
|
||||
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
|
||||
import org.apache.flink.api.common.functions.FilterFunction;
|
||||
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
|
||||
import org.apache.flink.api.java.utils.ParameterTool;
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
@@ -24,9 +24,8 @@ import org.apache.flink.util.OutputTag;
|
||||
import java.time.Duration;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
public class FileChunkCombiner extends KafkaConsumer {
|
||||
public class FileChunkCombiner {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
final ParameterTool parameterTool = ParameterTool.fromPropertiesFile(args[0]);
|
||||
@@ -43,7 +42,7 @@ public class FileChunkCombiner extends KafkaConsumer {
|
||||
.name("Kafka Source")
|
||||
.map(new ParseMessagePackMapFunction())
|
||||
.name("Map: Parse Message Pack")
|
||||
.filter((FilterFunction<FileChunk>) Objects::nonNull)
|
||||
.filter(new FileChunkFilter(configuration.getLong(Configs.FILE_MAX_SIZE), configuration.getString(Configs.FILTER_EXPRESSION)))
|
||||
.assignTimestampsAndWatermarks(watermarkStrategy);
|
||||
|
||||
OutputTag<FileChunk> delayedChunkOutputTag = new OutputTag<FileChunk>("delayed-chunk") {
|
||||
@@ -63,14 +62,23 @@ public class FileChunkCombiner extends KafkaConsumer {
|
||||
.setParallelism(configuration.get(Configs.COMBINER_WINDOW_PARALLELISM))
|
||||
.disableChaining();
|
||||
|
||||
HosSink hosSink = new HosSink(configuration);
|
||||
windowStream.addSink(hosSink)
|
||||
.name("Hos")
|
||||
.setParallelism(configuration.get(Configs.SINK_HOS_PARALLELISM));
|
||||
windowStream.getSideOutput(delayedChunkOutputTag)
|
||||
.map(new SideOutputMapFunction())
|
||||
.addSink(hosSink)
|
||||
.name("Hos Delayed Chunk");
|
||||
if ("hos".equals(configuration.get(Configs.SINK_TYPE))) {
|
||||
windowStream.addSink(new HosSink(configuration))
|
||||
.name("Hos")
|
||||
.setParallelism(configuration.get(Configs.SINK_PARALLELISM));
|
||||
windowStream.getSideOutput(delayedChunkOutputTag)
|
||||
.map(new SideOutputMapFunction())
|
||||
.addSink(new HosSink(configuration))
|
||||
.name("Hos Delayed Chunk");
|
||||
} else {
|
||||
windowStream.addSink(new HBaseSink(configuration))
|
||||
.name("HBase")
|
||||
.setParallelism(configuration.get(Configs.SINK_PARALLELISM));
|
||||
windowStream.getSideOutput(delayedChunkOutputTag)
|
||||
.map(new SideOutputMapFunction())
|
||||
.addSink(new HBaseSink(configuration))
|
||||
.name("HBase Delayed Chunk");
|
||||
}
|
||||
|
||||
environment.execute(configuration.get(Configs.FLINK_JOB_NAME));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user