Initial commit of the data preprocessing program
src/main/java/com/zdjizhi/topology/LogFlowWriteTopology.java (new file, +56 lines)
@@ -0,0 +1,56 @@
package com.zdjizhi.topology;

import cn.hutool.log.Log;
import cn.hutool.log.LogFactory;
import com.zdjizhi.common.FlowWriteConfig;
import com.zdjizhi.utils.functions.FilterNullFunction;
import com.zdjizhi.utils.functions.MapCompletedFunction;
import com.zdjizhi.utils.kafka.Consumer;
import com.zdjizhi.utils.kafka.Producer;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * @author qidaijie
 * @Package com.zdjizhi.topology
 * @Description: Flink job that reads raw logs from Kafka, optionally completes and transforms them, filters out null records, and writes the results back to Kafka.
 * @date 2021/5/20 16:42
 */
public class LogFlowWriteTopology {

    private static final Log logger = LogFactory.get();

    public static void main(String[] args) {
        final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

        // Enable checkpointing; the interval argument sets how often checkpoints trigger (in milliseconds).
        // Currently disabled; see the configuration sketch below the diff.
        // environment.enableCheckpointing(5000);

        // Read raw logs from Kafka with the configured source parallelism.
        DataStreamSource<String> streamSource = environment.addSource(Consumer.getKafkaConsumer())
                .setParallelism(FlowWriteConfig.CONSUMER_PARALLELISM);

        if (FlowWriteConfig.LOG_NEED_COMPLETE == 1) {
            // Complete and transform the raw logs (see the MapCompletedFunction sketch below the diff).
            DataStream<String> cleaningLog = streamSource.map(new MapCompletedFunction())
                    .name("TransFormLogs").setParallelism(FlowWriteConfig.MAP_PARALLELISM);
            // Drop null records so they are not sent to Kafka (see the FilterNullFunction sketch below the diff).
            DataStream<String> result = cleaningLog.filter(new FilterNullFunction()).name("FilterAbnormalData");
            // Send the cleaned data to Kafka.
            result.addSink(Producer.getKafkaProducer()).name("LogSinkKafka")
                    .setParallelism(FlowWriteConfig.PRODUCER_PARALLELISM);
        } else {
            // Pass the original logs through, filtering out null records only.
            DataStream<String> result = streamSource.filter(new FilterNullFunction()).name("FilterOriginalData");
            result.addSink(Producer.getKafkaProducer()).name("LogSinkKafka")
                    .setParallelism(FlowWriteConfig.PRODUCER_PARALLELISM);
        }

        try {
            // The job name is taken from the first program argument.
            environment.execute(args[0]);
        } catch (Exception e) {
            logger.error(e, "This Flink task failed to start!");
        }
    }
}
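Note on the commented-out checkpoint line: if checkpointing is enabled later, the interval and delivery mode can be configured together. A minimal sketch, assuming exactly-once semantics are wanted (an assumption, not something this commit specifies):

// Assumption: exactly-once checkpointing; the 5000 ms interval comes from the commented-out line above.
// Requires: import org.apache.flink.streaming.api.CheckpointingMode;
environment.enableCheckpointing(5000);
environment.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);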
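MapCompletedFunction is referenced but not included in this commit. For orientation only, a minimal sketch of the shape such a class likely has, assuming a plain MapFunction<String, String>; the actual completion logic is not shown here:

package com.zdjizhi.utils.functions;

import org.apache.flink.api.common.functions.MapFunction;

// Hypothetical sketch; the real completion/transformation logic is not part of this diff.
public class MapCompletedFunction implements MapFunction<String, String> {
    @Override
    public String map(String rawLog) {
        // Placeholder for the actual field-completion and transformation logic.
        return rawLog;
    }
}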
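Likewise, FilterNullFunction is not part of this diff. A minimal sketch consistent with its usage here (drop null or empty records before the Kafka sink); the real implementation may differ:

package com.zdjizhi.utils.functions;

import org.apache.flink.api.common.functions.FilterFunction;

// Hypothetical sketch inferred from the comments in LogFlowWriteTopology.
public class FilterNullFunction implements FilterFunction<String> {
    @Override
    public boolean filter(String log) {
        // Keep only non-null, non-empty records.
        return log != null && !log.isEmpty();
    }
}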