Update the P19 dual-write program according to the v04 log-completion program.
74  src/main/java/com/zdjizhi/topology/LogFlowWriteTopology.java  Normal file
@@ -0,0 +1,74 @@
package com.zdjizhi.topology;

import cn.hutool.log.Log;
import cn.hutool.log.LogFactory;
import com.zdjizhi.common.FlowWriteConfig;
import com.zdjizhi.utils.functions.DealFileProcessFunction;
import com.zdjizhi.utils.functions.FilterNullFunction;
import com.zdjizhi.utils.functions.MapCompletedFunction;
import com.zdjizhi.utils.functions.TypeMapCompletedFunction;
import com.zdjizhi.utils.kafka.KafkaConsumer;
import com.zdjizhi.utils.kafka.KafkaProducer;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.Map;

/**
 * @author 王成成
 * @Package com.zdjizhi.topology
 * @Description:
 * @date 2022.06.01
 */
public class LogFlowWriteTopology {
    private static final Log logger = LogFactory.get();

    public static void main(String[] args) {
        final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        // Maximum time between two buffer flushes (in milliseconds).
        environment.setBufferTimeout(FlowWriteConfig.BUFFER_TIMEOUT);

        if (FlowWriteConfig.LOG_NEED_COMPLETE == 1) {
            SingleOutputStreamOperator<Map<String, Object>> streamSource = environment.addSource(KafkaConsumer.myDeserializationConsumer())
                    .setParallelism(FlowWriteConfig.SOURCE_PARALLELISM).name(FlowWriteConfig.SOURCE_KAFKA_TOPIC);

            DataStream<Map<String, Object>> cleaningLog;
            switch (FlowWriteConfig.LOG_TRANSFORM_TYPE) {
                case 0:
                    // Complete and transform the raw logs without validating field types.
                    cleaningLog = streamSource.map(new MapCompletedFunction()).name("MapCompletedFunction")
                            .setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
                    break;
                case 1:
                    // Complete and transform the raw logs with weak field-type validation; fields can be cast to the types given in the schema.
                    cleaningLog = streamSource.map(new TypeMapCompletedFunction()).name("TypeMapCompletedFunction")
                            .setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
                    break;
                default:
                    // Complete and transform the raw logs without validating field types.
                    cleaningLog = streamSource.map(new MapCompletedFunction()).name("MapCompletedFunction")
                            .setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
            }
            // Process records that carry unstructured logs; file metadata is routed to a side output.
            SingleOutputStreamOperator<String> process = cleaningLog.process(new DealFileProcessFunction());
            SingleOutputStreamOperator<String> resultFileMetaData = process.getSideOutput(DealFileProcessFunction.metaToKafa)
                    .filter(new FilterNullFunction()).name("FilterAbnormalTrafficFileMetaData").setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
            SingleOutputStreamOperator<String> result = process.filter(new FilterNullFunction())
                    .name("FilterAbnormalData").setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
            // Send the file metadata to the TRAFFIC-FILE-METADATA topic.
            resultFileMetaData.addSink(KafkaProducer.getTrafficFileMetaKafkaProducer()).name("toTrafficFileMeta")
                    .setParallelism(FlowWriteConfig.FILE_DATA_SINK_PARALLELISM);
            // Send the completed data to the Percent (百分点) Kafka cluster.
            result.addSink(KafkaProducer.getPercentKafkaProducer()).name("toPercentKafka")
                    .setParallelism(FlowWriteConfig.PERCENT_SINK_PARALLELISM);
        }
        try {
            environment.execute(args[0]);
        } catch (Exception e) {
            logger.error(e, "This Flink task start ERROR!");
        }
    }
}
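The "dual write" above hinges on Flink side outputs: DealFileProcessFunction emits file metadata through the DealFileProcessFunction.metaToKafa tag while the completed logs stay on the main stream, and each branch gets its own Kafka sink. That class is not part of this commit, so the following is only a minimal sketch of the shape it would need; the class name and the metaToKafa field come from the diff, while the tag id "meta-to-kafka", the "file_metadata" key, and the routing logic are assumptions.

package com.zdjizhi.utils.functions;

import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

import java.util.Map;

public class DealFileProcessFunction extends ProcessFunction<Map<String, Object>, String> {

    // Side-output tag referenced by the topology as DealFileProcessFunction.metaToKafa.
    // The anonymous subclass ({}) preserves the generic type for Flink's runtime.
    public static final OutputTag<String> metaToKafa = new OutputTag<String>("meta-to-kafka") {};

    @Override
    public void processElement(Map<String, Object> log, Context ctx, Collector<String> out) {
        // Assumed routing: if the record carries file metadata, emit it on the side
        // output; the record itself continues down the main stream.
        Object fileMeta = log.get("file_metadata"); // hypothetical field name
        if (fileMeta != null) {
            ctx.output(metaToKafa, fileMeta.toString());
        }
        out.collect(log.toString());
    }
}

Downstream, process.getSideOutput(DealFileProcessFunction.metaToKafa) picks the metadata branch back up, which is why the two sinks can run at independent parallelism.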