package com.zdjizhi.topology;

import cn.hutool.log.Log;
import cn.hutool.log.LogFactory;
import com.zdjizhi.common.FlowWriteConfig;
import com.zdjizhi.utils.functions.DealFileProcessFunction;
import com.zdjizhi.utils.functions.FilterNullFunction;
import com.zdjizhi.utils.functions.MapCompletedFunction;
import com.zdjizhi.utils.functions.TypeMapCompletedFunction;
import com.zdjizhi.utils.kafka.KafkaConsumer;
import com.zdjizhi.utils.kafka.KafkaProducer;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.Map;

/**
 * @author 王成成
 * @Package com.zdjizhi.topology
 * @Description: Flink streaming topology that completes log records and writes them to Kafka.
 * @date 2022.06.01
 */
public class LogFlowWriteTopology {

    private static final Log logger = LogFactory.get();

    public static void main(String[] args) {
        final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        // Maximum time between two buffer flushes (in milliseconds).
        environment.setBufferTimeout(FlowWriteConfig.BUFFER_TIMEOUT);

        if (FlowWriteConfig.LOG_NEED_COMPLETE == 1) {
            // The generic parameters were lost in the original source; Map<String, Object> is assumed here.
            SingleOutputStreamOperator<Map<String, Object>> streamSource = environment
                    .addSource(KafkaConsumer.myDeserializationConsumer())
                    .setParallelism(FlowWriteConfig.SOURCE_PARALLELISM)
                    .name(FlowWriteConfig.SOURCE_KAFKA_TOPIC);

            DataStream<Map<String, Object>> cleaningLog;
            switch (FlowWriteConfig.LOG_TRANSFORM_TYPE) {
                case 0:
                    // Complete and transform the raw logs without validating log field types.
                    cleaningLog = streamSource.map(new MapCompletedFunction()).name("MapCompletedFunction")
                            .setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
                    break;
                case 1:
                    // Complete and transform the raw logs with weak validation of log field types;
                    // fields may be cast according to the schema.
                    cleaningLog = streamSource.map(new TypeMapCompletedFunction()).name("TypeMapCompletedFunction")
                            .setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
                    break;
                default:
                    // Complete and transform the raw logs without validating log field types.
                    cleaningLog = streamSource.map(new MapCompletedFunction()).name("MapCompletedFunction")
                            .setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
            }

            // Handle records that carry unstructured log data; the element type (String) is assumed here.
            SingleOutputStreamOperator<String> process = cleaningLog.process(new DealFileProcessFunction());

            // Traffic-file metadata is taken from the side output and filtered for nulls.
            SingleOutputStreamOperator<String> resultFileMetaData = process
                    .getSideOutput(DealFileProcessFunction.metaToKafa)
                    .filter(new FilterNullFunction())
                    .name("FilterAbnormalTrafficFileMetaData")
                    .setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);

            SingleOutputStreamOperator<String> result = process
                    .filter(new FilterNullFunction())
                    .name("FilterAbnormalData")
                    .setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);

            // Send file metadata to the TRAFFIC-FILE-METADATA topic.
            resultFileMetaData.addSink(KafkaProducer.getTrafficFileMetaKafkaProducer()).name("toTrafficFileMeta")
                    .setParallelism(FlowWriteConfig.FILE_DATA_SINK_PARALLELISM);

            // Send the completed data to the Percent (Baifendian) Kafka cluster.
            result.addSink(KafkaProducer.getPercentKafkaProducer()).name("toPercentKafka")
                    .setParallelism(FlowWriteConfig.PERCENT_SINK_PARALLELISM);
        }

        try {
            environment.execute(args[0]);
        } catch (Exception e) {
            logger.error("This Flink task failed to start! Exception information is: " + e);
            e.printStackTrace();
        }
    }
}