package com.zdjizhi.topology;

import cn.hutool.log.Log;
import cn.hutool.log.LogFactory;
import com.zdjizhi.common.FlowWriteConfig;
import com.zdjizhi.operator.count.SendCountProcess;
import com.zdjizhi.operator.map.MapCompleted;
import com.zdjizhi.operator.map.TypeMapCompleted;
import com.zdjizhi.operator.process.DealFileProcessFunction;
import com.zdjizhi.tools.connections.kafka.KafkaConsumer;
import com.zdjizhi.tools.connections.kafka.KafkaProducer;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class LogFlowWriteTopology {

    private static final Log logger = LogFactory.get();

    public static void main(String[] args) {
        final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

        // Maximum time between two output flushes (in milliseconds).
        environment.setBufferTimeout(FlowWriteConfig.BUFFER_TIMEOUT);

        SingleOutputStreamOperator completedStream;
        if (FlowWriteConfig.LOG_TRANSFORM_TYPE == 0) {
            // Do not validate log field types.
            completedStream = environment.addSource(KafkaConsumer.flinkConsumer())
                    .name(FlowWriteConfig.SOURCE_KAFKA_TOPIC)
                    .setParallelism(FlowWriteConfig.SOURCE_PARALLELISM)
                    .process(new MapCompleted())
                    .name("MapCompletedFunction")
                    .setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
        } else {
            // Weakly validate log field types; values may be coerced according to the schema.
            completedStream = environment.addSource(KafkaConsumer.flinkConsumer())
                    .name(FlowWriteConfig.SOURCE_KAFKA_TOPIC)
                    .setParallelism(FlowWriteConfig.SOURCE_PARALLELISM)
                    .process(new TypeMapCompleted())
                    .name("TypeMapCompletedFunction")
                    .setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
        }

        // Handle records that carry unstructured-file fields.
        SingleOutputStreamOperator dealFileProcessFunction = completedStream
                .process(new DealFileProcessFunction())
                .name("DealFileProcessFunction")
                .uid("DealFile-ProcessFunction")
                .setParallelism(FlowWriteConfig.DEAL_FILE_PARALLELISM);

        // Send the enriched records to the Percent Kafka cluster.
        dealFileProcessFunction
                .addSink(KafkaProducer.getPercentKafkaProducer())
                .name("ToPercentKafka")
                .uid("To-Percent-Kafka")
                .setParallelism(FlowWriteConfig.SINK_PERCENT_PARALLELISM);

        // Send file metadata to the TRAFFIC-FILE-METADATA topic.
        dealFileProcessFunction.getSideOutput(DealFileProcessFunction.metaToKafa)
                .addSink(KafkaProducer.getTrafficFileMetaKafkaProducer())
                .name("toTrafficFileMeta")
                .uid("to-Traffic-FileMeta")
                .setParallelism(FlowWriteConfig.SINK_FILE_DATA_PARALLELISM);

        // Report processing counts from the metric side output.
        dealFileProcessFunction.getSideOutput(DealFileProcessFunction.dealFileMetircTag)
                .process(new SendCountProcess())
                .name("SendCountProcess")
                .uid("Send-Count-Process")
                .setParallelism(1);

        try {
            // The job name is expected as the first program argument.
            environment.execute(args[0]);
        } catch (Exception e) {
            logger.error(e, "This Flink task failed to start!");
        }
    }
}
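/*
 * The side-output wiring above assumes DealFileProcessFunction exposes two
 * public static OutputTag fields: metaToKafa (file metadata destined for the
 * TRAFFIC-FILE-METADATA topic) and dealFileMetircTag (processing metrics).
 * A minimal sketch of how such a function might declare and emit those tags
 * follows; the field names match this repository, but the String element type
 * and the emit logic are assumptions, not the actual implementation.
 *
 * import org.apache.flink.streaming.api.functions.ProcessFunction;
 * import org.apache.flink.util.Collector;
 * import org.apache.flink.util.OutputTag;
 *
 * public class DealFileProcessFunction extends ProcessFunction<String, String> {
 *     // Anonymous subclasses preserve the element type information that
 *     // OutputTag needs at runtime.
 *     public static final OutputTag<String> metaToKafa =
 *             new OutputTag<String>("meta-to-kafka") {};
 *     public static final OutputTag<String> dealFileMetircTag =
 *             new OutputTag<String>("deal-file-metric") {};
 *
 *     @Override
 *     public void processElement(String value, Context ctx, Collector<String> out) {
 *         out.collect(value);                        // main output: enriched record
 *         ctx.output(metaToKafa, value);             // side output: file metadata (assumed payload)
 *         ctx.output(dealFileMetircTag, "count:1");  // side output: metric event (assumed payload)
 *     }
 * }
 */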