galaxy-tsg-olap-log-stream-…/src/main/java/com/zdjizhi/topology/LogFlowWriteTopology.java
wangchengcheng 5c0a108393
1. Adapt to TSG 23.07 and above: add data-transfer statistics metrics and push them to Pushgateway. (GAL-409)
2. The URL parameter domain, previously taken from the http_domain field, is now taken from the common_server_domain field. (GAL-410)
2023-09-28 15:59:26 +08:00
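Per the commit notes above, GAL-409 adds transfer statistics that are pushed to a Pushgateway. A minimal, self-contained sketch of such a push, assuming the Prometheus Java simpleclient with its Pushgateway exporter; the gateway address, job name, and metric name below are illustrative, not taken from this repository:

import io.prometheus.client.CollectorRegistry;
import io.prometheus.client.Counter;
import io.prometheus.client.exporter.PushGateway;

public class TransferStatsPusherSketch {
    private static final CollectorRegistry REGISTRY = new CollectorRegistry();
    // Hypothetical counter for forwarded records; the real metric names live in SendCountProcess.
    private static final Counter TRANSFERRED = Counter.build()
            .name("log_transfer_records_total")
            .help("Records forwarded by LogFlowWriteTopology")
            .register(REGISTRY);

    public static void pushStats(long records, String gatewayAddress) throws Exception {
        TRANSFERRED.inc(records);
        // pushAdd() merges with metrics already stored for this job instead of replacing them.
        new PushGateway(gatewayAddress).pushAdd(REGISTRY, "log_flow_write");
    }
}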

package com.zdjizhi.topology;

import cn.hutool.log.Log;
import cn.hutool.log.LogFactory;
import com.alibaba.fastjson2.JSONObject;
import com.zdjizhi.common.FlowWriteConfig;
import com.zdjizhi.operator.count.SendCountProcess;
import com.zdjizhi.operator.map.MapCompleted;
import com.zdjizhi.operator.map.TypeMapCompleted;
import com.zdjizhi.operator.process.DealFileProcessFunction;
import com.zdjizhi.tools.connections.kafka.KafkaConsumer;
import com.zdjizhi.tools.connections.kafka.KafkaProducer;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class LogFlowWriteTopology {

    private static final Log logger = LogFactory.get();

    public static void main(String[] args) {
        final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        // Maximum time between two output flushes (in milliseconds).
        environment.setBufferTimeout(FlowWriteConfig.BUFFER_TIMEOUT);
        SingleOutputStreamOperator<JSONObject> completedStream;
        if (FlowWriteConfig.LOG_TRANSFORM_TYPE == 0) {
            // Do not validate log field types.
            completedStream = environment.addSource(KafkaConsumer.flinkConsumer())
                    .name(FlowWriteConfig.SOURCE_KAFKA_TOPIC)
                    .setParallelism(FlowWriteConfig.SOURCE_PARALLELISM)
                    .process(new MapCompleted())
                    .name("MapCompletedFunction")
                    .setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
        } else {
            // Weakly validate log field types; values can be cast according to the schema.
            completedStream = environment.addSource(KafkaConsumer.flinkConsumer())
                    .name(FlowWriteConfig.SOURCE_KAFKA_TOPIC)
                    .setParallelism(FlowWriteConfig.SOURCE_PARALLELISM)
                    .process(new TypeMapCompleted())
                    .name("TypeMapCompletedFunction")
                    .setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
        }
        // Handle records that carry unstructured-file fields.
        SingleOutputStreamOperator<String> dealFileProcessFunction = completedStream
                .process(new DealFileProcessFunction())
                .name("DealFileProcessFunction")
                .uid("DealFile-ProcessFunction")
                .setParallelism(FlowWriteConfig.DEAL_FILE_PARALLELISM);
        // Send the completed data to the Percent (百分点) Kafka cluster.
        dealFileProcessFunction
                .addSink(KafkaProducer.getPercentKafkaProducer())
                .name("ToPercentKafka")
                .uid("To-Percent-Kafka")
                .setParallelism(FlowWriteConfig.SINK_PERCENT_PARALLELISM);
        // Send file metadata to TRAFFIC-FILE-METADATA.
        dealFileProcessFunction
                .getSideOutput(DealFileProcessFunction.metaToKafa)
                .addSink(KafkaProducer.getTrafficFileMetaKafkaProducer())
                .name("toTrafficFileMeta")
                .uid("to-Traffic-FileMeta")
                .setParallelism(FlowWriteConfig.SINK_FILE_DATA_PARALLELISM);
        // Transfer-statistics side output; SendCountProcess reports the counts (per GAL-409, to Pushgateway).
        dealFileProcessFunction
                .getSideOutput(DealFileProcessFunction.dealFileMetircTag)
                .process(new SendCountProcess())
                .name("SendCountProcess")
                .uid("Send-Count-Process")
                .setParallelism(1);
        try {
            // The job name is taken from the first command-line argument.
            environment.execute(args[0]);
        } catch (Exception e) {
            logger.error(e, "This Flink task failed to start!");
        }
    }
}
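
The topology above reads two side outputs from DealFileProcessFunction (metaToKafa and dealFileMetircTag). A minimal sketch of how a Flink ProcessFunction typically declares and feeds such side outputs; the tag field names mirror the ones referenced above, but the record keys and routing logic are hypothetical:

import com.alibaba.fastjson2.JSONObject;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

public class DealFileProcessFunctionSketch extends ProcessFunction<JSONObject, String> {
    // Anonymous subclasses so Flink can capture the generic type at runtime.
    public static final OutputTag<String> metaToKafa = new OutputTag<String>("file-metadata") {};
    public static final OutputTag<String> dealFileMetircTag = new OutputTag<String>("transfer-metrics") {};

    @Override
    public void processElement(JSONObject log, Context ctx, Collector<String> out) {
        // Main output: the completed record for the downstream Kafka sink.
        out.collect(log.toJSONString());
        // Side output 1: file metadata, if the record carries an unstructured-file field (hypothetical key).
        if (log.containsKey("file_meta")) {
            ctx.output(metaToKafa, log.getString("file_meta"));
        }
        // Side output 2: a marker consumed by SendCountProcess for the transfer statistics.
        ctx.output(dealFileMetircTag, log.getString("log_type"));
    }
}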