2021-08-23 17:05:17 +08:00
|
|
|
|
package com.zdjizhi.topology;
|
|
|
|
|
|
|
|
|
|
|
|
import cn.hutool.log.Log;
|
|
|
|
|
|
import cn.hutool.log.LogFactory;
|
|
|
|
|
|
import com.zdjizhi.common.FlowWriteConfig;
|
|
|
|
|
|
import com.zdjizhi.utils.functions.FilterNullFunction;
|
|
|
|
|
|
import com.zdjizhi.utils.functions.MapCompletedFunction;
|
2021-11-07 17:13:13 +03:00
|
|
|
|
import com.zdjizhi.utils.functions.TypeMapCompletedFunction;
|
2022-03-08 15:18:28 +08:00
|
|
|
|
import com.zdjizhi.utils.kafka.KafkaConsumer;
|
|
|
|
|
|
import com.zdjizhi.utils.kafka.KafkaProducer;
|
2021-08-23 17:05:17 +08:00
|
|
|
|
import org.apache.flink.streaming.api.datastream.DataStream;
|
|
|
|
|
|
import org.apache.flink.streaming.api.datastream.DataStreamSource;
|
|
|
|
|
|
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
|
|
|
|
|
|
2022-03-08 15:18:28 +08:00
|
|
|
|
import java.util.Map;
|
|
|
|
|
|
|
2021-08-23 17:05:17 +08:00
|
|
|
|
/**
 * Flink topology that reads log records from Kafka, optionally completes and
 * transforms them (with or without schema-based type coercion), filters out
 * empty records, and writes the result back to Kafka.
 *
 * @author qidaijie
 * @Package com.zdjizhi.topology
 * @Description: Log flow-write Flink topology (Kafka in, clean/complete, Kafka out).
 * @date 2021/5/20 16:42
 */
|
|
|
|
|
|
public class LogFlowWriteTopology {
|
|
|
|
|
|
private static final Log logger = LogFactory.get();
|
|
|
|
|
|
|
|
|
|
|
|
public static void main(String[] args) {
|
|
|
|
|
|
final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
|
|
|
|
|
|
|
2021-11-11 09:14:09 +03:00
|
|
|
|
//两个输出之间的最大时间 (单位milliseconds)
|
|
|
|
|
|
environment.setBufferTimeout(FlowWriteConfig.BUFFER_TIMEOUT);
|
2021-11-07 17:13:13 +03:00
|
|
|
|
|
2021-08-23 17:05:17 +08:00
|
|
|
|
if (FlowWriteConfig.LOG_NEED_COMPLETE == 1) {
|
2022-03-08 15:18:28 +08:00
|
|
|
|
DataStreamSource<Map<String, Object>> streamSource = environment.addSource(KafkaConsumer.myDeserializationConsumer())
|
|
|
|
|
|
.setParallelism(FlowWriteConfig.SOURCE_PARALLELISM);
|
|
|
|
|
|
|
2021-11-07 17:13:13 +03:00
|
|
|
|
DataStream<String> cleaningLog;
|
|
|
|
|
|
switch (FlowWriteConfig.LOG_TRANSFORM_TYPE) {
|
|
|
|
|
|
case 0:
|
|
|
|
|
|
//对原始日志进行处理补全转换等,不对日志字段类型做校验。
|
|
|
|
|
|
cleaningLog = streamSource.map(new MapCompletedFunction()).name("MapCompletedFunction")
|
|
|
|
|
|
.setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
|
|
|
|
|
|
break;
|
|
|
|
|
|
case 1:
|
|
|
|
|
|
//对原始日志进行处理补全转换等,对日志字段类型做若校验,可根据schema进行强转。
|
|
|
|
|
|
cleaningLog = streamSource.map(new TypeMapCompletedFunction()).name("TypeMapCompletedFunction")
|
|
|
|
|
|
.setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
|
|
|
|
|
|
break;
|
|
|
|
|
|
default:
|
|
|
|
|
|
//对原始日志进行处理补全转换等,不对日志字段类型做校验。
|
|
|
|
|
|
cleaningLog = streamSource.map(new MapCompletedFunction()).name("MapCompletedFunction")
|
|
|
|
|
|
.setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2022-03-08 15:18:28 +08:00
|
|
|
|
//过滤空数据不发送到Kafka内
|
2021-09-27 11:11:56 +08:00
|
|
|
|
DataStream<String> result = cleaningLog.filter(new FilterNullFunction()).name("FilterAbnormalData")
|
|
|
|
|
|
.setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
|
2021-11-11 09:14:09 +03:00
|
|
|
|
|
2021-08-23 17:05:17 +08:00
|
|
|
|
//发送数据到Kafka
|
2022-03-08 15:18:28 +08:00
|
|
|
|
result.addSink(KafkaProducer.getKafkaProducer()).name("LogSinkKafka")
|
2021-11-07 17:13:13 +03:00
|
|
|
|
.setParallelism(FlowWriteConfig.SINK_PARALLELISM);
|
2021-08-23 17:05:17 +08:00
|
|
|
|
} else {
|
2022-03-08 15:18:28 +08:00
|
|
|
|
DataStreamSource<String> streamSource = environment.addSource(KafkaConsumer.flinkConsumer())
|
|
|
|
|
|
.setParallelism(FlowWriteConfig.SOURCE_PARALLELISM);
|
|
|
|
|
|
|
2021-09-27 11:11:56 +08:00
|
|
|
|
//过滤空数据不发送到Kafka内
|
|
|
|
|
|
DataStream<String> result = streamSource.filter(new FilterNullFunction()).name("FilterOriginalData")
|
|
|
|
|
|
.setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
|
2022-03-08 15:18:28 +08:00
|
|
|
|
|
2021-09-27 11:11:56 +08:00
|
|
|
|
//发送数据到Kafka
|
2022-03-08 15:18:28 +08:00
|
|
|
|
result.addSink(KafkaProducer.getKafkaProducer()).name("LogSinkKafka")
|
2021-11-07 17:13:13 +03:00
|
|
|
|
.setParallelism(FlowWriteConfig.SINK_PARALLELISM);
|
2021-08-23 17:05:17 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
environment.execute(args[0]);
|
|
|
|
|
|
} catch (Exception e) {
|
|
|
|
|
|
logger.error("This Flink task start ERROR! Exception information is :" + e);
|
2021-09-27 11:11:56 +08:00
|
|
|
|
e.printStackTrace();
|
2021-08-23 17:05:17 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}
|