Optimize code: use windowAll for batched data operations

zhanghongqing
2022-07-12 19:24:53 +08:00
parent c1b70a6da0
commit 06042db9b1
35 changed files with 593 additions and 1027 deletions
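The point of the change set is to stop writing to ArangoDB one record at a time and instead collect a window's worth of results and write them in one bulk call. Below is a minimal sketch of that pattern, assuming a downstream bulk writer; the class name, window size, database, and collection are hypothetical and not taken from this diff:

import com.arangodb.ArangoCollection;
import com.arangodb.ArangoDB;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

// Hypothetical batch writer: buffers every record in the window, then issues one bulk insert.
public class ArangoBatchWriteFunction extends ProcessAllWindowFunction<Map<String, Object>, Void, TimeWindow> {

    private transient ArangoCollection collection;

    @Override
    public void open(Configuration parameters) {
        // placeholder connection settings, not read from this repository's config
        collection = new ArangoDB.Builder().build().db("graph").collection("ip2ip");
    }

    @Override
    public void process(Context context, Iterable<Map<String, Object>> elements, Collector<Void> out) {
        List<Map<String, Object>> batch = new ArrayList<>();
        elements.forEach(batch::add);
        // one round trip to ArangoDB per window instead of one per record
        collection.insertDocuments(batch);
    }
}

Wired up after the per-key aggregation, this would look like stream.windowAll(TumblingProcessingTimeWindows.of(Time.seconds(10))).process(new ArangoBatchWriteFunction()); the ArangoDB driver serializes the raw maps directly, so no per-element BaseDocument construction is needed.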


@@ -2,40 +2,39 @@ package com.zdjizhi.etl;
 import cn.hutool.core.convert.Convert;
 import cn.hutool.core.date.DateUtil;
-import com.arangodb.entity.BaseDocument;
+import cn.hutool.log.Log;
+import cn.hutool.log.LogFactory;
 import org.apache.flink.api.java.tuple.Tuple2;
 import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
 import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
 import org.apache.flink.util.Collector;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import java.util.HashMap;
 import java.util.Map;
 /**
  * Deduplicate by IP pair
  */
-public class Ip2IpGraphProcessFunction extends ProcessWindowFunction<Map<String, Object>, BaseDocument, Tuple2<String, String>, TimeWindow> {
+public class Ip2IpGraphProcessFunction extends ProcessWindowFunction<Map<String, Object>, Map<String, Object>, Tuple2<String, String>, TimeWindow> {
-    private static final Logger logger = LoggerFactory.getLogger(Ip2IpGraphProcessFunction.class);
+    private static final Log logger = LogFactory.get();
     @Override
-    public void process(Tuple2<String, String> keys, Context context, Iterable<Map<String, Object>> elements, Collector<BaseDocument> out) {
+    public void process(Tuple2<String, String> keys, Context context, Iterable<Map<String, Object>> elements, Collector<Map<String, Object>> out) {
         try {
             long lastFoundTime = DateUtil.currentSeconds();
             for (Map<String, Object> log : elements) {
-                long connStartTimetime = Convert.toLong(log.get("conn_start_time"));
+                long connStartTimetime = Convert.toLong(log.get("start_time"));
                 lastFoundTime = connStartTimetime > lastFoundTime ? connStartTimetime : lastFoundTime;
             }
-            BaseDocument baseDocument = new BaseDocument();
-            baseDocument.setKey(String.join("-", keys.f0, keys.f1));
-            baseDocument.addAttribute("src_ip", keys.f0);
-            baseDocument.addAttribute("dst_ip", keys.f1);
-            baseDocument.addAttribute("last_found_time", lastFoundTime);
-            out.collect(baseDocument);
-            logger.debug("Got intermediate aggregation result: {}", baseDocument.toString());
+            Map<String, Object> newLog = new HashMap<>();
+            newLog.put("src_ip", keys.f0);
+            newLog.put("dst_ip", keys.f1);
+            newLog.put("last_found_time", lastFoundTime);
+            out.collect(newLog);
+            logger.debug("Got intermediate aggregation result: {}", newLog.toString());
         } catch (Exception e) {
             logger.error("Failed to build intermediate aggregation result, middleResult: {}", e);
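After this refactor the function emits plain Maps instead of BaseDocuments, so the ArangoDB conversion can happen once, downstream, in the batching stage. A plausible wiring of the function (the upstream stream name, key selector, and 10-second window are assumptions for illustration, not read from this commit):

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import java.util.Map;

// `logs` is a hypothetical upstream stream of parsed log maps.
DataStream<Map<String, Object>> edges = logs
        // key by the (src_ip, dst_ip) pair the function deduplicates on; the explicit
        // TypeInformation avoids lambda type-erasure problems with generic Tuple2 keys
        .keyBy(
                (KeySelector<Map<String, Object>, Tuple2<String, String>>) log ->
                        Tuple2.of((String) log.get("src_ip"), (String) log.get("dst_ip")),
                Types.TUPLE(Types.STRING, Types.STRING))
        .window(TumblingProcessingTimeWindows.of(Time.seconds(10)))
        .process(new Ip2IpGraphProcessFunction());

// the per-key results can then be batched globally, e.g. with the windowAll writer sketched above
edges.windowAll(TumblingProcessingTimeWindows.of(Time.seconds(10)))
        .process(new ArangoBatchWriteFunction());

Keying by the IP pair keeps deduplication per edge, while the final windowAll stage deliberately funnels all edges through one parallel instance, trading parallelism for a single bulk write per window.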