package com.zdjizhi.etl;

import cn.hutool.core.convert.Convert;
import cn.hutool.core.date.DateUtil;
import com.arangodb.entity.BaseDocument;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Map;

/**
 * Deduplicates (src_ip, dst_ip) pairs within a window: for each keyed pair it
 * emits exactly one ArangoDB {@link BaseDocument} carrying the pair and the
 * latest connection start time observed, keyed as {@code "<src>-<dst>"} so
 * repeated upserts stay idempotent per IP pair.
 */
public class Ip2IpGraphProcessFunction
        extends ProcessWindowFunction<Map<String, Object>, BaseDocument, Tuple2<String, String>, TimeWindow> {

    private static final Logger logger = LoggerFactory.getLogger(Ip2IpGraphProcessFunction.class);

    /**
     * Aggregates all log records of one (src_ip, dst_ip) window into a single document.
     *
     * @param keys     f0 = source IP, f1 = destination IP
     * @param context  window context (unused)
     * @param elements all log records in this window for the key pair
     * @param out      collector receiving the single aggregated document
     */
    @Override
    public void process(Tuple2<String, String> keys,
                        Context context,
                        Iterable<Map<String, Object>> elements,
                        Collector<BaseDocument> out) {
        try {
            // Baseline is processing time in seconds, so last_found_time is never
            // earlier than "now" even if every record's conn_start_time is older.
            long lastFoundTime = DateUtil.currentSeconds();
            for (Map<String, Object> log : elements) {
                // Defaulting overload: a missing/unparsable conn_start_time yields 0L
                // (baseline wins in max) instead of an NPE aborting the whole window.
                long connStartTime = Convert.toLong(log.get("conn_start_time"), 0L);
                lastFoundTime = Math.max(connStartTime, lastFoundTime);
            }

            BaseDocument baseDocument = new BaseDocument();
            baseDocument.setKey(String.join("-", keys.f0, keys.f1));
            baseDocument.addAttribute("src_ip", keys.f0);
            baseDocument.addAttribute("dst_ip", keys.f1);
            baseDocument.addAttribute("last_found_time", lastFoundTime);
            out.collect(baseDocument);
            logger.debug("获取中间聚合结果:{}", baseDocument);
        } catch (Exception e) {
            // Throwable as trailing argument (no "{}" placeholder) so SLF4J logs
            // the full stack trace instead of only e.toString().
            logger.error("获取中间聚合结果失败,middleResult", e);
        }
    }
}