package com.zdjizhi.etl.connection; import cn.hutool.core.convert.Convert; import cn.hutool.core.date.DateUtil; import cn.hutool.log.Log; import cn.hutool.log.LogFactory; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction; import org.apache.flink.streaming.api.windowing.windows.TimeWindow; import org.apache.flink.util.Collector; import java.util.HashMap; import java.util.Map; /** * 对ip去重 */ public class Ip2IpGraphProcessFunction extends ProcessWindowFunction, Map, Tuple2, TimeWindow> { private static final Log logger = LogFactory.get(); @Override public void process(Tuple2 keys, Context context, Iterable> elements, Collector> out) { try { long lastFoundTime = DateUtil.currentSeconds(); for (Map log : elements) { long connStartTimetime = Convert.toLong(log.get("start_time")); lastFoundTime = connStartTimetime > lastFoundTime ? connStartTimetime : lastFoundTime; } Map newLog = new HashMap<>(); newLog.put("src_ip", keys.f0); newLog.put("dst_ip", keys.f1); newLog.put("last_found_time", lastFoundTime); out.collect(newLog); logger.debug("获取中间聚合结果:{}", newLog.toString()); } catch (Exception e) { logger.error("获取中间聚合结果失败,middleResult: {}", e); } } }