Improve unit tests, improve monitoring metrics, and add rate limiting
@@ -6,7 +6,7 @@
 
 <groupId>com.zdjizhi</groupId>
 <artifactId>file-chunk-combiner</artifactId>
-<version>1.1.0</version>
+<version>1.2.0</version>
 
 <repositories>
 <repository>
@@ -292,6 +292,14 @@
 </execution>
 </executions>
 </plugin>
+<plugin>
+<groupId>org.apache.maven.plugins</groupId>
+<artifactId>maven-surefire-plugin</artifactId>
+<version>3.1.2</version>
+<configuration>
+<redirectTestOutputToFile>true</redirectTestOutputToFile>
+</configuration>
+</plugin>
 </plugins>
 </build>
 </project>
@@ -40,12 +40,12 @@ public class FileChunkCombiner {
 SingleOutputStreamOperator<FileChunk> parseMessagePackStream = environment
 .addSource(KafkaConsumer.byteArrayConsumer(configuration))
 .name("Kafka Source")
-.map(new ParseMessagePackMapFunction())
+.map(new ParseMessagePackMapFunction(configuration.get(Configs.MAP_ENABLE_RATE_LIMIT), configuration.get(Configs.MAP_RATE_LIMIT_THRESHOLD)))
 .name("Map: Parse Message Pack")
 .filter(new FileChunkFilterFunction(configuration.getLong(Configs.FILE_MAX_SIZE), configuration.getString(Configs.FILTER_EXPRESSION)))
 .assignTimestampsAndWatermarks(watermarkStrategy);
 
-OutputTag<FileChunk> delayedChunkOutputTag = new OutputTag<FileChunk>("delayed-chunk") {
+OutputTag<FileChunk> delayedChunkOutputTag = new OutputTag<>("delayed-chunk") {
 };
 
 List<Trigger<Object, TimeWindow>> triggers = new ArrayList<>();
@@ -57,7 +57,7 @@ public class FileChunkCombiner {
 .window(TumblingEventTimeWindows.of(Time.seconds(configuration.get(Configs.COMBINER_WINDOW_TIME))))
 .trigger(trigger)
 .sideOutputLateData(delayedChunkOutputTag)
-.process(new CombineChunkProcessWindowFunction(configuration))
+.process(new CombineChunkProcessWindowFunction(configuration.get(Configs.FILE_MAX_CHUNK_COUNT)))
 .name("Window: Combine Chunk")
 .setParallelism(configuration.get(Configs.COMBINER_WINDOW_PARALLELISM))
 .disableChaining();
@@ -43,6 +43,13 @@ public class Configs {
 .stringType()
 .noDefaultValue();
 
+public static final ConfigOption<Boolean> MAP_ENABLE_RATE_LIMIT = ConfigOptions.key("map.enable.rate.limit")
+.booleanType()
+.defaultValue(false);
+public static final ConfigOption<Long> MAP_RATE_LIMIT_THRESHOLD = ConfigOptions.key("map.rate.limit.threshold")
+.longType()
+.defaultValue(Long.MAX_VALUE);
+
 public static final ConfigOption<Integer> COMBINER_WINDOW_PARALLELISM = ConfigOptions.key("combiner.window.parallelism")
 .intType()
 .defaultValue(1);
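The two options above default to the limiter being disabled and an effectively unlimited threshold. A minimal sketch of turning the limiter on through the job properties, using the keys defined above (the values shown are only illustrative; the common.properties change later in this commit sets false and 10000):

    map.enable.rate.limit=true
    map.rate.limit.threshold=10000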
@@ -1,27 +1,33 @@
 package com.zdjizhi.function;
 
-import com.zdjizhi.config.Configs;
+import cn.hutool.core.util.ArrayUtil;
+import cn.hutool.log.Log;
+import cn.hutool.log.LogFactory;
 import com.zdjizhi.pojo.FileChunk;
-import com.zdjizhi.utils.PublicUtil;
+import com.zdjizhi.utils.StringUtil;
 import org.apache.flink.configuration.Configuration;
 import org.apache.flink.metrics.Counter;
+import org.apache.flink.metrics.MeterView;
 import org.apache.flink.metrics.MetricGroup;
 import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
 import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
 import org.apache.flink.util.Collector;
 
 import java.util.*;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+
+import static com.zdjizhi.utils.PublicConstants.COMBINE_MODE_SEEK;
 
 public class CombineChunkProcessWindowFunction extends ProcessWindowFunction<FileChunk, FileChunk, String, TimeWindow> {
+private static final Log LOG = LogFactory.get();
+
-private transient Counter duplicateChunkCounter;
+public transient Counter duplicateChunkCounter;
 public transient Counter combineErrorCounter;
-public transient Counter seekChunkCounter;
+private final int fileMaxChunkCount;
-public transient Counter appendChunkCounter;
-private final Configuration configuration;
 
-public CombineChunkProcessWindowFunction(Configuration configuration) {
-this.configuration = configuration;
+public CombineChunkProcessWindowFunction(int fileMaxChunkCount) {
+this.fileMaxChunkCount = fileMaxChunkCount;
 }
 
 @Override
@@ -30,15 +36,126 @@ public class CombineChunkProcessWindowFunction extends ProcessWindowFunction<Fil
 MetricGroup metricGroup = getRuntimeContext().getMetricGroup();
 duplicateChunkCounter = metricGroup.counter("duplicateChunkCount");
 combineErrorCounter = metricGroup.counter("combineErrorCount");
-seekChunkCounter = metricGroup.counter("seekChunkCount");
+metricGroup.meter("duplicateChunkPerSecond", new MeterView(duplicateChunkCounter));
-appendChunkCounter = metricGroup.counter("appendChunkCount");
+metricGroup.meter("combineErrorPerSecond", new MeterView(combineErrorCounter));
 }
 
 @Override
 public void process(String string, Context context, Iterable<FileChunk> elements, Collector<FileChunk> out) {
-List<FileChunk> fileChunks = PublicUtil.combine(elements, configuration.get(Configs.FILE_MAX_CHUNK_COUNT), duplicateChunkCounter, combineErrorCounter, seekChunkCounter, appendChunkCounter);
+List<FileChunk> fileChunks = combine(elements);
 for (FileChunk fileChunk : fileChunks) {
 out.collect(fileChunk);
 }
 }
 
+private List<FileChunk> combine(Iterable<FileChunk> input) {
+List<FileChunk> combinedFileChunkList = new ArrayList<>();
+try {
+List<FileChunk> originalFileChunkList = StreamSupport.stream(input.spliterator(), false).collect(Collectors.toList());
+List<byte[]> waitingToCombineChunkList = new ArrayList<>();
+if (COMBINE_MODE_SEEK.equals(originalFileChunkList.get(0).getCombineMode())) {
+// sort by offset
+originalFileChunkList.sort(Comparator.comparingLong(FileChunk::getOffset));
+Iterator<FileChunk> originalFileChunkIterator = originalFileChunkList.iterator();
+if (originalFileChunkIterator.hasNext()) {
+int duplicateCount = 0;
+FileChunk currentFileChunk = originalFileChunkIterator.next();
+int lastChunkFlag = currentFileChunk.getLastChunkFlag();
+long startOffset = currentFileChunk.getOffset();
+if (currentFileChunk.getChunk() != null && currentFileChunk.getChunk().length > 0) {
+waitingToCombineChunkList.add(currentFileChunk.getChunk());
+}
+while (originalFileChunkIterator.hasNext()) {
+long expectedOffset = currentFileChunk.getOffset() + currentFileChunk.getLength();
+currentFileChunk = originalFileChunkIterator.next();
+long actualOffset = currentFileChunk.getOffset();
+if (expectedOffset > actualOffset) { // expected offset greater than the current offset: duplicate chunk, skip it
+duplicateCount++;
+duplicateChunkCounter.inc();
+} else if (expectedOffset == actualOffset) { // expected offset equals the current offset: add the chunk to the pending list
+if (currentFileChunk.getLastChunkFlag() == 1) {
+lastChunkFlag = currentFileChunk.getLastChunkFlag();
+}
+if (currentFileChunk.getChunk() != null && currentFileChunk.getChunk().length > 0) {
+waitingToCombineChunkList.add(currentFileChunk.getChunk());
+}
+} else { // expected offset less than the current offset: a chunk is missing
+if (waitingToCombineChunkList.size() > 0) { // merge the chunks collected so far and clear the list
+combinedFileChunkList.add(combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), 0, null));
+waitingToCombineChunkList.clear();
+} else {
+if (lastChunkFlag == 1) {
+combinedFileChunkList.add(currentFileChunk);
+}
+}
+// treat the current chunk as the first chunk and continue merging
+startOffset = currentFileChunk.getOffset(); // reset the start offset
+lastChunkFlag = currentFileChunk.getLastChunkFlag();
+if (currentFileChunk.getChunk() != null && currentFileChunk.getChunk().length > 0) {
+waitingToCombineChunkList.add(currentFileChunk.getChunk());
+}
+}
+}
+if (waitingToCombineChunkList.size() > 0) {
+combinedFileChunkList.add(combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), 0, null));
+} else {
+if (lastChunkFlag == 1) {
+combinedFileChunkList.add(currentFileChunk);
+}
+}
+if (duplicateCount > 0) {
+LOG.error("Duplicate upload chunk, uuid=" + currentFileChunk.getUuid() + ", repetitionCount=" + duplicateCount);
+}
+}
+} else {
+// sort by timestamp
+originalFileChunkList.sort(Comparator.comparingLong(FileChunk::getTimestamp));
+long startTimestamp = originalFileChunkList.get(0).getTimestamp();
+StringBuilder timestampAndSizes = new StringBuilder();
+for (FileChunk originalFileChunk : originalFileChunkList) {
+byte[] chunk = originalFileChunk.getChunk();
+if (chunk != null && chunk.length > 0) {
+chunk = originalFileChunk.getChunk();
+waitingToCombineChunkList.add(chunk);
+timestampAndSizes.append(originalFileChunk.getTimestamp()).append("-").append(chunk.length).append(";");
+}
+if (waitingToCombineChunkList.size() > fileMaxChunkCount) {
+break;
+}
+}
+if (waitingToCombineChunkList.size() > 0) {
+combinedFileChunkList.add(combineChunk(waitingToCombineChunkList, originalFileChunkList.get(0).getUuid(), originalFileChunkList.get(0).getFileName(), originalFileChunkList.get(0).getFileType(), 0, "append", 0, originalFileChunkList.get(0).getMeta(), startTimestamp, timestampAndSizes.toString()));
+}
+}
+} catch (Exception e) {
+LOG.error("Combiner chunk error.", e);
+combineErrorCounter.inc();
+}
+return combinedFileChunkList;
+}
+
+private FileChunk combineChunk(List<byte[]> byteList, String uuid, String fileName, String fileType, long offset, String combineMode, int lastChunkFlag, Map<String, Object> metaMap, long startTimestamp, String chunkNumbers) {
+FileChunk fileChunk = new FileChunk();
+fileChunk.setChunkCount(byteList.size());
+byte[][] bytes = new byte[byteList.size()][];
+byteList.toArray(bytes);
+byte[] newData = ArrayUtil.addAll(bytes);
+if (COMBINE_MODE_SEEK.equals(combineMode)) {
+fileChunk.setOffset(offset);
+fileChunk.setLastChunkFlag(lastChunkFlag);
+} else {
+if (StringUtil.isNotEmpty(chunkNumbers)) {
+fileChunk.setChunkNumbers(chunkNumbers);
+}
+}
+fileChunk.setTimestamp(startTimestamp);
+fileChunk.setFileType(fileType);
+fileChunk.setUuid(uuid);
+fileChunk.setChunk(newData);
+fileChunk.setFileName(fileName);
+fileChunk.setCombineMode(combineMode);
+fileChunk.setLength(newData.length);
+fileChunk.setMeta(metaMap);
+return fileChunk;
+}
 }
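Alongside the new combine() implementation, the per-mode counters (seekChunkCount, appendChunkCount) are dropped and per-second meters are derived from the counters that remain. Flink's MeterView wraps an existing Counter and exposes its rate, so the same underlying count is reported both as a total and as a rate. A minimal sketch of the pattern this commit applies in every operator (the metric names below are illustrative, not part of the commit):

    Counter requestCounter = metricGroup.counter("requestCount");
    // rate derived from the counter over MeterView's default time span
    metricGroup.meter("requestPerSecond", new MeterView(requestCounter));
    // the sink hunks below pass an explicit 5-second span for their main counters
    metricGroup.meter("requestPerSecond5s", new MeterView(requestCounter, 5));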
@@ -6,12 +6,13 @@ import org.apache.commons.jexl3.*;
 import org.apache.flink.api.common.functions.RichFilterFunction;
 import org.apache.flink.configuration.Configuration;
 import org.apache.flink.metrics.Counter;
+import org.apache.flink.metrics.MeterView;
 import org.apache.flink.metrics.MetricGroup;
 
 public class FileChunkFilterFunction extends RichFilterFunction<FileChunk> {
 private final long maxFileSize;
 private final String filterExpression;
-private transient Counter filterChunkCounter;
+public transient Counter filterChunkCounter;
 private JexlExpression jexlExpression;
 private JexlContext jexlContext;
@@ -25,6 +26,7 @@ public class FileChunkFilterFunction extends RichFilterFunction<FileChunk> {
 super.open(parameters);
 MetricGroup metricGroup = getRuntimeContext().getMetricGroup();
 filterChunkCounter = metricGroup.counter("filterChunkCount");
+metricGroup.meter("filterChunkPerSecond", new MeterView(filterChunkCounter));
 JexlEngine jexlEngine = new JexlBuilder().create();
 jexlExpression = jexlEngine.createExpression(filterExpression);
 jexlContext = new MapContext();
@@ -6,17 +6,68 @@ import cn.hutool.log.Log;
 import cn.hutool.log.LogFactory;
 import com.zdjizhi.pojo.FileChunk;
 import org.apache.flink.api.common.functions.RichMapFunction;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.metrics.*;
 import org.msgpack.core.MessagePack;
 import org.msgpack.core.MessageUnpacker;
 
 import java.util.HashMap;
 import java.util.Map;
 
+import static com.zdjizhi.utils.PublicConstants.COMBINE_MODE_APPEND;
+
 public class ParseMessagePackMapFunction extends RichMapFunction<byte[], FileChunk> {
 private static final Log LOG = LogFactory.get();
 
+public transient Counter parseMessagePackCounter;
+public transient Counter parseMessagePackErrorCounter;
+public transient Counter rateLimitDropCounter;
+private final boolean enableRateLimit;
+private final long rateLimitThreshold;
+private long timestamp;
+private long count;
+
+public ParseMessagePackMapFunction(boolean enableRateLimit, long rateLimitThreshold) {
+this.rateLimitThreshold = rateLimitThreshold;
+this.enableRateLimit = enableRateLimit;
+}
+
+@Override
+public void open(Configuration parameters) throws Exception {
+super.open(parameters);
+MetricGroup metricGroup = getRuntimeContext().getMetricGroup();
+parseMessagePackCounter = metricGroup.counter("parseMessagePackCount");
+parseMessagePackErrorCounter = metricGroup.counter("parseMessagePackErrorCount");
+rateLimitDropCounter = metricGroup.counter("rateLimitDropCount");
+metricGroup.meter("parseMessagePackPerSecond", new MeterView(parseMessagePackCounter));
+metricGroup.meter("parseMessagePackErrorPerSecond", new MeterView(parseMessagePackErrorCounter));
+metricGroup.meter("rateLimitDropPerSecond", new MeterView(rateLimitDropCounter));
+timestamp = System.currentTimeMillis();
+count = 0;
+}
+
 @Override
 public FileChunk map(byte[] messagePackData) {
+FileChunk fileChunk = null;
+if (enableRateLimit) {
+count++;
+if (System.currentTimeMillis() - timestamp < 1000 && count <= rateLimitThreshold) {
+fileChunk = parseMessagePack(messagePackData);
+} else if (System.currentTimeMillis() - timestamp < 1000 && count > rateLimitThreshold) {
+rateLimitDropCounter.inc();
+} else {
+rateLimitDropCounter.inc();
+timestamp = System.currentTimeMillis();
+count = 0;
+}
+} else {
+fileChunk = parseMessagePack(messagePackData);
+}
+return fileChunk;
+}
+
+private FileChunk parseMessagePack(byte[] messagePackData) {
+parseMessagePackCounter.inc();
 FileChunk fileChunk;
 try {
 fileChunk = new FileChunk();
@@ -66,10 +117,11 @@ public class ParseMessagePackMapFunction extends RichMapFunction<byte[], FileChu
 break;
 }
 }
-if ("append".equals(fileChunk.getCombineMode())) {
+if (COMBINE_MODE_APPEND.equals(fileChunk.getCombineMode())) {
 fileChunk.setLastChunkFlag(0);
 }
 } catch (Exception e) {
+parseMessagePackErrorCounter.inc();
 LOG.error("Parse messagePack failed.", e);
 fileChunk = null;
 }
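The limiter added to map() is a fixed one-second window: records are counted from a stored start time, anything over the threshold inside the window is dropped, and once more than a second has passed the window resets (the record that rolls the window over is itself dropped and the count restarted). A distilled sketch of that decision logic, with illustrative names that are not part of the commit:

    // Minimal fixed-window limiter mirroring the logic in map() above.
    public class FixedWindowRateLimiter {
        private final long threshold;
        private long windowStart = System.currentTimeMillis();
        private long count = 0;

        public FixedWindowRateLimiter(long threshold) {
            this.threshold = threshold;
        }

        // true = process the record, false = drop it
        public boolean tryAcquire() {
            count++;
            long now = System.currentTimeMillis();
            if (now - windowStart < 1000 && count <= threshold) {
                return true;   // inside the window and under the threshold
            } else if (now - windowStart < 1000) {
                return false;  // inside the window but over the threshold
            } else {
                windowStart = now;  // window elapsed: reset; as in the commit, this record is also dropped
                count = 0;
                return false;
            }
        }
    }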
@@ -4,31 +4,29 @@ import com.zdjizhi.pojo.FileChunk;
 import org.apache.flink.api.common.functions.RichMapFunction;
 import org.apache.flink.configuration.Configuration;
 import org.apache.flink.metrics.Counter;
+import org.apache.flink.metrics.MeterView;
 import org.apache.flink.metrics.MetricGroup;
 
-import static com.zdjizhi.utils.PublicConstants.COMBINE_MODE_SEEK;
+import static com.zdjizhi.utils.PublicConstants.COMBINE_MODE_APPEND;
 
 public class SideOutputMapFunction extends RichMapFunction<FileChunk, FileChunk> {
 
-private transient Counter pcapDelayedChunkCounter;
+private transient Counter delayedChunkCounter;
-private transient Counter trafficDelayedChunkCounter;
 
 @Override
 public void open(Configuration parameters) throws Exception {
 super.open(parameters);
 MetricGroup metricGroup = getRuntimeContext().getMetricGroup();
-pcapDelayedChunkCounter = metricGroup.counter("pcapDelayedChunkCount");
+delayedChunkCounter = metricGroup.counter("delayedChunkCount");
-trafficDelayedChunkCounter = metricGroup.counter("trafficDelayedChunkCount");
+metricGroup.meter("delayedChunkPerSecond", new MeterView(delayedChunkCounter));
 }
 
 @Override
 public FileChunk map(FileChunk fileChunk) {
+delayedChunkCounter.inc();
 fileChunk.setChunkCount(1);
-if (COMBINE_MODE_SEEK.equals(fileChunk.getCombineMode())) {
+if (COMBINE_MODE_APPEND.equals(fileChunk.getCombineMode())) {
-trafficDelayedChunkCounter.inc();
-} else {
 fileChunk.setChunkNumbers(fileChunk.getTimestamp() + "-" + fileChunk.getChunk().length + ";");
-pcapDelayedChunkCounter.inc();
 }
 return fileChunk;
 }
@@ -167,16 +167,19 @@ public class FileChunk implements Serializable {
 length == fileChunk.length &&
 lastChunkFlag == fileChunk.lastChunkFlag &&
 chunkCount == fileChunk.chunkCount &&
+timestamp == fileChunk.timestamp &&
 Objects.equals(uuid, fileChunk.uuid) &&
 Objects.equals(fileName, fileChunk.fileName) &&
 Objects.equals(fileType, fileChunk.fileType) &&
 Arrays.equals(chunk, fileChunk.chunk) &&
-Objects.equals(combineMode, fileChunk.combineMode);
+Objects.equals(combineMode, fileChunk.combineMode) &&
+Objects.equals(meta, fileChunk.meta) &&
+Objects.equals(chunkNumbers, fileChunk.chunkNumbers);
 }
 
 @Override
 public int hashCode() {
-int result = Objects.hash(uuid, fileName, fileType, offset, length, combineMode, lastChunkFlag, chunkCount);
+int result = Objects.hash(uuid, fileName, fileType, offset, length, combineMode, lastChunkFlag, chunkCount, timestamp, meta, chunkNumbers);
 result = 31 * result + Arrays.hashCode(chunk);
 return result;
 }
@@ -11,6 +11,7 @@ import com.zdjizhi.utils.PublicConstants;
 import com.zdjizhi.utils.PublicUtil;
 import org.apache.flink.configuration.Configuration;
 import org.apache.flink.metrics.Counter;
+import org.apache.flink.metrics.MeterView;
 import org.apache.flink.metrics.MetricGroup;
 import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
 import org.apache.hadoop.hbase.TableName;
@@ -61,6 +62,10 @@ public class HBaseSink extends RichSinkFunction<FileChunk> {
 sendHBaseErrorCounter = metricGroup.counter("sendHBaseErrorCount");
 sendHBaseFileCounter = metricGroup.counter("sendHBaseFileCount");
 sendHBaseChunkCounter = metricGroup.counter("sendHBaseChunkCount");
+metricGroup.meter("sendHBasePerSecond", new MeterView(sendHBaseCounter, 5));
+metricGroup.meter("sendHBaseErrorPerSecond", new MeterView(sendHBaseErrorCounter));
+metricGroup.meter("sendHBaseFilePerSecond", new MeterView(sendHBaseFileCounter));
+metricGroup.meter("sendHBaseChunkPerSecond", new MeterView(sendHBaseChunkCounter));
 isAsync = configuration.getBoolean(Configs.SINK_ASYNC);
 if (isAsync) {
 AsyncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getAsyncHBaseConnection();
@@ -11,6 +11,7 @@ import com.zdjizhi.utils.PublicUtil;
 import org.apache.commons.lang.CharEncoding;
 import org.apache.flink.configuration.Configuration;
 import org.apache.flink.metrics.Counter;
+import org.apache.flink.metrics.MeterView;
 import org.apache.flink.metrics.MetricGroup;
 import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
 import org.apache.http.HttpResponse;
@@ -69,6 +70,10 @@ public class HosSink extends RichSinkFunction<FileChunk> {
 sendHosErrorCounter = metricGroup.counter("sendHosErrorCount");
 sendHosFileCounter = metricGroup.counter("sendHosFileCount");
 sendHosChunkCounter = metricGroup.counter("sendHosChunkCount");
+metricGroup.meter("sendHosPerSecond", new MeterView(sendHosCounter, 5));
+metricGroup.meter("sendHosErrorPerSecond", new MeterView(sendHosErrorCounter));
+metricGroup.meter("sendHosFilePerSecond", new MeterView(sendHosFileCounter));
+metricGroup.meter("sendHosChunkPerSecond", new MeterView(sendHosChunkCounter));
 loadBalanceMode = configuration.getInteger(Configs.SINK_HOS_LOAD_BALANCE_MODE);
 if (loadBalanceMode == 0) {
 endpoint = configuration.getString(Configs.SINK_HOS_ENDPOINT);
@@ -1,134 +1,10 @@
 package com.zdjizhi.utils;
 
-import cn.hutool.core.util.ArrayUtil;
 import cn.hutool.crypto.digest.DigestUtil;
-import cn.hutool.log.Log;
-import cn.hutool.log.LogFactory;
-import com.zdjizhi.pojo.FileChunk;
-import org.apache.flink.metrics.Counter;
 
 import java.util.*;
-import java.util.stream.Collectors;
-import java.util.stream.StreamSupport;
 
-import static com.zdjizhi.utils.PublicConstants.COMBINE_MODE_SEEK;
-
 public class PublicUtil {
-private static final Log LOG = LogFactory.get();
-
-public static List<FileChunk> combine(Iterable<FileChunk> input, long keyMaxChunk, Counter duplicateChunkCounter, Counter combineErrorCounter, Counter seekChunkCounter, Counter appendChunkCounter) {
-List<FileChunk> combinedFileChunkList = new ArrayList<>();
-try {
-List<FileChunk> originalFileChunkList = StreamSupport.stream(input.spliterator(), false).collect(Collectors.toList());
-List<byte[]> waitingToCombineChunkList = new ArrayList<>();
-if (COMBINE_MODE_SEEK.equals(originalFileChunkList.get(0).getCombineMode())) {
-seekChunkCounter.inc();
-// sort by offset
-originalFileChunkList.sort(Comparator.comparingLong(FileChunk::getOffset));
-Iterator<FileChunk> originalFileChunkIterator = originalFileChunkList.iterator();
-if (originalFileChunkIterator.hasNext()) {
-int duplicateCount = 0;
-FileChunk currentFileChunk = originalFileChunkIterator.next();
-int lastChunkFlag = currentFileChunk.getLastChunkFlag();
-long startOffset = currentFileChunk.getOffset();
-if (currentFileChunk.getChunk() != null && currentFileChunk.getChunk().length > 0) {
-waitingToCombineChunkList.add(currentFileChunk.getChunk());
-}
-while (originalFileChunkIterator.hasNext()) {
-seekChunkCounter.inc();
-long expectedOffset = currentFileChunk.getOffset() + currentFileChunk.getLength();
-currentFileChunk = originalFileChunkIterator.next();
-long actualOffset = currentFileChunk.getOffset();
-if (expectedOffset > actualOffset) { // expected offset greater than the current offset: duplicate chunk, skip it
-duplicateCount++;
-} else if (expectedOffset == actualOffset) { // expected offset equals the current offset: add the chunk to the pending list
-if (currentFileChunk.getLastChunkFlag() == 1) {
-lastChunkFlag = currentFileChunk.getLastChunkFlag();
-}
-if (currentFileChunk.getChunk() != null && currentFileChunk.getChunk().length > 0) {
-waitingToCombineChunkList.add(currentFileChunk.getChunk());
-}
-} else { // expected offset less than the current offset: a chunk is missing
-if (waitingToCombineChunkList.size() > 0) { // merge the chunks collected so far and clear the list
-combinedFileChunkList.add(combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), 0, null));
-waitingToCombineChunkList.clear();
-} else {
-if (lastChunkFlag == 1) {
-combinedFileChunkList.add(currentFileChunk);
-}
-}
-// treat the current chunk as the first chunk and continue merging
-startOffset = currentFileChunk.getOffset(); // reset the start offset
-lastChunkFlag = currentFileChunk.getLastChunkFlag();
-if (currentFileChunk.getChunk() != null && currentFileChunk.getChunk().length > 0) {
-waitingToCombineChunkList.add(currentFileChunk.getChunk());
-}
-}
-}
-if (waitingToCombineChunkList.size() > 0) {
-combinedFileChunkList.add(combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), 0, null));
-} else {
-if (lastChunkFlag == 1) {
-combinedFileChunkList.add(currentFileChunk);
-}
-}
-if (duplicateCount > 0) {
-LOG.error("Duplicate upload chunk, uuid=" + currentFileChunk.getUuid() + ", repetitionCount=" + duplicateCount);
-duplicateChunkCounter.inc(duplicateCount);
-}
-}
-} else {
-// sort by timestamp
-originalFileChunkList.sort(Comparator.comparingLong(FileChunk::getTimestamp));
-long startTimestamp = originalFileChunkList.get(0).getTimestamp();
-StringBuilder timestampAndSizes = new StringBuilder();
-for (FileChunk originalFileChunk : originalFileChunkList) {
-appendChunkCounter.inc();
-byte[] chunk = originalFileChunk.getChunk();
-if (chunk != null && chunk.length > 0) {
-chunk = originalFileChunk.getChunk();
-waitingToCombineChunkList.add(chunk);
-timestampAndSizes.append(originalFileChunk.getTimestamp()).append("-").append(chunk.length).append(";");
-}
-if (waitingToCombineChunkList.size() > keyMaxChunk) {
-break;
-}
-}
-if (waitingToCombineChunkList.size() > 0) {
-combinedFileChunkList.add(combineChunk(waitingToCombineChunkList, originalFileChunkList.get(0).getUuid(), originalFileChunkList.get(0).getFileName(), originalFileChunkList.get(0).getFileType(), 0, "append", 0, originalFileChunkList.get(0).getMeta(), startTimestamp, timestampAndSizes.toString()));
-}
-}
-} catch (Exception e) {
-LOG.error("Combiner file error.", e);
-combineErrorCounter.inc();
-}
-return combinedFileChunkList;
-}
-
-private static FileChunk combineChunk(List<byte[]> byteList, String uuid, String fileName, String fileType, long offset, String combineMode, int lastChunkFlag, Map<String, Object> metaMap, long startTimestamp, String chunkNumbers) {
-FileChunk fileChunk = new FileChunk();
-fileChunk.setChunkCount(byteList.size());
-byte[][] bytes = new byte[byteList.size()][];
-byteList.toArray(bytes);
-byte[] newData = ArrayUtil.addAll(bytes);
-if (COMBINE_MODE_SEEK.equals(combineMode)) {
-fileChunk.setOffset(offset);
-fileChunk.setLastChunkFlag(lastChunkFlag);
-} else {
-if (StringUtil.isNotEmpty(chunkNumbers)) {
-fileChunk.setChunkNumbers(chunkNumbers);
-}
-}
-fileChunk.setTimestamp(startTimestamp);
-fileChunk.setFileType(fileType);
-fileChunk.setUuid(uuid);
-fileChunk.setChunk(newData);
-fileChunk.setFileName(fileName);
-fileChunk.setCombineMode(combineMode);
-fileChunk.setLength(newData.length);
-fileChunk.setMeta(metaMap);
-return fileChunk;
-}
 
 public static String getUUID() {
 return UUID.randomUUID().toString().replace("-", "").toLowerCase();
@@ -18,6 +18,8 @@ source.kafka.user=admin
 source.kafka.pin=galaxy2019
 #SSL
 source.kafka.tools.library=D:\\K18-Phase2\\tsgSpace\\dat\\tsg\\
+map.enable.rate.limit=false
+map.rate.limit.threshold=10000
 #
 combiner.window.parallelism=2
 combiner.window.time=10
@@ -31,14 +33,14 @@ file.max.size=1073741824
 sink.parallelism=2
 sink.type=hos
 sink.async=false
-sink.batch=true
+sink.batch=false
 sink.batch.count=100
 sink.batch.size=102400
 #hos sink settings
 #0: via nginx, 1: direct to hos (default 0)
 sink.hos.load.balance.mode=1
 #nginx: ip:port, hos: ip1,ip2:port1,port2
-sink.hos.endpoint=192.168.40.151,192.168.40.152,192.168.40.203:8186
+sink.hos.endpoint=192.168.41.29:8186
 sink.hos.bucket=traffic_file_bucket
 sink.hos.token=c21f969b5f03d33d43e04f8f136e7682
 sink.hos.http.error.retry=3
@@ -4,36 +4,29 @@ import cn.hutool.core.io.FileUtil;
 import cn.hutool.core.util.ArrayUtil;
 import cn.hutool.core.util.RandomUtil;
 import com.zdjizhi.config.Configs;
-import com.zdjizhi.function.CombineChunkProcessWindowFunction;
+import com.zdjizhi.function.*;
-import com.zdjizhi.function.FileChunkFilterFunction;
-import com.zdjizhi.function.FileChunkKeySelector;
-import com.zdjizhi.function.ParseMessagePackMapFunction;
 import com.zdjizhi.pojo.FileChunk;
 import com.zdjizhi.sink.HBaseSink;
 import com.zdjizhi.sink.HosSink;
 import com.zdjizhi.trigger.LastChunkOrNoDataInTimeTrigger;
 import com.zdjizhi.trigger.MultipleTrigger;
-import com.zdjizhi.utils.PublicUtil;
 import org.apache.flink.api.common.ExecutionConfig;
 import org.apache.flink.api.common.eventtime.WatermarkStrategy;
-import org.apache.flink.api.common.functions.FilterFunction;
 import org.apache.flink.api.common.functions.RuntimeContext;
-import org.apache.flink.api.common.state.ListStateDescriptor;
 import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
 import org.apache.flink.api.common.typeinfo.TypeInformation;
-import org.apache.flink.api.common.typeutils.base.ListSerializer;
 import org.apache.flink.api.java.typeutils.PojoTypeInfo;
 import org.apache.flink.api.java.utils.ParameterTool;
 import org.apache.flink.configuration.Configuration;
-import org.apache.flink.metrics.Counter;
-import org.apache.flink.metrics.SimpleCounter;
-import org.apache.flink.runtime.state.JavaSerializer;
 import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.datastream.DataStreamSource;
 import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
 import org.apache.flink.streaming.api.functions.sink.SinkFunction;
 import org.apache.flink.streaming.api.functions.source.SourceFunction;
 import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
 import org.apache.flink.streaming.api.operators.*;
+import org.apache.flink.streaming.api.transformations.OneInputTransformation;
 import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
 import org.apache.flink.streaming.api.windowing.time.Time;
 import org.apache.flink.streaming.api.windowing.triggers.EventTimeTrigger;
@@ -49,6 +42,7 @@ import org.apache.flink.streaming.util.TestHarnessUtil;
 import org.apache.flink.streaming.util.functions.StreamingFunctionUtils;
 import org.apache.flink.test.util.MiniClusterWithClientResource;
 import org.apache.flink.util.Collector;
+import org.apache.flink.util.OutputTag;
 import org.junit.*;
 import org.mockito.Mockito;
 import org.mockito.invocation.InvocationOnMock;
@@ -60,38 +54,28 @@ import java.util.*;
 import java.util.concurrent.ConcurrentLinkedQueue;
 
 public class FileChunkCombinerTests {
-private static Counter duplicateChunkCounter;
-private static Counter combineErrorCounter;
-private static Counter seekChunkCounter;
-private static Counter appendChunkCounter;
 private File emlFile;
 private byte[] emlFileBytes;
 private byte[] pcapngFileBytes;
 private List<FileChunk> inputFileChunks;
+private List<FileChunk> inputFiles;
 private List<byte[]> messagePackList;
 private List<FileChunk> emlFileChunks;
 private List<FileChunk> pcapngFileChunks;
 private List<FileChunk> pcapngIncludeMetaFileChunks;
 private Map<String, Object> pcapngFileMeta;
-private String emlUuid = "1111111111";
-private String pcapngUuid = "2222222222";
-private String pcapngIncludeMetaUuid = "3333333333";
 private int emlChunkCount = 10;
 private int pcapngChunkCount = 10;
-private long maxChunkCount;
 private String pcapChunkData = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
 private static Configuration configuration;
+private CombineChunkProcessWindowFunction processWindowFunction;
+private OutputTag<FileChunk> delayedChunkOutputTag;
+private KeyedOneInputStreamOperatorTestHarness<String, FileChunk, FileChunk> testHarness;
 
 @Before
 public void testBefore() throws Exception {
-String path = FileChunkCombinerTests.class.getClassLoader().getResource("common.properties").getPath();
+ParameterTool parameterTool = ParameterTool.fromPropertiesFile(FileChunkCombinerTests.class.getClassLoader().getResource("common.properties").getPath());
-ParameterTool parameterTool = ParameterTool.fromPropertiesFile(path);
 configuration = parameterTool.getConfiguration();
-duplicateChunkCounter = new SimpleCounter();
-combineErrorCounter = new SimpleCounter();
-seekChunkCounter = new SimpleCounter();
-appendChunkCounter = new SimpleCounter();
-maxChunkCount = configuration.get(Configs.FILE_MAX_CHUNK_COUNT);
 String filePath = "src" + File.separator + "test" + File.separator + "data" + File.separator + "test.eml";
 emlFile = new File(filePath);
 emlFileBytes = FileUtil.readBytes(emlFile);
@@ -104,38 +88,68 @@ public class FileChunkCombinerTests {
 pcapngFileMeta.put("ruleId", 151);
 pcapngFileMeta.put("taskId", 7477);
 pcapngFileMeta.put("sledIP", "127.0.0.1");
-inputFileChunks = new ArrayList<>();
 emlFileChunks = new ArrayList<>();
 pcapngFileChunks = new ArrayList<>();
 pcapngIncludeMetaFileChunks = new ArrayList<>();
-ParseMessagePackMapFunction mapFunction = new ParseMessagePackMapFunction();
-ObjectInputStream inputStream = new ObjectInputStream(new FileInputStream("src" + File.separator + "test" + File.separator + "data" + File.separator + "messagePacks"));
-messagePackList = (List<byte[]>) inputStream.readObject();
-for (byte[] messagePack : messagePackList) {
-FileChunk fileChunk = mapFunction.map(messagePack);
-inputFileChunks.add(fileChunk);
-}
+ObjectInputStream messagePacksInputStream = new ObjectInputStream(new FileInputStream("src" + File.separator + "test" + File.separator + "data" + File.separator + "messagePacks"));
+messagePackList = (List<byte[]>) messagePacksInputStream.readObject();
+messagePacksInputStream.close();
+ObjectInputStream fileChunksInputStream = new ObjectInputStream(new FileInputStream("src" + File.separator + "test" + File.separator + "data" + File.separator + "fileChunks"));
+inputFileChunks = (List<FileChunk>) fileChunksInputStream.readObject();
+fileChunksInputStream.close();
+ObjectInputStream filesInputStream = new ObjectInputStream(new FileInputStream("src" + File.separator + "test" + File.separator + "data" + File.separator + "files"));
+inputFiles = (List<FileChunk>) filesInputStream.readObject();
+filesInputStream.close();
+StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
+List<Trigger<Object, TimeWindow>> triggers = new ArrayList<>();
+triggers.add(EventTimeTrigger.create());
+triggers.add(LastChunkOrNoDataInTimeTrigger.of(1000));
+Trigger<Object, TimeWindow> trigger = MultipleTrigger.of(triggers);
+processWindowFunction = new CombineChunkProcessWindowFunction(Integer.MAX_VALUE);
+delayedChunkOutputTag = new OutputTag<>("delayed-chunk") {};
+DataStreamSource<FileChunk> source = env.fromCollection(inputFileChunks);
+DataStream<FileChunk> window = source
+.keyBy(new FileChunkKeySelector())
+.window(TumblingEventTimeWindows.of(Time.seconds(3)))
+.trigger(trigger)
+.sideOutputLateData(delayedChunkOutputTag)
+.process(processWindowFunction);
+OneInputTransformation<FileChunk, FileChunk> transform = (OneInputTransformation<FileChunk, FileChunk>) window.getTransformation();
+OneInputStreamOperator<FileChunk, FileChunk> operator = transform.getOperator();
+WindowOperator<String, FileChunk, FileChunk, FileChunk, TimeWindow> winOperator = (WindowOperator<String, FileChunk, FileChunk, FileChunk, TimeWindow>) operator;
+testHarness = new KeyedOneInputStreamOperatorTestHarness<>(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO);
 }
 
 @Test
 public void testParseMessagePackMapFunction() throws Exception {
-OneInputStreamOperatorTestHarness<byte[], FileChunk> testHarness = new OneInputStreamOperatorTestHarness<>(new StreamMap<>(new ParseMessagePackMapFunction()));
+ParseMessagePackMapFunction mapFunction = new ParseMessagePackMapFunction(false, Long.MAX_VALUE);
+OneInputStreamOperatorTestHarness<byte[], FileChunk> testHarness = new OneInputStreamOperatorTestHarness<>(new StreamMap<>(mapFunction));
 testHarness.setup();
 testHarness.open();
 for (byte[] messagePack : messagePackList) {
 testHarness.processElement(new StreamRecord<>(messagePack));
 }
-ConcurrentLinkedQueue<Object> output = testHarness.getOutput();
-Assert.assertEquals(30, output.size());
-for (Object o : output) {
-FileChunk fileChunk = ((StreamRecord<FileChunk>) o).getValue();
-Assert.assertNotEquals("解析MessagePack数据失败", null, fileChunk.getUuid());
+ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
+for (FileChunk fileChunk : inputFileChunks) {
+expectedOutput.add(new StreamRecord<>(fileChunk));
 }
+ConcurrentLinkedQueue<Object> actualOutput = testHarness.getOutput();
+Assert.assertEquals(30, actualOutput.size());
+TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expectedOutput, actualOutput, (o1, o2) -> {
+StreamRecord sr0 = (StreamRecord) o1;
+StreamRecord sr1 = (StreamRecord) o2;
+return ((FileChunk) sr0.getValue()).getUuid().compareTo(((FileChunk) sr1.getValue()).getUuid());
+});
+Assert.assertEquals(30, mapFunction.parseMessagePackCounter.getCount());
+Assert.assertEquals(0, mapFunction.parseMessagePackErrorCounter.getCount());
+Assert.assertEquals(0, mapFunction.rateLimitDropCounter.getCount());
+testHarness.close();
 }
 
 @Test
 public void testFileChunkFilterFunction() throws Exception {
-StreamFilter<FileChunk> fileChunkStreamFilter = new StreamFilter<>(new FileChunkFilterFunction(100000, "FileChunk.fileType == \"eml\""));
+FileChunkFilterFunction fileChunkFilterFunction = new FileChunkFilterFunction(Long.MAX_VALUE, "FileChunk.fileType == \"eml\"");
+StreamFilter<FileChunk> fileChunkStreamFilter = new StreamFilter<>(fileChunkFilterFunction);
 OneInputStreamOperatorTestHarness<FileChunk, FileChunk> testHarness = new OneInputStreamOperatorTestHarness<>(fileChunkStreamFilter);
 testHarness.setup();
 testHarness.open();
@@ -153,83 +167,120 @@ public class FileChunkCombinerTests {
 public int compare(Object o1, Object o2) {
 StreamRecord sr0 = (StreamRecord) o1;
 StreamRecord sr1 = (StreamRecord) o2;
-return Long.compare(((FileChunk) sr0.getValue()).getOffset(), ((FileChunk) sr1.getValue()).getOffset());
+return ((FileChunk) sr0.getValue()).getUuid().compareTo(((FileChunk) sr1.getValue()).getUuid());
 }
 });
+Assert.assertEquals(20, fileChunkFilterFunction.filterChunkCounter.getCount());
 testHarness.close();
 }

 @Test
 public void testCombineChunkProcessWindowFunction() throws Exception {
-//seek模式
-ListStateDescriptor listStateDescriptor = new ListStateDescriptor<FileChunk>("test-seek-window", new ListSerializer(new JavaSerializer()));
-List<Trigger<Object, TimeWindow>> triggers = new ArrayList<>();
-triggers.add(EventTimeTrigger.create());
-triggers.add(LastChunkOrNoDataInTimeTrigger.of(1000));
-Trigger<Object, TimeWindow> trigger = MultipleTrigger.of(triggers);
-CombineChunkProcessWindowFunction processWindowFunction = new CombineChunkProcessWindowFunction(configuration);
-WindowOperator<String, FileChunk, FileChunk, FileChunk, TimeWindow> operator = new WindowOperator<String, FileChunk, FileChunk, FileChunk, TimeWindow>(
-TumblingEventTimeWindows.of(Time.seconds(3)),
-new TimeWindow.Serializer(),
-new FileChunkKeySelector(),
-BasicTypeInfo.STRING_TYPE_INFO.createSerializer(new ExecutionConfig()),
-listStateDescriptor,
-new InternalIterableProcessWindowFunction(processWindowFunction),
-trigger,
-0L, null);
-ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
-expectedOutput.add(new StreamRecord<>(PublicUtil.combine(inputFileChunks.subList(0, 10), maxChunkCount, duplicateChunkCounter, combineErrorCounter, seekChunkCounter, appendChunkCounter).get(0), 2999L));
-KeyedOneInputStreamOperatorTestHarness<String, FileChunk, FileChunk> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(operator, new FileChunkKeySelector(), BasicTypeInfo.STRING_TYPE_INFO);
-testHarness.setup();
 testHarness.open();
-for (FileChunk fileChunk : inputFileChunks.subList(0, 10)) {
-testHarness.processElement(fileChunk, 1000L);
+testHarness.setProcessingTime(0L);
+testHarness.processWatermark(-9223372036854775808L);
+for (FileChunk inputFileChunk : inputFileChunks) {
+testHarness.processElement(new StreamRecord<>(inputFileChunk, inputFileChunk.getTimestamp() / 1000));
 }
-Assert.assertEquals(10, processWindowFunction.seekChunkCounter.getCount());
-Assert.assertEquals(0, processWindowFunction.combineErrorCounter.getCount());
-ConcurrentLinkedQueue<Object> actualOutput = testHarness.getOutput();
-Assert.assertEquals(1, actualOutput.size());
-TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expectedOutput, actualOutput, (o1, o2) -> {
-StreamRecord sr0 = (StreamRecord) o1;
-StreamRecord sr1 = (StreamRecord) o2;
-return Long.compare(((FileChunk) sr0.getValue()).getOffset(), ((FileChunk) sr1.getValue()).getOffset());
-});
+testHarness.setProcessingTime(9223372036854775807L);
+testHarness.processWatermark(9223372036854775807L);
 testHarness.close();
-//append模式
-triggers = new ArrayList<>();
-triggers.add(EventTimeTrigger.create());
-triggers.add(LastChunkOrNoDataInTimeTrigger.of(1000));
-trigger = MultipleTrigger.of(triggers);
-listStateDescriptor = new ListStateDescriptor<FileChunk>("test-append-window", new ListSerializer(new JavaSerializer()));
-processWindowFunction = new CombineChunkProcessWindowFunction(configuration);
-operator = new WindowOperator<String, FileChunk, FileChunk, FileChunk, TimeWindow>(
-TumblingEventTimeWindows.of(Time.seconds(3)),
-new TimeWindow.Serializer(),
-new FileChunkKeySelector(),
-BasicTypeInfo.STRING_TYPE_INFO.createSerializer(new ExecutionConfig()),
-listStateDescriptor,
-new InternalIterableProcessWindowFunction(processWindowFunction),
-trigger,
-0L, null);
-expectedOutput = new ConcurrentLinkedQueue<>();
-expectedOutput.add(new StreamRecord<>(PublicUtil.combine(inputFileChunks.subList(10, 20), maxChunkCount, duplicateChunkCounter, combineErrorCounter, seekChunkCounter, appendChunkCounter).get(0), 2999L));
-expectedOutput.add(new StreamRecord<>(PublicUtil.combine(inputFileChunks.subList(20, inputFileChunks.size()), maxChunkCount, duplicateChunkCounter, combineErrorCounter, seekChunkCounter, appendChunkCounter).get(0), 2999L));
-testHarness = new KeyedOneInputStreamOperatorTestHarness<>(operator, new FileChunkKeySelector(), BasicTypeInfo.STRING_TYPE_INFO);
-testHarness.setup();
+List<Object> expectedOutput = new ArrayList<>(inputFiles);
+List<Object> actualOutput = new ArrayList<>(testHarness.extractOutputValues());
+Assert.assertEquals(3, actualOutput.size());
+TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expectedOutput, actualOutput, Comparator.comparing(o -> ((FileChunk) o).getUuid()));
+Assert.assertEquals(0, processWindowFunction.combineErrorCounter.getCount());
+Assert.assertEquals(0, processWindowFunction.duplicateChunkCounter.getCount());
+testHarness.close();
+}
+
+@Test
+public void testCombineChunkProcessWindowFunctionByOutputTag() throws Exception {
 testHarness.open();
-for (FileChunk fileChunk : inputFileChunks.subList(10, inputFileChunks.size())) {
-testHarness.processElement(fileChunk, 1000L);
+categorizeChunks(inputFileChunks);
+long timestamp = 0L;
+for (FileChunk fileChunk : emlFileChunks) {
+testHarness.processElement(fileChunk, timestamp += 10);
+}
+for (FileChunk fileChunk : pcapngFileChunks) {
+testHarness.processElement(fileChunk, timestamp += 10);
+}
+testHarness.processWatermark(3000L);
+for (FileChunk fileChunk : pcapngIncludeMetaFileChunks) {
+testHarness.processElement(fileChunk, timestamp += 10);
+}
+List<Object> expectedOutput = new ArrayList<>();
+expectedOutput.add(inputFiles.get(0));
+expectedOutput.add(inputFiles.get(1));
+List<Object> actualOutput = new ArrayList<>(testHarness.extractOutputValues());
+Assert.assertEquals(2, actualOutput.size());
+TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expectedOutput, actualOutput, Comparator.comparing(o -> ((FileChunk) o).getUuid()));
+ConcurrentLinkedQueue<StreamRecord<FileChunk>> sideOutput = testHarness.getSideOutput(delayedChunkOutputTag);
+List<Object> expectedSideOutput = new ArrayList<>(pcapngIncludeMetaFileChunks);
+List<Object> actualSideOutput = new ArrayList<>();
+for (StreamRecord<FileChunk> streamRecord : sideOutput) {
+actualSideOutput.add(streamRecord.getValue());
+}
+Assert.assertEquals(10, sideOutput.size());
+TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expectedSideOutput, actualSideOutput, Comparator.comparing(o -> ((FileChunk) o).getUuid()));
+Assert.assertEquals(0, processWindowFunction.combineErrorCounter.getCount());
+Assert.assertEquals(0, processWindowFunction.duplicateChunkCounter.getCount());
+testHarness.close();
+}
+
+@Test
+public void testCombineChunkProcessWindowFunctionByDuplicateChunk() throws Exception {
+testHarness.open();
+categorizeChunks(inputFileChunks);
+pcapngFileChunks.add(pcapngFileChunks.get(5));
+pcapngIncludeMetaFileChunks.add(pcapngIncludeMetaFileChunks.get(5));
+long timestamp = 0L;
+testHarness.processElement(emlFileChunks.get(5), timestamp + 100);
+for (FileChunk fileChunk : emlFileChunks) {
+testHarness.processElement(fileChunk, timestamp += 10);
+}
+for (FileChunk fileChunk : pcapngFileChunks) {
+testHarness.processElement(fileChunk, timestamp += 10);
+}
+for (FileChunk fileChunk : pcapngIncludeMetaFileChunks) {
+testHarness.processElement(fileChunk, timestamp += 10);
 }
 testHarness.setProcessingTime(5000L);
-Assert.assertEquals(20, processWindowFunction.appendChunkCounter.getCount());
+List<FileChunk> actualOutput = testHarness.extractOutputValues();
+Assert.assertEquals(3, actualOutput.size());
+Assert.assertEquals(inputFiles.get(0), actualOutput.get(0));
+Assert.assertEquals(inputFiles.get(1).getChunk().length + pcapngFileChunks.get(5).getChunk().length, actualOutput.get(1).getChunk().length);
+Assert.assertEquals(inputFiles.get(2).getChunk().length + pcapngIncludeMetaFileChunks.get(5).getChunk().length, actualOutput.get(2).getChunk().length);
 Assert.assertEquals(0, processWindowFunction.combineErrorCounter.getCount());
-actualOutput = testHarness.getOutput();
-Assert.assertEquals(2, actualOutput.size());
-TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expectedOutput, actualOutput, (o1, o2) -> {
-StreamRecord sr0 = (StreamRecord) o1;
-StreamRecord sr1 = (StreamRecord) o2;
-return ((FileChunk) sr0.getValue()).getUuid().compareTo(((FileChunk) sr1.getValue()).getUuid());
-});
+Assert.assertEquals(1, processWindowFunction.duplicateChunkCounter.getCount());
+testHarness.close();
+}
+
+@Test
+public void testCombineChunkProcessWindowFunctionByLostChunk() throws Exception {
+testHarness.open();
+categorizeChunks(inputFileChunks);
+emlFileChunks.remove(emlFileChunks.get(5));
+pcapngFileChunks.remove(pcapngFileChunks.get(5));
+pcapngIncludeMetaFileChunks.remove(pcapngIncludeMetaFileChunks.get(5));
+long timestamp = 0L;
+for (FileChunk fileChunk : emlFileChunks) {
+testHarness.processElement(fileChunk, timestamp += 10);
+}
+for (FileChunk fileChunk : pcapngFileChunks) {
+testHarness.processElement(fileChunk, timestamp += 10);
+}
+for (FileChunk fileChunk : pcapngIncludeMetaFileChunks) {
+testHarness.processElement(fileChunk, timestamp += 10);
+}
+testHarness.setProcessingTime(5000L);
+List<FileChunk> actualOutput = testHarness.extractOutputValues();
+Assert.assertEquals(4, actualOutput.size());
+Assert.assertEquals(inputFiles.get(0).getChunk().length - emlFileChunks.get(5).getChunk().length, actualOutput.get(0).getChunk().length + actualOutput.get(1).getChunk().length);
+Assert.assertEquals(inputFiles.get(1).getChunk().length - pcapngFileChunks.get(5).getChunk().length, actualOutput.get(2).getChunk().length);
+Assert.assertEquals(inputFiles.get(2).getChunk().length - pcapngIncludeMetaFileChunks.get(5).getChunk().length, actualOutput.get(3).getChunk().length);
+Assert.assertEquals(0, processWindowFunction.combineErrorCounter.getCount());
+Assert.assertEquals(0, processWindowFunction.duplicateChunkCounter.getCount());
 testHarness.close();
 }

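The rewritten window tests above drive event time explicitly: open the harness, emit a Long.MIN_VALUE watermark, feed elements with their event-time timestamps, then emit a Long.MAX_VALUE watermark (or advance processing time) so every window fires before the output is read. A compact, self-contained sketch of that pattern around a trivial keyed pass-through operator (an assumed illustration, not the project's actual WindowOperator setup) could look like:

    import java.util.List;
    import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
    import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
    import org.apache.flink.streaming.api.operators.KeyedProcessOperator;
    import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
    import org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness;
    import org.apache.flink.util.Collector;
    import org.junit.Assert;
    import org.junit.Test;

    public class WatermarkHarnessSketchTest {

        @Test
        public void minMaxWatermarkPattern() throws Exception {
            // Pass-through keyed function; a real test would wrap the window operator under test.
            KeyedProcessOperator<String, String, String> operator =
                    new KeyedProcessOperator<>(new KeyedProcessFunction<String, String, String>() {
                        @Override
                        public void processElement(String value, Context ctx, Collector<String> out) {
                            out.collect(value);
                        }
                    });

            KeyedOneInputStreamOperatorTestHarness<String, String, String> harness =
                    new KeyedOneInputStreamOperatorTestHarness<>(operator, value -> value, BasicTypeInfo.STRING_TYPE_INFO);

            harness.setup();
            harness.open();
            harness.processWatermark(Long.MIN_VALUE);                   // nothing can be late yet
            harness.processElement(new StreamRecord<>("chunk-1", 10L)); // event-time timestamps
            harness.processElement(new StreamRecord<>("chunk-2", 20L));
            harness.processWatermark(Long.MAX_VALUE);                   // fires all event-time timers/windows

            List<String> output = harness.extractOutputValues();
            Assert.assertEquals(2, output.size());
            harness.close();
        }
    }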
@@ -244,20 +295,22 @@ public class FileChunkCombinerTests {
 testHarness.setup();
 testHarness.open();
 byte[] data = RandomUtil.randomString(1000).getBytes();
-//seek模式
+//seek文件
 FileChunk fileChunk = new FileChunk("0000000001", "eml", 0, data.length, data, "seek", 1, 5, System.currentTimeMillis());
 testHarness.processElement(new StreamRecord<>(fileChunk));
-Assert.assertEquals("上传文件到hos错误", 0, hosSink.sendHosErrorCounter.getCount());
-Assert.assertEquals("上传文件到hos失败", 1, hosSink.sendHosCounter.getCount());
+Assert.assertEquals(1, hosSink.sendHosCounter.getCount());
+Assert.assertEquals(0, hosSink.sendHosErrorCounter.getCount());
 Assert.assertEquals(1, hosSink.sendHosFileCounter.getCount());
 Assert.assertEquals(1, hosSink.sendHosChunkCounter.getCount());
-//append模式
+//append文件
 fileChunk = new FileChunk("0000000002", "pcapng", data.length, data, "append", 5, System.currentTimeMillis(), pcapngFileMeta, "1-200,2-200,3-200,4-200,5-200");
 testHarness.processElement(new StreamRecord<>(fileChunk));
-Assert.assertEquals("上传文件到hos错误", 0, hosSink.sendHosErrorCounter.getCount());
-Assert.assertEquals("上传文件到hos次数错误", 2, hosSink.sendHosCounter.getCount());
+Assert.assertEquals(2, hosSink.sendHosCounter.getCount());
+Assert.assertEquals(0, hosSink.sendHosErrorCounter.getCount());
+Assert.assertEquals(1, hosSink.sendHosFileCounter.getCount());
 Assert.assertEquals(2, hosSink.sendHosChunkCounter.getCount());
 testHarness.close();
+
 //测试批量上传
 configuration.setString(Configs.SINK_TYPE, "hos");
 configuration.setBoolean(Configs.SINK_BATCH, true);
@@ -271,8 +324,8 @@ public class FileChunkCombinerTests {
 testHarness.processElement(new StreamRecord<>(fileChunk));
 fileChunk = new FileChunk("0000000002", "eml", 0, data.length, data, "seek", 1, 5, System.currentTimeMillis());
 testHarness.processElement(new StreamRecord<>(fileChunk));
-Assert.assertEquals("上传文件到hos错误", 0, hosSink.sendHosErrorCounter.getCount());
-Assert.assertEquals("上传文件到hos失败", 1, hosSink.sendHosCounter.getCount());
+Assert.assertEquals(1, hosSink.sendHosCounter.getCount());
+Assert.assertEquals(0, hosSink.sendHosErrorCounter.getCount());
 Assert.assertEquals(2, hosSink.sendHosFileCounter.getCount());
 Assert.assertEquals(2, hosSink.sendHosChunkCounter.getCount());
 testHarness.close();
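The assertions above read sink-side counters such as sendHosCounter, sendHosErrorCounter, sendHosFileCounter and sendHosChunkCounter. The sink implementation itself is not part of this diff; as a rough, assumed illustration, counters of that kind are typically registered on Flink's runtime metric group in the sink's open() method, for example:

    import org.apache.flink.configuration.Configuration;
    import org.apache.flink.metrics.Counter;
    import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

    // Hypothetical sketch; names and structure are assumptions, not the project's actual HosSink.
    public class HosSinkMetricsSketch extends RichSinkFunction<byte[]> {

        private transient Counter sendHosCounter;
        private transient Counter sendHosErrorCounter;

        @Override
        public void open(Configuration parameters) {
            // Register counters so they show up in the job's metric reporters.
            sendHosCounter = getRuntimeContext().getMetricGroup().counter("sendHos");
            sendHosErrorCounter = getRuntimeContext().getMetricGroup().counter("sendHosError");
        }

        @Override
        public void invoke(byte[] value, Context context) {
            try {
                // upload the combined file to object storage here
                sendHosCounter.inc();
            } catch (Exception e) {
                sendHosErrorCounter.inc();
            }
        }
    }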
@@ -300,92 +353,6 @@ public class FileChunkCombinerTests {
 testHarness.close();
 }
-
-@Test
-public void testCombineFullChunk() {
-categorizeChunks(inputFileChunks);
-//测试seek合并模式
-List<FileChunk> fileChunkList = PublicUtil.combine(emlFileChunks, maxChunkCount, duplicateChunkCounter, combineErrorCounter, seekChunkCounter, appendChunkCounter);
-Assert.assertEquals("seek模式合并错误", 1, fileChunkList.size());
-Assert.assertEquals("seek模式合并错误,lastChunkFlag错误", 1, fileChunkList.get(0).getLastChunkFlag());
-Assert.assertEquals("seek模式合并错误,chunkCount错误", emlChunkCount - 1, fileChunkList.get(0).getChunkCount());
-Assert.assertEquals("seek模式合并错误,文件长度错误", emlFileBytes.length, fileChunkList.get(0).getChunk().length);
-Assert.assertEquals("seek模式合并错误,文件内容错误", new String(emlFileBytes), new String(fileChunkList.get(0).getChunk()));
-//测试append合并模式
-fileChunkList = PublicUtil.combine(pcapngFileChunks, maxChunkCount, duplicateChunkCounter, combineErrorCounter, seekChunkCounter, appendChunkCounter);
-Assert.assertEquals("append模式合并错误", 1, fileChunkList.size());
-Assert.assertEquals("append模式合并错误,chunkCount错误", pcapngChunkCount, fileChunkList.get(0).getChunkCount());
-Assert.assertEquals("append模式合并错误,文件长度错误", pcapngFileBytes.length, fileChunkList.get(0).getChunk().length);
-Assert.assertEquals("append模式合并错误,文件内容错误", new String(pcapngFileBytes), new String(fileChunkList.get(0).getChunk()));
-//测试合并携带元信息
-fileChunkList = PublicUtil.combine(pcapngIncludeMetaFileChunks, maxChunkCount, duplicateChunkCounter, combineErrorCounter, seekChunkCounter, appendChunkCounter);
-Assert.assertEquals(1, fileChunkList.size());
-Assert.assertEquals("合并错误,元信息错误", pcapngFileMeta, fileChunkList.get(0).getMeta());
-
-Assert.assertEquals("监控指标错误", 0, duplicateChunkCounter.getCount());
-Assert.assertEquals("监控指标错误", 0, combineErrorCounter.getCount());
-Assert.assertEquals("监控指标错误", emlChunkCount, seekChunkCounter.getCount());
-Assert.assertEquals("监控指标错误", pcapngChunkCount * 2, appendChunkCounter.getCount());
-}
-
-@Test
-public void testCombineDuplicateChunk() {
-categorizeChunks(inputFileChunks);
-//测试seek合并模式
-emlFileChunks.add(emlFileChunks.get(5));
-List<FileChunk> fileChunkList = PublicUtil.combine(emlFileChunks, maxChunkCount, duplicateChunkCounter, combineErrorCounter, seekChunkCounter, appendChunkCounter);
-Assert.assertEquals("seek模式合并错误", 1, fileChunkList.size());
-Assert.assertEquals("seek模式合并错误,lastChunkFlag错误", 1, fileChunkList.get(0).getLastChunkFlag());
-Assert.assertEquals("seek模式合并错误,chunkCount错误", emlChunkCount - 1, fileChunkList.get(0).getChunkCount());
-Assert.assertEquals("seek模式合并错误,文件长度错误", emlFileBytes.length, fileChunkList.get(0).getChunk().length);
-Assert.assertEquals("seek模式合并错误,文件内容错误", new String(emlFileBytes), new String(fileChunkList.get(0).getChunk()));
-//测试append合并模式
-pcapngFileChunks.add(pcapngFileChunks.get(5));
-fileChunkList = PublicUtil.combine(pcapngFileChunks, maxChunkCount, duplicateChunkCounter, combineErrorCounter, seekChunkCounter, appendChunkCounter);
-Assert.assertEquals("append模式合并错误", 1, fileChunkList.size());
-Assert.assertEquals("append模式合并错误,chunkCount错误", pcapngChunkCount + 1, fileChunkList.get(0).getChunkCount());
-Assert.assertEquals("append模式合并错误,文件长度错误", pcapngFileBytes.length + pcapChunkData.length(), fileChunkList.get(0).getChunk().length);
-Assert.assertEquals("append模式合并错误,文件内容错误", new String(pcapngFileBytes) + pcapChunkData, new String(fileChunkList.get(0).getChunk()));
-//测试合并携带元信息
-pcapngIncludeMetaFileChunks.add(pcapngIncludeMetaFileChunks.get(5));
-fileChunkList = PublicUtil.combine(pcapngIncludeMetaFileChunks, maxChunkCount, duplicateChunkCounter, combineErrorCounter, seekChunkCounter, appendChunkCounter);
-Assert.assertEquals(1, fileChunkList.size());
-Assert.assertEquals("合并错误,元信息错误", pcapngFileMeta, fileChunkList.get(0).getMeta());
-
-Assert.assertEquals("监控指标错误", 1, duplicateChunkCounter.getCount());
-Assert.assertEquals("监控指标错误", 0, combineErrorCounter.getCount());
-Assert.assertEquals("监控指标错误", emlChunkCount + 1, seekChunkCounter.getCount());
-Assert.assertEquals("监控指标错误", pcapngChunkCount * 2 + 2, appendChunkCounter.getCount());
-}
-
-@Test
-public void testCombineLostChunk() {
-categorizeChunks(inputFileChunks);
-//测试seek合并模式
-emlFileChunks.remove(emlFileChunks.get(5));
-List<FileChunk> fileChunkList = PublicUtil.combine(emlFileChunks, maxChunkCount, duplicateChunkCounter, combineErrorCounter, seekChunkCounter, appendChunkCounter);
-Assert.assertEquals("seek模式合并错误", 2, fileChunkList.size());
-Assert.assertEquals("seek模式合并错误,lastChunkFlag错误", 1, fileChunkList.get(1).getLastChunkFlag());
-Assert.assertEquals("append模式合并错误,chunkCount错误", emlChunkCount - 2, fileChunkList.get(0).getChunkCount() + fileChunkList.get(1).getChunkCount());
-Assert.assertEquals("seek模式合并错误,文件长度错误", emlFileBytes.length - 2000, fileChunkList.get(0).getLength() + fileChunkList.get(1).getLength());
-//测试append合并模式
-pcapngFileChunks.remove(pcapngFileChunks.get(5));
-fileChunkList = PublicUtil.combine(pcapngFileChunks, maxChunkCount, duplicateChunkCounter, combineErrorCounter, seekChunkCounter, appendChunkCounter);
-Assert.assertEquals("append模式合并错误", 1, fileChunkList.size());
-Assert.assertEquals("append模式合并错误,chunkCount错误", pcapngChunkCount - 1, fileChunkList.get(0).getChunkCount());
-Assert.assertEquals("append模式合并错误,文件长度错误", pcapngFileBytes.length - pcapChunkData.length(), fileChunkList.get(0).getChunk().length);
-Assert.assertEquals("append模式合并错误,文件内容错误", new String(ArrayUtil.sub(pcapngFileBytes, 0, pcapngFileBytes.length - pcapChunkData.length())), new String(fileChunkList.get(0).getChunk()));
-//测试合并携带元信息
-pcapngIncludeMetaFileChunks.remove(pcapngIncludeMetaFileChunks.get(5));
-fileChunkList = PublicUtil.combine(pcapngIncludeMetaFileChunks, maxChunkCount, duplicateChunkCounter, combineErrorCounter, seekChunkCounter, appendChunkCounter);
-Assert.assertEquals(1, fileChunkList.size());
-Assert.assertEquals("合并错误,元信息错误", pcapngFileMeta, fileChunkList.get(0).getMeta());
-
-Assert.assertEquals("监控指标错误", 0, duplicateChunkCounter.getCount());
-Assert.assertEquals("监控指标错误", 0, combineErrorCounter.getCount());
-Assert.assertEquals("监控指标错误", emlChunkCount - 1, seekChunkCounter.getCount());
-Assert.assertEquals("监控指标错误", pcapngChunkCount * 2 - 2, appendChunkCounter.getCount());
-}
-
 @Test
 public void testPipelineFullChunk() throws Exception {
 CollectSink.values.clear();
@@ -650,26 +617,68 @@ public class FileChunkCombinerTests {
 triggers.add(LastChunkOrNoDataInTimeTrigger.of(windowIdleTime * 1000));
 Trigger<Object, TimeWindow> trigger = MultipleTrigger.of(triggers);
 env.addSource(source)
-.map(new ParseMessagePackMapFunction())
-.filter((FilterFunction<FileChunk>) Objects::nonNull)
+.map(new ParseMessagePackMapFunction(false, Long.MAX_VALUE))
+.filter(new FileChunkFilterFunction(Long.MAX_VALUE, ""))
 .assignTimestampsAndWatermarks(watermarkStrategy)
-.keyBy(new FileChunkKeySelector())
+.keyBy(new FileChunkKeySelector(), BasicTypeInfo.STRING_TYPE_INFO)
 .window(TumblingEventTimeWindows.of(Time.seconds(windowTime)))
 .trigger(trigger)
-.process(new CombineChunkProcessWindowFunction(configuration))
+.process(new CombineChunkProcessWindowFunction(Integer.MAX_VALUE))
 .addSink(new CollectSink());
 return env;
 }

 private void categorizeChunks(List<FileChunk> fileChunks) {
 for (FileChunk fileChunk : fileChunks) {
-if (emlUuid.equals(fileChunk.getUuid())) {
+if ("eml".equals(fileChunk.getFileType())) {
 emlFileChunks.add(fileChunk);
-} else if (pcapngUuid.equals(fileChunk.getUuid())) {
+} else if ("pcapng".equals(fileChunk.getFileType()) && fileChunk.getMeta() == null) {
 pcapngFileChunks.add(fileChunk);
-} else if (pcapngIncludeMetaUuid.equals(fileChunk.getUuid())) {
+} else if ("pcapng".equals(fileChunk.getFileType()) && fileChunk.getMeta() != null) {
 pcapngIncludeMetaFileChunks.add(fileChunk);
 }
 }
 }

+// @Test
+// public void testCombineChunkProcessWindowFunction() throws Exception {
+// List<Trigger<Object, TimeWindow>> triggers = new ArrayList<>();
+// triggers.add(EventTimeTrigger.create());
+// triggers.add(LastChunkOrNoDataInTimeTrigger.of(1000));
+// Trigger<Object, TimeWindow> trigger = MultipleTrigger.of(triggers);
+// TypeSerializer<FileChunk> serializer = TypeInformation.of(FileChunk.class).createSerializer(new ExecutionConfig());
+// ListStateDescriptor listStateDescriptor = new ListStateDescriptor<>("test-seek-window", serializer);
+// CombineChunkProcessWindowFunction processWindowFunction = new CombineChunkProcessWindowFunction(Integer.MAX_VALUE);
+// WindowOperator<String, FileChunk, FileChunk, FileChunk, TimeWindow> operator = new WindowOperator<String, FileChunk, FileChunk, FileChunk, TimeWindow>(
+// TumblingEventTimeWindows.of(Time.seconds(3)),
+// new TimeWindow.Serializer(),
+// new FileChunkKeySelector(),
+// BasicTypeInfo.STRING_TYPE_INFO.createSerializer(new ExecutionConfig()),
+// listStateDescriptor,
+// new InternalIterableProcessWindowFunction(processWindowFunction),
+// trigger,
+// 0L, null);
+// KeyedOneInputStreamOperatorTestHarness<String, FileChunk, FileChunk> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(operator, new FileChunkKeySelector(), BasicTypeInfo.STRING_TYPE_INFO);
+// testHarness.setup();
+// testHarness.open();
+// ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
+// for (FileChunk file : inputFiles) {
+// expectedOutput.add(new StreamRecord<>(file, 2999L));
+// }
+// long timestamp = 0L;
+// for (FileChunk fileChunk : inputFileChunks) {
+// testHarness.processElement(fileChunk, timestamp += 10);
+// }
+// testHarness.setProcessingTime(5000L);
+// ConcurrentLinkedQueue<Object> actualOutput = testHarness.getOutput();
+// Assert.assertEquals(3, actualOutput.size());
+// TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expectedOutput, actualOutput, (o1, o2) -> {
+// StreamRecord sr0 = (StreamRecord) o1;
+// StreamRecord sr1 = (StreamRecord) o2;
+// return ((FileChunk) sr0.getValue()).getUuid().compareTo(((FileChunk) sr1.getValue()).getUuid());
+// });
+// Assert.assertEquals(0, processWindowFunction.combineErrorCounter.getCount());
+// Assert.assertEquals(0, processWindowFunction.duplicateChunkCounter.getCount());
+// testHarness.close();
+// }
 }
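The test pipeline above now passes two constructor arguments into ParseMessagePackMapFunction (false and Long.MAX_VALUE here), which look like an enable flag plus a threshold for the rate limiting this commit introduces. The function body is not shown in this diff; a rate-limited map function with that constructor shape could be sketched roughly as follows, built on Guava's RateLimiter (an assumed illustration, not the project's actual implementation):

    import com.google.common.util.concurrent.RateLimiter;
    import org.apache.flink.api.common.functions.RichMapFunction;
    import org.apache.flink.configuration.Configuration;

    // Hypothetical sketch of a rate-limited parse step; the pass-through body stands in for the real MessagePack decoding.
    public class RateLimitedParseSketch extends RichMapFunction<byte[], byte[]> {

        private final boolean enableRateLimit;
        private final long rateLimitThreshold; // records per second per subtask

        private transient RateLimiter rateLimiter;

        public RateLimitedParseSketch(boolean enableRateLimit, long rateLimitThreshold) {
            this.enableRateLimit = enableRateLimit;
            this.rateLimitThreshold = rateLimitThreshold;
        }

        @Override
        public void open(Configuration parameters) {
            if (enableRateLimit) {
                rateLimiter = RateLimiter.create(rateLimitThreshold);
            }
        }

        @Override
        public byte[] map(byte[] value) {
            if (enableRateLimit) {
                rateLimiter.acquire(); // blocks until a permit is available, capping throughput
            }
            return value; // the real function would decode the MessagePack payload into a FileChunk here
        }
    }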