diff --git a/pom.xml b/pom.xml index 3ebc9b2..67c6e96 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.zdjizhi file-chunk-combiner - 1.2.0 + 1.3.0 diff --git a/src/main/java/com/zdjizhi/FileChunkCombiner.java b/src/main/java/com/zdjizhi/FileChunkCombiner.java index e470d42..f64b6bd 100644 --- a/src/main/java/com/zdjizhi/FileChunkCombiner.java +++ b/src/main/java/com/zdjizhi/FileChunkCombiner.java @@ -45,7 +45,7 @@ public class FileChunkCombiner { .filter(new FileChunkFilterFunction(configuration.getLong(Configs.FILE_MAX_SIZE), configuration.getString(Configs.FILTER_EXPRESSION))) .assignTimestampsAndWatermarks(watermarkStrategy); - OutputTag delayedChunkOutputTag = new OutputTag<>("delayed-chunk") { + OutputTag delayedChunkOutputTag = new OutputTag("delayed-chunk") { }; List> triggers = new ArrayList<>(); @@ -69,7 +69,7 @@ public class FileChunkCombiner { windowStream.getSideOutput(delayedChunkOutputTag) .map(new SideOutputMapFunction()) .addSink(new HosSink(configuration)) - .name("Hos Delayed Chunk"); + .name("Delayed Chunk"); } else { windowStream.addSink(new HBaseSink(configuration)) .name("HBase") @@ -77,7 +77,7 @@ public class FileChunkCombiner { windowStream.getSideOutput(delayedChunkOutputTag) .map(new SideOutputMapFunction()) .addSink(new HBaseSink(configuration)) - .name("HBase Delayed Chunk"); + .name("Delayed Chunk"); } environment.execute(configuration.get(Configs.FLINK_JOB_NAME)); diff --git a/src/main/java/com/zdjizhi/function/CombineChunkProcessWindowFunction.java b/src/main/java/com/zdjizhi/function/CombineChunkProcessWindowFunction.java index c94a6f9..9797b1d 100644 --- a/src/main/java/com/zdjizhi/function/CombineChunkProcessWindowFunction.java +++ b/src/main/java/com/zdjizhi/function/CombineChunkProcessWindowFunction.java @@ -22,8 +22,8 @@ import static com.zdjizhi.utils.PublicConstants.COMBINE_MODE_SEEK; public class CombineChunkProcessWindowFunction extends ProcessWindowFunction { private static final Log LOG = LogFactory.get(); - public transient Counter duplicateChunkCounter; - public transient Counter combineErrorCounter; + public transient Counter duplicateChunksCounter; + public transient Counter combineErrorChunksCounter; private final int fileMaxChunkCount; public CombineChunkProcessWindowFunction(int fileMaxChunkCount) { @@ -34,10 +34,10 @@ public class CombineChunkProcessWindowFunction extends ProcessWindowFunction combine(Iterable input) { List combinedFileChunkList = new ArrayList<>(); + List originalFileChunkList = StreamSupport.stream(input.spliterator(), false).collect(Collectors.toList()); try { - List originalFileChunkList = StreamSupport.stream(input.spliterator(), false).collect(Collectors.toList()); List waitingToCombineChunkList = new ArrayList<>(); if (COMBINE_MODE_SEEK.equals(originalFileChunkList.get(0).getCombineMode())) { // 按照offset排序 @@ -71,7 +71,7 @@ public class CombineChunkProcessWindowFunction extends ProcessWindowFunction actualOffset) {// 期望offset大于当前offset,该块为重复块,跳过该块 duplicateCount++; - duplicateChunkCounter.inc(); + duplicateChunksCounter.inc(); } else if (expectedOffset == actualOffset) {// 期望offset等于当前offset,将该块添加到待合并集合中 if (currentFileChunk.getLastChunkFlag() == 1) { lastChunkFlag = currentFileChunk.getLastChunkFlag(); @@ -129,7 +129,7 @@ public class CombineChunkProcessWindowFunction extends ProcessWindowFunction { private final long maxFileSize; private final String filterExpression; - public transient Counter filterChunkCounter; + public transient Counter filterChunksCounter; private JexlExpression jexlExpression; private JexlContext jexlContext; @@ -25,8 +25,8 @@ public class FileChunkFilterFunction extends RichFilterFunction { public void open(Configuration parameters) throws Exception { super.open(parameters); MetricGroup metricGroup = getRuntimeContext().getMetricGroup(); - filterChunkCounter = metricGroup.counter("filterChunkCount"); - metricGroup.meter("filterChunkPerSecond", new MeterView(filterChunkCounter)); + filterChunksCounter = metricGroup.counter("filterChunksCount"); + metricGroup.meter("numChunksFilterPerSecond", new MeterView(filterChunksCounter)); JexlEngine jexlEngine = new JexlBuilder().create(); jexlExpression = jexlEngine.createExpression(filterExpression); jexlContext = new MapContext(); @@ -35,13 +35,13 @@ public class FileChunkFilterFunction extends RichFilterFunction { @Override public boolean filter(FileChunk value) { if (value == null || value.getOffset() > maxFileSize) { - filterChunkCounter.inc(); + filterChunksCounter.inc(); return false; } if (StrUtil.isNotEmpty(filterExpression)) { jexlContext.set(value.getClass().getSimpleName(), value); if (!Boolean.parseBoolean(jexlExpression.evaluate(jexlContext).toString())) { - filterChunkCounter.inc(); + filterChunksCounter.inc(); return false; } } diff --git a/src/main/java/com/zdjizhi/function/ParseMessagePackMapFunction.java b/src/main/java/com/zdjizhi/function/ParseMessagePackMapFunction.java index 4aeefef..c7add8d 100644 --- a/src/main/java/com/zdjizhi/function/ParseMessagePackMapFunction.java +++ b/src/main/java/com/zdjizhi/function/ParseMessagePackMapFunction.java @@ -23,9 +23,21 @@ public class ParseMessagePackMapFunction extends RichMapFunction= 1000) { if (StrUtil.isNotEmpty(rateLimitExpression)) { @@ -75,7 +111,7 @@ public class ParseMessagePackMapFunction extends RichMapFunction { - private transient Counter delayedChunkCounter; + public transient Counter delayedChunksCounter; @Override public void open(Configuration parameters) throws Exception { super.open(parameters); MetricGroup metricGroup = getRuntimeContext().getMetricGroup(); - delayedChunkCounter = metricGroup.counter("delayedChunkCount"); - metricGroup.meter("delayedChunkPerSecond", new MeterView(delayedChunkCounter)); + delayedChunksCounter = metricGroup.counter("delayedChunksCount"); + metricGroup.meter("numChunksDelayPerSecond", new MeterView(delayedChunksCounter)); } @Override public FileChunk map(FileChunk fileChunk) { - delayedChunkCounter.inc(); + delayedChunksCounter.inc(); fileChunk.setChunkCount(1); if (COMBINE_MODE_APPEND.equals(fileChunk.getCombineMode())) { fileChunk.setChunkNumbers(fileChunk.getTimestamp() + "-" + fileChunk.getChunk().length + ";"); diff --git a/src/main/java/com/zdjizhi/sink/HBaseSink.java b/src/main/java/com/zdjizhi/sink/HBaseSink.java index 658b7d3..66ec2c6 100644 --- a/src/main/java/com/zdjizhi/sink/HBaseSink.java +++ b/src/main/java/com/zdjizhi/sink/HBaseSink.java @@ -29,10 +29,16 @@ public class HBaseSink extends RichSinkFunction { private static final Log LOG = LogFactory.get(); private final Configuration configuration; - public transient Counter sendHBaseCounter; - public transient Counter sendHBaseErrorCounter; - public transient Counter sendHBaseFileCounter; - public transient Counter sendHBaseChunkCounter; + public transient Counter sinkRequestsCounter; + public transient Counter sinkErrorRequestsCounter; + public transient Counter sinkFilesCounter; + public transient Counter sinkChunksCounter; + public transient Counter lessThan5KBChunksCounter; + public transient Counter between5KBAnd10KBChunksCounter; + public transient Counter between10KBAnd50KBChunksCounter; + public transient Counter between50KBAnd100KBChunksCounter; + public transient Counter between100KBAnd1MBChunksCounter; + public transient Counter greaterThan1MBChunksCounter; private boolean isAsync; private Connection syncHBaseConnection; private AsyncConnection AsyncHBaseConnection; @@ -58,14 +64,27 @@ public class HBaseSink extends RichSinkFunction { public void open(Configuration parameters) throws Exception { super.open(parameters); MetricGroup metricGroup = getRuntimeContext().getMetricGroup(); - sendHBaseCounter = metricGroup.counter("sendHBaseCount"); - sendHBaseErrorCounter = metricGroup.counter("sendHBaseErrorCount"); - sendHBaseFileCounter = metricGroup.counter("sendHBaseFileCount"); - sendHBaseChunkCounter = metricGroup.counter("sendHBaseChunkCount"); - metricGroup.meter("sendHBasePerSecond", new MeterView(sendHBaseCounter, 5)); - metricGroup.meter("sendHBaseErrorPerSecond", new MeterView(sendHBaseErrorCounter)); - metricGroup.meter("sendHBaseFilePerSecond", new MeterView(sendHBaseFileCounter)); - metricGroup.meter("sendHBaseChunkPerSecond", new MeterView(sendHBaseChunkCounter)); + lessThan5KBChunksCounter = metricGroup.counter("lessThan5KBChunksCount"); + between5KBAnd10KBChunksCounter = metricGroup.counter("between5KBAnd10KBChunksCount"); + between10KBAnd50KBChunksCounter = metricGroup.counter("between10KBAnd50KBChunksCount"); + between50KBAnd100KBChunksCounter = metricGroup.counter("between50KBAnd100KBChunksCount"); + between100KBAnd1MBChunksCounter = metricGroup.counter("between100KBAnd1MBChunksCount"); + greaterThan1MBChunksCounter = metricGroup.counter("greaterThan1MBChunksCount"); + metricGroup.meter("numLessThan5KBChunksOutPerSecond", new MeterView(lessThan5KBChunksCounter)); + metricGroup.meter("numBetween5KBAnd10KBChunksOutPerSecond", new MeterView(between5KBAnd10KBChunksCounter)); + metricGroup.meter("numBetween10KBAnd50KBChunksOutPerSecond", new MeterView(between10KBAnd50KBChunksCounter)); + metricGroup.meter("numBetween50KBAnd100KBChunkPsOuterSecond", new MeterView(between50KBAnd100KBChunksCounter)); + metricGroup.meter("numBetween100KBAnd1MBChunksOutPerSecond", new MeterView(between100KBAnd1MBChunksCounter)); + metricGroup.meter("numGreaterThan1MBChunksOutPerSecond", new MeterView(greaterThan1MBChunksCounter)); + sinkRequestsCounter = metricGroup.counter("sinkRequestsCount"); + sinkErrorRequestsCounter = metricGroup.counter("sinkErrorRequestsCount"); + sinkFilesCounter = metricGroup.counter("sinkFilesCount"); + sinkChunksCounter = metricGroup.counter("sinkChunksCount"); + metricGroup.meter("numRequestsSinkPerSecond", new MeterView(sinkRequestsCounter, 5)); + metricGroup.meter("numErrorRequestsSinkPerSecond", new MeterView(sinkErrorRequestsCounter)); + metricGroup.meter("numFilesSinkPerSecond", new MeterView(sinkFilesCounter)); + metricGroup.meter("numChunksSinkPerSecond", new MeterView(sinkChunksCounter)); + isAsync = configuration.getBoolean(Configs.SINK_ASYNC); if (isAsync) { AsyncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getAsyncHBaseConnection(); @@ -90,16 +109,17 @@ public class HBaseSink extends RichSinkFunction { @Override public void invoke(FileChunk fileChunk, Context context) { if (COMBINE_MODE_SEEK.equals(fileChunk.getCombineMode()) && configuration.get(Configs.SINK_BATCH)) { - sendHBaseChunkCounter.inc(); + sinkChunksCounter.inc(); byte[] data = "".getBytes(); if (fileChunk.getChunk() != null) { data = fileChunk.getChunk(); } + int chunkLength = data.length; long timestamp = System.currentTimeMillis(); Map partMessageMap = new HashMap<>(); partMessageMap.put(APPEND_FILE_PART_MESSAGE_CHUNK_COUNT, fileChunk.getChunkCount() + ""); partMessageMap.put(APPEND_FILE_PART_MESSAGE_LAST_PART_FLAG, fileChunk.getLastChunkFlag() + ""); - partMessageMap.put(APPEND_FILE_PART_MESSAGE_SIZE, data.length + ""); + partMessageMap.put(APPEND_FILE_PART_MESSAGE_SIZE, chunkLength + ""); Put dataPut = new Put(Bytes.toBytes(PublicUtil.getRowKey(fileChunk.getUuid()) + PublicConstants.FILE_DATA_ROW_SUFFIX)); dataPut.addColumn(BYTE_FAMILY_DATA, Bytes.toBytes(String.valueOf(fileChunk.getOffset())), data); dataPut.addColumn(BYTE_FAMILY_META, Bytes.toBytes(String.valueOf(fileChunk.getOffset())), Bytes.toBytes(partMessageMap.toString())); @@ -126,25 +146,26 @@ public class HBaseSink extends RichSinkFunction { Put indexFilenamePut = new Put(Bytes.toBytes(indexFilenameKey)); indexFilenamePut.addColumn(HBaseColumnConstants.BYTE_FAMILY_META, HBaseColumnConstants.BYTE_COLUMN_FILENAME, Bytes.toBytes(fileChunk.getUuid())); indexFilenamePutList.add(indexFilenamePut); - sendHBaseFileCounter.inc(); + sinkFilesCounter.inc(); } else { Put metaPut = new Put(Bytes.toBytes(PublicUtil.getRowKey(fileChunk.getUuid()))); metaPut.addColumn(HBaseColumnConstants.BYTE_FAMILY_META, HBaseColumnConstants.BYTE_COLUMN_LAST_MODIFIED, Bytes.toBytes(timestamp)); dataPutList.add(metaPut); } chunkCount++; - chunkSize += data.length; + chunkSize += chunkLength; + calculateChunkSize(chunkLength); if (chunkSize >= maxBatchSize || chunkCount >= maxBatchCount) { if (isAsync) { if (dataPutList.size() > 0) { List> futures = asyncTable.batch(dataPutList); - sendHBaseCounter.inc(); + sinkRequestsCounter.inc(); CompletableFuture.supplyAsync(() -> { for (CompletableFuture completableFuture : futures) { completableFuture.whenCompleteAsync((result, error) -> { if (error != null) { LOG.error("put chunk to hbase error. ", error.getMessage()); - sendHBaseErrorCounter.inc(); + sinkErrorRequestsCounter.inc(); } }); } @@ -154,44 +175,44 @@ public class HBaseSink extends RichSinkFunction { } if (indexTimePutList.size() > 0) { asyncIndexTimeTable.batch(indexTimePutList); - sendHBaseCounter.inc(); + sinkRequestsCounter.inc(); indexTimePutList.clear(); } if (indexFilenamePutList.size() > 0) { asyncIndexFilenameTable.batch(indexFilenamePutList); - sendHBaseCounter.inc(); + sinkRequestsCounter.inc(); indexFilenamePutList.clear(); } } else { if (dataPutList.size() > 0) { try { - sendHBaseCounter.inc(); + sinkRequestsCounter.inc(); table.batch(dataPutList, null); } catch (IOException | InterruptedException e) { LOG.error("put chunk to hbase data table error. ", e.getMessage()); - sendHBaseErrorCounter.inc(); + sinkErrorRequestsCounter.inc(); } finally { dataPutList.clear(); } } if (indexTimePutList.size() > 0) { try { - sendHBaseCounter.inc(); + sinkRequestsCounter.inc(); indexTimeTable.batch(indexTimePutList, null); } catch (IOException | InterruptedException e) { LOG.error("put chunk to hbase index time table error. ", e.getMessage()); - sendHBaseErrorCounter.inc(); + sinkErrorRequestsCounter.inc(); } finally { indexTimePutList.clear(); } } if (indexFilenamePutList.size() > 0) { try { - sendHBaseCounter.inc(); + sinkRequestsCounter.inc(); indexFilenameTable.batch(indexFilenamePutList, null); } catch (IOException | InterruptedException e) { LOG.error("put chunk to hbase index filename table error. ", e.getMessage()); - sendHBaseErrorCounter.inc(); + sinkErrorRequestsCounter.inc(); } finally { indexFilenamePutList.clear(); } @@ -211,4 +232,20 @@ public class HBaseSink extends RichSinkFunction { IoUtil.close(syncHBaseConnection); IoUtil.close(AsyncHBaseConnection); } + + private void calculateChunkSize(long length) { + if (length <= 5 * 1024) { + lessThan5KBChunksCounter.inc(); + } else if (length <= 10 * 1024) { + between5KBAnd10KBChunksCounter.inc(); + } else if (length <= 50 * 1024) { + between10KBAnd50KBChunksCounter.inc(); + } else if (length <= 100 * 1024) { + between50KBAnd100KBChunksCounter.inc(); + } else if (length <= 1024 * 1024) { + between100KBAnd1MBChunksCounter.inc(); + } else { + greaterThan1MBChunksCounter.inc(); + } + } } diff --git a/src/main/java/com/zdjizhi/sink/HosSink.java b/src/main/java/com/zdjizhi/sink/HosSink.java index c486529..3256cef 100644 --- a/src/main/java/com/zdjizhi/sink/HosSink.java +++ b/src/main/java/com/zdjizhi/sink/HosSink.java @@ -36,10 +36,16 @@ public class HosSink extends RichSinkFunction { private static final Log LOG = LogFactory.get(); private final Configuration configuration; - public transient Counter sendHosCounter; - public transient Counter sendHosErrorCounter; - public transient Counter sendHosFileCounter; - public transient Counter sendHosChunkCounter; + public transient Counter sinkRequestsCounter; + public transient Counter sinkErrorRequestsCounter; + public transient Counter sinkFilesCounter; + public transient Counter sinkChunksCounter; + public transient Counter lessThan5KBChunksCounter; + public transient Counter between5KBAnd10KBChunksCounter; + public transient Counter between10KBAnd50KBChunksCounter; + public transient Counter between50KBAnd100KBChunksCounter; + public transient Counter between100KBAnd1MBChunksCounter; + public transient Counter greaterThan1MBChunksCounter; private boolean isAsync; private CloseableHttpClient syncHttpClient; private CloseableHttpAsyncClient asyncHttpClient; @@ -66,14 +72,27 @@ public class HosSink extends RichSinkFunction { public void open(Configuration parameters) throws Exception { super.open(parameters); MetricGroup metricGroup = getRuntimeContext().getMetricGroup(); - sendHosCounter = metricGroup.counter("sendHosCount"); - sendHosErrorCounter = metricGroup.counter("sendHosErrorCount"); - sendHosFileCounter = metricGroup.counter("sendHosFileCount"); - sendHosChunkCounter = metricGroup.counter("sendHosChunkCount"); - metricGroup.meter("sendHosPerSecond", new MeterView(sendHosCounter, 5)); - metricGroup.meter("sendHosErrorPerSecond", new MeterView(sendHosErrorCounter)); - metricGroup.meter("sendHosFilePerSecond", new MeterView(sendHosFileCounter)); - metricGroup.meter("sendHosChunkPerSecond", new MeterView(sendHosChunkCounter)); + lessThan5KBChunksCounter = metricGroup.counter("lessThan5KBChunksCount"); + between5KBAnd10KBChunksCounter = metricGroup.counter("between5KBAnd10KBChunksCount"); + between10KBAnd50KBChunksCounter = metricGroup.counter("between10KBAnd50KBChunksCount"); + between50KBAnd100KBChunksCounter = metricGroup.counter("between50KBAnd100KBChunksCount"); + between100KBAnd1MBChunksCounter = metricGroup.counter("between100KBAnd1MBChunksCount"); + greaterThan1MBChunksCounter = metricGroup.counter("greaterThan1MBChunksCount"); + metricGroup.meter("numLessThan5KBChunksOutPerSecond", new MeterView(lessThan5KBChunksCounter)); + metricGroup.meter("numBetween5KBAnd10KBChunksOutPerSecond", new MeterView(between5KBAnd10KBChunksCounter)); + metricGroup.meter("numBetween10KBAnd50KBChunksOutPerSecond", new MeterView(between10KBAnd50KBChunksCounter)); + metricGroup.meter("numBetween50KBAnd100KBChunkPsOuterSecond", new MeterView(between50KBAnd100KBChunksCounter)); + metricGroup.meter("numBetween100KBAnd1MBChunksOutPerSecond", new MeterView(between100KBAnd1MBChunksCounter)); + metricGroup.meter("numGreaterThan1MBChunksOutPerSecond", new MeterView(greaterThan1MBChunksCounter)); + sinkRequestsCounter = metricGroup.counter("sinkRequestsCount"); + sinkErrorRequestsCounter = metricGroup.counter("sinkErrorRequestsCount"); + sinkFilesCounter = metricGroup.counter("sinkFilesCount"); + sinkChunksCounter = metricGroup.counter("sinkChunksCount"); + metricGroup.meter("numRequestsSinkPerSecond", new MeterView(sinkRequestsCounter, 5)); + metricGroup.meter("numErrorRequestsSinkPerSecond", new MeterView(sinkErrorRequestsCounter)); + metricGroup.meter("numFilesSinkPerSecond", new MeterView(sinkFilesCounter)); + metricGroup.meter("numChunksSinkPerSecond", new MeterView(sinkChunksCounter)); + loadBalanceMode = configuration.getInteger(Configs.SINK_HOS_LOAD_BALANCE_MODE); if (loadBalanceMode == 0) { endpoint = configuration.getString(Configs.SINK_HOS_ENDPOINT); @@ -106,7 +125,8 @@ public class HosSink extends RichSinkFunction { if (fileChunk.getChunk() != null) { data = fileChunk.getChunk(); } - sendHosChunkCounter.inc(); + long chunkLength = data.length; + sinkChunksCounter.inc(); if (configuration.get(Configs.SINK_BATCH)) { hosMessage.put(HOS_META_FILE_TYPE, fileChunk.getFileType()); hosMessage.put(HOS_META_FILENAME, fileChunk.getUuid()); @@ -114,7 +134,7 @@ public class HosSink extends RichSinkFunction { hosMessage.put(HOS_OFFSET, fileChunk.getOffset() + ""); hosMessage.put(HOS_PART_LAST_FLAG, fileChunk.getLastChunkFlag() + ""); if (fileChunk.getOffset() == 0) { - sendHosFileCounter.inc(); + sinkFilesCounter.inc(); } } else { hosMessage.put(HOS_PART_NUMBER, fileChunk.getTimestamp() + ""); @@ -129,10 +149,11 @@ public class HosSink extends RichSinkFunction { } objectsMeta += hosMessage.toString() + ";"; hosMessage.clear(); - objectsOffset += data.length + ";"; + objectsOffset += chunkLength + ";"; byteList.add(data); chunkCount++; - chunkSize += data.length; + chunkSize += chunkLength; + calculateChunkSize(chunkLength); if (chunkSize >= maxBatchSize || chunkCount >= maxBatchCount) { HttpPut httpPut = new HttpPut(bathPutUrl); httpPut.setHeader(TOKEN, token); @@ -170,7 +191,7 @@ public class HosSink extends RichSinkFunction { httpPut.setHeader(HOS_OFFSET, fileChunk.getOffset() + ""); httpPut.setHeader(HOS_PART_LAST_FLAG, fileChunk.getLastChunkFlag() + ""); if (fileChunk.getOffset() == 0) { - sendHosFileCounter.inc(); + sinkFilesCounter.inc(); } } else { httpPut.setHeader(HOS_PART_NUMBER, fileChunk.getTimestamp() + ""); @@ -184,6 +205,7 @@ public class HosSink extends RichSinkFunction { } } httpPut.setEntity(new ByteArrayEntity(data)); + calculateChunkSize(chunkLength); executeRequest(httpPut); } } @@ -195,7 +217,7 @@ public class HosSink extends RichSinkFunction { } private void executeRequest(HttpPut httpPut) { - sendHosCounter.inc(); + sinkRequestsCounter.inc(); if (isAsync) { asyncHttpClient.execute(httpPut, new FutureCallback() { @Override @@ -204,18 +226,18 @@ public class HosSink extends RichSinkFunction { if (httpResponse.getStatusLine().getStatusCode() != 200) { String responseEntity = EntityUtils.toString(httpResponse.getEntity(), CharEncoding.UTF_8); LOG.error("put part to hos error. code: " + httpResponse.getStatusLine().getStatusCode() + ". message: " + responseEntity); - sendHosErrorCounter.inc(); + sinkErrorRequestsCounter.inc(); } } catch (IOException e) { LOG.error("put part to hos error.", e); - sendHosErrorCounter.inc(); + sinkErrorRequestsCounter.inc(); } } @Override public void failed(Exception ex) { LOG.error("put part to hos error.", ex); - sendHosErrorCounter.inc(); + sinkErrorRequestsCounter.inc(); if (loadBalanceMode == 1 && ex instanceof ConnectException) { endpoint = ipList.get(RandomUtil.randomInt(ipList.size())) + ":" + portList.get(RandomUtil.randomInt(portList.size())); bathPutUrl = URLUtil.normalize(endpoint + "/hos/" + configuration.get(Configs.SINK_HOS_BUCKET) + "/" + PublicUtil.getUUID()) + "?multiFile"; @@ -234,11 +256,11 @@ public class HosSink extends RichSinkFunction { if (response.getStatusLine().getStatusCode() != 200) { String responseEntity = EntityUtils.toString(response.getEntity(), CharEncoding.UTF_8); LOG.error("put part to hos error. code: " + response.getStatusLine().getStatusCode() + ". message: " + responseEntity); - sendHosErrorCounter.inc(); + sinkErrorRequestsCounter.inc(); } } catch (IOException e) { LOG.error("put part to hos error.", e); - sendHosErrorCounter.inc(); + sinkErrorRequestsCounter.inc(); if (loadBalanceMode == 1 && (e instanceof HttpHostConnectException || e instanceof ConnectTimeoutException)) { endpoint = ipList.get(RandomUtil.randomInt(ipList.size())) + ":" + portList.get(RandomUtil.randomInt(portList.size())); } @@ -247,4 +269,20 @@ public class HosSink extends RichSinkFunction { } } } + + private void calculateChunkSize(long length) { + if (length <= 5 * 1024) { + lessThan5KBChunksCounter.inc(); + } else if (length <= 10 * 1024) { + between5KBAnd10KBChunksCounter.inc(); + } else if (length <= 50 * 1024) { + between10KBAnd50KBChunksCounter.inc(); + } else if (length <= 100 * 1024) { + between50KBAnd100KBChunksCounter.inc(); + } else if (length <= 1024 * 1024) { + between100KBAnd1MBChunksCounter.inc(); + } else { + greaterThan1MBChunksCounter.inc(); + } + } } diff --git a/src/test/java/com/zdjizhi/FileChunkCombinerTests.java b/src/test/java/com/zdjizhi/FileChunkCombinerTests.java index acf4926..2cbefba 100644 --- a/src/test/java/com/zdjizhi/FileChunkCombinerTests.java +++ b/src/test/java/com/zdjizhi/FileChunkCombinerTests.java @@ -53,6 +53,8 @@ import java.time.Duration; import java.util.*; import java.util.concurrent.ConcurrentLinkedQueue; +import static com.zdjizhi.utils.PublicConstants.COMBINE_MODE_APPEND; + public class FileChunkCombinerTests { private File emlFile; private byte[] emlFileBytes; @@ -106,7 +108,8 @@ public class FileChunkCombinerTests { triggers.add(LastChunkOrNoDataInTimeTrigger.of(1000)); Trigger trigger = MultipleTrigger.of(triggers); processWindowFunction = new CombineChunkProcessWindowFunction(Integer.MAX_VALUE); - delayedChunkOutputTag = new OutputTag<>("delayed-chunk") {}; + delayedChunkOutputTag = new OutputTag("delayed-chunk") { + }; DataStreamSource source = env.fromCollection(inputFileChunks); DataStream window = source .keyBy(new FileChunkKeySelector()) @@ -122,7 +125,7 @@ public class FileChunkCombinerTests { @Test public void testParseMessagePackMapFunction() throws Exception { - ParseMessagePackMapFunction mapFunction = new ParseMessagePackMapFunction(false, Long.MAX_VALUE,""); + ParseMessagePackMapFunction mapFunction = new ParseMessagePackMapFunction(false, Long.MAX_VALUE, ""); OneInputStreamOperatorTestHarness testHarness = new OneInputStreamOperatorTestHarness<>(new StreamMap<>(mapFunction)); testHarness.setup(); testHarness.open(); @@ -140,9 +143,49 @@ public class FileChunkCombinerTests { StreamRecord sr1 = (StreamRecord) o2; return ((FileChunk) sr0.getValue()).getUuid().compareTo(((FileChunk) sr1.getValue()).getUuid()); }); - Assert.assertEquals(30, mapFunction.parseMessagePackCounter.getCount()); - Assert.assertEquals(0, mapFunction.parseMessagePackErrorCounter.getCount()); - Assert.assertEquals(0, mapFunction.rateLimitDropCounter.getCount()); + Assert.assertEquals(30, mapFunction.parseMessagePacksCounter.getCount()); + Assert.assertEquals(0, mapFunction.parseErrorMessagePacksCounter.getCount()); + Assert.assertEquals(0, mapFunction.rateLimitDropChunksCounter.getCount()); + Assert.assertEquals(21, mapFunction.equal0BChunksCounter.getCount()); + Assert.assertEquals(1, mapFunction.lessThan1KBChunksCounter.getCount()); + Assert.assertEquals(8, mapFunction.between1KBAnd5KBChunksCounter.getCount()); + Assert.assertEquals(0, mapFunction.between5KBAnd10KBChunksCounter.getCount()); + Assert.assertEquals(0, mapFunction.between10KBAnd50KBChunksCounter.getCount()); + Assert.assertEquals(0, mapFunction.between50KBAnd100KBChunksCounter.getCount()); + Assert.assertEquals(0, mapFunction.greaterThan100KBChunksCounter.getCount()); + Assert.assertEquals(10, mapFunction.emlChunksCounter.getCount()); + Assert.assertEquals(20, mapFunction.pcapngChunksCounter.getCount()); + Assert.assertEquals(0, mapFunction.txtChunksCounter.getCount()); + Assert.assertEquals(0, mapFunction.htmlChunksCounter.getCount()); + Assert.assertEquals(0, mapFunction.mediaChunksCounter.getCount()); + testHarness.close(); + } + + @Test + public void testSideOutputMapFunction() throws Exception { + SideOutputMapFunction sideOutputMapFunction = new SideOutputMapFunction(); + OneInputStreamOperatorTestHarness testHarness = new OneInputStreamOperatorTestHarness<>(new StreamMap<>(sideOutputMapFunction)); + testHarness.setup(); + testHarness.open(); + for (FileChunk fileChunk : inputFileChunks) { + testHarness.processElement(new StreamRecord<>(fileChunk)); + } + ConcurrentLinkedQueue expectedOutput = new ConcurrentLinkedQueue<>(); + for (FileChunk fileChunk : inputFileChunks) { + fileChunk.setChunkCount(1); + if (COMBINE_MODE_APPEND.equals(fileChunk.getCombineMode())) { + fileChunk.setChunkNumbers(fileChunk.getTimestamp() + "-" + fileChunk.getChunk().length + ";"); + } + expectedOutput.add(new StreamRecord<>(fileChunk)); + } + ConcurrentLinkedQueue actualOutput = testHarness.getOutput(); + Assert.assertEquals(30, actualOutput.size()); + TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expectedOutput, actualOutput, (o1, o2) -> { + StreamRecord sr0 = (StreamRecord) o1; + StreamRecord sr1 = (StreamRecord) o2; + return ((FileChunk) sr0.getValue()).getUuid().compareTo(((FileChunk) sr1.getValue()).getUuid()); + }); + Assert.assertEquals(30, sideOutputMapFunction.delayedChunksCounter.getCount()); testHarness.close(); } @@ -170,7 +213,7 @@ public class FileChunkCombinerTests { return ((FileChunk) sr0.getValue()).getUuid().compareTo(((FileChunk) sr1.getValue()).getUuid()); } }); - Assert.assertEquals(20, fileChunkFilterFunction.filterChunkCounter.getCount()); + Assert.assertEquals(20, fileChunkFilterFunction.filterChunksCounter.getCount()); testHarness.close(); } @@ -189,8 +232,8 @@ public class FileChunkCombinerTests { List actualOutput = new ArrayList<>(testHarness.extractOutputValues()); Assert.assertEquals(3, actualOutput.size()); TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expectedOutput, actualOutput, Comparator.comparing(o -> ((FileChunk) o).getUuid())); - Assert.assertEquals(0, processWindowFunction.combineErrorCounter.getCount()); - Assert.assertEquals(0, processWindowFunction.duplicateChunkCounter.getCount()); + Assert.assertEquals(0, processWindowFunction.combineErrorChunksCounter.getCount()); + Assert.assertEquals(0, processWindowFunction.duplicateChunksCounter.getCount()); testHarness.close(); } @@ -223,8 +266,8 @@ public class FileChunkCombinerTests { } Assert.assertEquals(10, sideOutput.size()); TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expectedSideOutput, actualSideOutput, Comparator.comparing(o -> ((FileChunk) o).getUuid())); - Assert.assertEquals(0, processWindowFunction.combineErrorCounter.getCount()); - Assert.assertEquals(0, processWindowFunction.duplicateChunkCounter.getCount()); + Assert.assertEquals(0, processWindowFunction.combineErrorChunksCounter.getCount()); + Assert.assertEquals(0, processWindowFunction.duplicateChunksCounter.getCount()); testHarness.close(); } @@ -251,8 +294,8 @@ public class FileChunkCombinerTests { Assert.assertEquals(inputFiles.get(0), actualOutput.get(0)); Assert.assertEquals(inputFiles.get(1).getChunk().length + pcapngFileChunks.get(5).getChunk().length, actualOutput.get(1).getChunk().length); Assert.assertEquals(inputFiles.get(2).getChunk().length + pcapngIncludeMetaFileChunks.get(5).getChunk().length, actualOutput.get(2).getChunk().length); - Assert.assertEquals(0, processWindowFunction.combineErrorCounter.getCount()); - Assert.assertEquals(1, processWindowFunction.duplicateChunkCounter.getCount()); + Assert.assertEquals(0, processWindowFunction.combineErrorChunksCounter.getCount()); + Assert.assertEquals(1, processWindowFunction.duplicateChunksCounter.getCount()); testHarness.close(); } @@ -279,8 +322,8 @@ public class FileChunkCombinerTests { Assert.assertEquals(inputFiles.get(0).getChunk().length - emlFileChunks.get(5).getChunk().length, actualOutput.get(0).getChunk().length + actualOutput.get(1).getChunk().length); Assert.assertEquals(inputFiles.get(1).getChunk().length - pcapngFileChunks.get(5).getChunk().length, actualOutput.get(2).getChunk().length); Assert.assertEquals(inputFiles.get(2).getChunk().length - pcapngIncludeMetaFileChunks.get(5).getChunk().length, actualOutput.get(3).getChunk().length); - Assert.assertEquals(0, processWindowFunction.combineErrorCounter.getCount()); - Assert.assertEquals(0, processWindowFunction.duplicateChunkCounter.getCount()); + Assert.assertEquals(0, processWindowFunction.combineErrorChunksCounter.getCount()); + Assert.assertEquals(0, processWindowFunction.duplicateChunksCounter.getCount()); testHarness.close(); } @@ -298,20 +341,30 @@ public class FileChunkCombinerTests { //seek文件 FileChunk fileChunk = new FileChunk("0000000001", "eml", 0, data.length, data, "seek", 1, 5, System.currentTimeMillis()); testHarness.processElement(new StreamRecord<>(fileChunk)); - Assert.assertEquals(1, hosSink.sendHosCounter.getCount()); - Assert.assertEquals(0, hosSink.sendHosErrorCounter.getCount()); - Assert.assertEquals(1, hosSink.sendHosFileCounter.getCount()); - Assert.assertEquals(1, hosSink.sendHosChunkCounter.getCount()); + Assert.assertEquals(1, hosSink.sinkRequestsCounter.getCount()); + Assert.assertEquals(0, hosSink.sinkErrorRequestsCounter.getCount()); + Assert.assertEquals(1, hosSink.sinkFilesCounter.getCount()); + Assert.assertEquals(1, hosSink.sinkChunksCounter.getCount()); //append文件 fileChunk = new FileChunk("0000000002", "pcapng", data.length, data, "append", 5, System.currentTimeMillis(), pcapngFileMeta, "1-200,2-200,3-200,4-200,5-200"); testHarness.processElement(new StreamRecord<>(fileChunk)); - Assert.assertEquals(2, hosSink.sendHosCounter.getCount()); - Assert.assertEquals(0, hosSink.sendHosErrorCounter.getCount()); - Assert.assertEquals(1, hosSink.sendHosFileCounter.getCount()); - Assert.assertEquals(2, hosSink.sendHosChunkCounter.getCount()); - testHarness.close(); + Assert.assertEquals(2, hosSink.sinkRequestsCounter.getCount()); + Assert.assertEquals(0, hosSink.sinkErrorRequestsCounter.getCount()); + Assert.assertEquals(1, hosSink.sinkFilesCounter.getCount()); + Assert.assertEquals(2, hosSink.sinkChunksCounter.getCount()); + Assert.assertEquals(2, hosSink.sinkChunksCounter.getCount()); + Assert.assertEquals(2, hosSink.sinkChunksCounter.getCount()); + Assert.assertEquals(2, hosSink.sinkChunksCounter.getCount()); + Assert.assertEquals(2, hosSink.lessThan5KBChunksCounter.getCount()); + Assert.assertEquals(0, hosSink.between5KBAnd10KBChunksCounter.getCount()); + Assert.assertEquals(0, hosSink.between10KBAnd50KBChunksCounter.getCount()); + Assert.assertEquals(0, hosSink.between50KBAnd100KBChunksCounter.getCount()); + Assert.assertEquals(0, hosSink.between100KBAnd1MBChunksCounter.getCount()); + Assert.assertEquals(0, hosSink.greaterThan1MBChunksCounter.getCount()); + testHarness.close(); //测试批量上传 + data = RandomUtil.randomString(10000).getBytes(); configuration.setString(Configs.SINK_TYPE, "hos"); configuration.setBoolean(Configs.SINK_BATCH, true); configuration.setInteger(Configs.SINK_BATCH_COUNT, 2); @@ -324,10 +377,16 @@ public class FileChunkCombinerTests { testHarness.processElement(new StreamRecord<>(fileChunk)); fileChunk = new FileChunk("0000000002", "eml", 0, data.length, data, "seek", 1, 5, System.currentTimeMillis()); testHarness.processElement(new StreamRecord<>(fileChunk)); - Assert.assertEquals(1, hosSink.sendHosCounter.getCount()); - Assert.assertEquals(0, hosSink.sendHosErrorCounter.getCount()); - Assert.assertEquals(2, hosSink.sendHosFileCounter.getCount()); - Assert.assertEquals(2, hosSink.sendHosChunkCounter.getCount()); + Assert.assertEquals(1, hosSink.sinkRequestsCounter.getCount()); + Assert.assertEquals(0, hosSink.sinkErrorRequestsCounter.getCount()); + Assert.assertEquals(2, hosSink.sinkFilesCounter.getCount()); + Assert.assertEquals(2, hosSink.sinkChunksCounter.getCount()); + Assert.assertEquals(0, hosSink.lessThan5KBChunksCounter.getCount()); + Assert.assertEquals(2, hosSink.between5KBAnd10KBChunksCounter.getCount()); + Assert.assertEquals(0, hosSink.between10KBAnd50KBChunksCounter.getCount()); + Assert.assertEquals(0, hosSink.between50KBAnd100KBChunksCounter.getCount()); + Assert.assertEquals(0, hosSink.between100KBAnd1MBChunksCounter.getCount()); + Assert.assertEquals(0, hosSink.greaterThan1MBChunksCounter.getCount()); testHarness.close(); } @@ -346,10 +405,16 @@ public class FileChunkCombinerTests { testHarness.processElement(new StreamRecord<>(fileChunk)); fileChunk = new FileChunk("0000000002", "eml", 0, data.length, data, "seek", 1, 5, System.currentTimeMillis()); testHarness.processElement(new StreamRecord<>(fileChunk)); - Assert.assertEquals("上传文件到hbase错误", 0, hBaseSink.sendHBaseErrorCounter.getCount()); - Assert.assertEquals("上传文件到hbase次数错误", 3, hBaseSink.sendHBaseCounter.getCount()); - Assert.assertEquals(2, hBaseSink.sendHBaseFileCounter.getCount()); - Assert.assertEquals(2, hBaseSink.sendHBaseChunkCounter.getCount()); + Assert.assertEquals(3, hBaseSink.sinkRequestsCounter.getCount()); + Assert.assertEquals(0, hBaseSink.sinkErrorRequestsCounter.getCount()); + Assert.assertEquals(2, hBaseSink.sinkFilesCounter.getCount()); + Assert.assertEquals(2, hBaseSink.sinkChunksCounter.getCount()); + Assert.assertEquals(2, hBaseSink.lessThan5KBChunksCounter.getCount()); + Assert.assertEquals(0, hBaseSink.between5KBAnd10KBChunksCounter.getCount()); + Assert.assertEquals(0, hBaseSink.between10KBAnd50KBChunksCounter.getCount()); + Assert.assertEquals(0, hBaseSink.between50KBAnd100KBChunksCounter.getCount()); + Assert.assertEquals(0, hBaseSink.between100KBAnd1MBChunksCounter.getCount()); + Assert.assertEquals(0, hBaseSink.greaterThan1MBChunksCounter.getCount()); testHarness.close(); } @@ -617,7 +682,7 @@ public class FileChunkCombinerTests { triggers.add(LastChunkOrNoDataInTimeTrigger.of(windowIdleTime * 1000)); Trigger trigger = MultipleTrigger.of(triggers); env.addSource(source) - .map(new ParseMessagePackMapFunction(false, Long.MAX_VALUE,"")) + .map(new ParseMessagePackMapFunction(false, Long.MAX_VALUE, "")) .filter(new FileChunkFilterFunction(Long.MAX_VALUE, "")) .assignTimestampsAndWatermarks(watermarkStrategy) .keyBy(new FileChunkKeySelector(), BasicTypeInfo.STRING_TYPE_INFO)