diff --git a/pom.xml b/pom.xml index aeefdca..11667bf 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.zdjizhi file-chunk-combiner - 1.3.0 + 1.3.1 diff --git a/src/main/java/com/zdjizhi/FileChunkCombiner.java b/src/main/java/com/zdjizhi/FileChunkCombiner.java index 705f673..f60fefe 100644 --- a/src/main/java/com/zdjizhi/FileChunkCombiner.java +++ b/src/main/java/com/zdjizhi/FileChunkCombiner.java @@ -17,7 +17,6 @@ import com.zdjizhi.trigger.LastChunkTrigger; import com.zdjizhi.trigger.MultipleTrigger; import org.apache.flink.api.common.eventtime.WatermarkStrategy; import org.apache.flink.api.common.typeinfo.BasicTypeInfo; -import org.apache.flink.api.java.functions.KeySelector; import org.apache.flink.api.java.utils.ParameterTool; import org.apache.flink.configuration.Configuration; import org.apache.flink.streaming.api.datastream.*; @@ -174,35 +173,6 @@ public class FileChunkCombiner { .filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_proxy_file_meta")) .name("Filter: Map") .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM)); - windowStream - .filter(new FileChunkFilterFunction(configuration.getString(Configs.SINK_OSS_FILTER_EXPRESSION), "sink_oss")) - .name("Filter: Oss") - .setParallelism(configuration.get(Configs.COMBINER_WINDOW_PARALLELISM)) - .union(fileMetaSessionSingleOutputStreamOperator, fileMetaProxySingleOutputStreamOperator) - .keyBy(new FileChunkKeySelector()) - .addSink(new OssSinkByEhcache(configuration)) - .name("Oss") - .setParallelism(configuration.get(Configs.SINK_PARALLELISM)); - break; - case "oss-caffeine": - fileMetaSessionSingleOutputStreamOperator = environment.addSource(FileMetaKafkaConsumer.stringConsumer(configuration, configuration.get(Configs.KAFKA_FILE_META_SESSION_TOPIC))) - .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM)) - .name(configuration.get(Configs.KAFKA_FILE_META_SESSION_TOPIC)) - .flatMap(new ParseSessionFileMetaFlatMapFunction()) - .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM)) - .name("Map: Parse Session File Meta") - .filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_session_file_meta")) - .name("Filter: Map") - .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM)); - fileMetaProxySingleOutputStreamOperator = environment.addSource(FileMetaKafkaConsumer.stringConsumer(configuration, configuration.get(Configs.KAFKA_FILE_META_PROXY_TOPIC))) - .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM)) - .name(configuration.get(Configs.KAFKA_FILE_META_PROXY_TOPIC)) - .flatMap(new ParseProxyFileMetaFlatMapFunction()) - .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM)) - .name("Map: Parse Proxy File Meta") - .filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_proxy_file_meta")) - .name("Filter: Map") - .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM)); windowStream .filter(new FileChunkFilterFunction(configuration.getString(Configs.SINK_OSS_FILTER_EXPRESSION), "sink_oss")) .name("Filter: Oss") @@ -213,38 +183,6 @@ public class FileChunkCombiner { .name("Oss") .setParallelism(configuration.get(Configs.SINK_PARALLELISM)); break; - case "test": - fileMetaSessionSingleOutputStreamOperator = environment.addSource(FileMetaKafkaConsumer.stringConsumer(configuration, configuration.get(Configs.KAFKA_FILE_META_SESSION_TOPIC))) - .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM)) - .name(configuration.get(Configs.KAFKA_FILE_META_SESSION_TOPIC)) - .flatMap(new ParseSessionFileMetaFlatMapFunction()) - .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM)) - .name("Map: Parse Session File Meta") - .filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_session_file_meta")) - .name("Filter: Map") - .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM)); - fileMetaProxySingleOutputStreamOperator = environment.addSource(FileMetaKafkaConsumer.stringConsumer(configuration, configuration.get(Configs.KAFKA_FILE_META_PROXY_TOPIC))) - .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM)) - .name(configuration.get(Configs.KAFKA_FILE_META_PROXY_TOPIC)) - .flatMap(new ParseProxyFileMetaFlatMapFunction()) - .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM)) - .name("Map: Parse Proxy File Meta") - .filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_proxy_file_meta")) - .name("Filter: Map") - .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM)); - KeyedStream fileMetaStringKeyedStream = fileMetaSessionSingleOutputStreamOperator - .union(fileMetaProxySingleOutputStreamOperator) - .keyBy((KeySelector) FileChunk::getUuid); - windowStream - .filter(new FileChunkFilterFunction(configuration.getString(Configs.SINK_OSS_FILTER_EXPRESSION), "sink_oss")) - .name("Filter: Oss") - .setParallelism(configuration.get(Configs.COMBINER_WINDOW_PARALLELISM)) - .keyBy((KeySelector) FileChunk::getUuid) - .connect(fileMetaStringKeyedStream) - .process(new TestKeyedCoProcessFunction(configuration)) - .setParallelism(configuration.get(Configs.COMBINER_WINDOW_PARALLELISM)) - .name("Oss"); - break; } } environment.execute(configuration.get(Configs.FLINK_JOB_NAME)); diff --git a/src/main/java/com/zdjizhi/config/Configs.java b/src/main/java/com/zdjizhi/config/Configs.java index 0f642bf..383d0c0 100644 --- a/src/main/java/com/zdjizhi/config/Configs.java +++ b/src/main/java/com/zdjizhi/config/Configs.java @@ -84,6 +84,9 @@ public class Configs { public static final ConfigOption SINK_BATCH_SIZE = ConfigOptions.key("sink.batch.size") .longType() .defaultValue(Long.MAX_VALUE); + public static final ConfigOption SINK_BATCH_TIME = ConfigOptions.key("sink.batch.time") + .intType() + .defaultValue(5); public static final ConfigOption SINK_FILTER_EXPRESSION = ConfigOptions.key("sink.filter.expression") .stringType() .defaultValue(""); diff --git a/src/main/java/com/zdjizhi/function/CombineChunkProcessWindowFunction.java b/src/main/java/com/zdjizhi/function/CombineChunkProcessWindowFunction.java index cd5b0df..fd1ca2b 100644 --- a/src/main/java/com/zdjizhi/function/CombineChunkProcessWindowFunction.java +++ b/src/main/java/com/zdjizhi/function/CombineChunkProcessWindowFunction.java @@ -120,8 +120,8 @@ public class CombineChunkProcessWindowFunction extends ProcessWindowFunction 0) {//将可合并的chunk合并,清空集合 - FileChunk fileChunk = combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), originalFileChunkList.get(0).getTimestamp(), null); + if (!waitingToCombineChunkList.isEmpty()) {//将可合并的chunk合并,清空集合 + FileChunk fileChunk = combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), currentFileChunk.getTimestamp(), null); if (fileChunk != null) { combinedFileChunkList.add(fileChunk); } @@ -139,8 +139,8 @@ public class CombineChunkProcessWindowFunction extends ProcessWindowFunction 0) { - FileChunk fileChunk = combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), originalFileChunkList.get(0).getTimestamp(), null); + if (!waitingToCombineChunkList.isEmpty()) { + FileChunk fileChunk = combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), currentFileChunk.getTimestamp(), null); if (fileChunk != null) { combinedFileChunkList.add(fileChunk); } diff --git a/src/main/java/com/zdjizhi/function/TestKeyedCoProcessFunction.java b/src/main/java/com/zdjizhi/function/TestKeyedCoProcessFunction.java deleted file mode 100644 index fb71fd3..0000000 --- a/src/main/java/com/zdjizhi/function/TestKeyedCoProcessFunction.java +++ /dev/null @@ -1,101 +0,0 @@ -package com.zdjizhi.function; - -import cn.hutool.core.io.IoUtil; -import com.zdjizhi.config.Configs; -import com.zdjizhi.pojo.FileChunk; -import com.zdjizhi.utils.HBaseConnectionUtil; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.metrics.Counter; -import org.apache.flink.metrics.MeterView; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.streaming.api.functions.co.KeyedCoProcessFunction; -import org.apache.flink.util.Collector; -import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.client.*; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -public class TestKeyedCoProcessFunction extends KeyedCoProcessFunction { - - private final Configuration configuration; - public transient Counter chunksInCounter; - public transient Counter fileMetasInCounter; - - private boolean isAsync; - private Connection syncHBaseConnection; - private AsyncConnection AsyncHBaseConnection; - private Table table; - private AsyncTable asyncTable; - private List dataPutList; - private List metaPutList; - private long maxBatchCount; - - public TestKeyedCoProcessFunction(Configuration configuration) { - this.configuration = configuration; - } - - @Override - public void open(Configuration parameters) throws Exception { - super.open(parameters); - MetricGroup metricGroup = getRuntimeContext().getMetricGroup().addGroup("file_chunk_combiner", "add_file_meta"); - chunksInCounter = metricGroup.counter("chunksInCount"); - fileMetasInCounter = metricGroup.counter("fileMetasInCount"); - metricGroup.meter("numChunksInPerSecond", new MeterView(chunksInCounter)); - metricGroup.meter("numFileMetasInPerSecond", new MeterView(fileMetasInCounter)); - isAsync = configuration.getBoolean(Configs.SINK_ASYNC); - if (isAsync) { - AsyncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getAsyncHBaseConnection(); - asyncTable = AsyncHBaseConnection.getTable(TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET))); - } else { - syncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getSyncHBaseConnection(); - table = syncHBaseConnection.getTable(TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET))); - } - maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT); - dataPutList = new ArrayList<>(); - metaPutList = new ArrayList<>(); - } - - @Override - public void processElement1(FileChunk value, Context ctx, Collector out) throws IOException, InterruptedException { - chunksInCounter.inc(); - Put dataPut = new Put(value.getUuid().getBytes()); - dataPut.addColumn("meta".getBytes(), "data".getBytes(), (value.toString()).getBytes()); - dataPutList.add(dataPut); - if (dataPutList.size() >= maxBatchCount) { - if (isAsync) { - asyncTable.batch(dataPutList); - dataPutList.clear(); - } else { - table.batch(dataPutList, null); - dataPutList.clear(); - } - } - } - - @Override - public void processElement2(FileChunk value, Context ctx, Collector out) throws IOException, InterruptedException { - fileMetasInCounter.inc(); - Put metaPut = new Put(value.getUuid().getBytes()); - metaPut.addColumn("meta".getBytes(), "meta".getBytes(), (value.getMeta().toString()).getBytes()); - metaPutList.add(metaPut); - if (metaPutList.size() >= maxBatchCount) { - if (isAsync) { - asyncTable.batch(metaPutList); - metaPutList.clear(); - } else { - table.batch(metaPutList, null); - metaPutList.clear(); - } - } - } - - @Override - public void close() { - IoUtil.close(table); - IoUtil.close(syncHBaseConnection); - IoUtil.close(AsyncHBaseConnection); - } -} diff --git a/src/main/java/com/zdjizhi/sink/HBaseSink.java b/src/main/java/com/zdjizhi/sink/HBaseSink.java index 3bab6aa..ffcef6e 100644 --- a/src/main/java/com/zdjizhi/sink/HBaseSink.java +++ b/src/main/java/com/zdjizhi/sink/HBaseSink.java @@ -23,6 +23,9 @@ import org.apache.hadoop.hbase.util.Bytes; import java.io.IOException; import java.util.*; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; import static com.zdjizhi.utils.PublicConstants.*; import static com.zdjizhi.utils.HBaseColumnConstants.*; @@ -61,7 +64,7 @@ public class HBaseSink extends RichSinkFunction { public transient Counter mediaChunksCounter; private boolean isAsync; private Connection syncHBaseConnection; - private AsyncConnection AsyncHBaseConnection; + private AsyncConnection asyncHBaseConnection; private Table table; private Table indexTimeTable; private Table indexFilenameTable; @@ -72,12 +75,12 @@ public class HBaseSink extends RichSinkFunction { private List indexTimePutList; private List indexFilenamePutList; private long chunkSize; - private int chunkCount; private long maxBatchSize; private long maxBatchCount; + private ScheduledExecutorService executorService; private long rateLimitThreshold; private String rateLimitExpression; - private long timestamp; + private volatile long timestamp; private long count; private JexlExpression jexlExpression; private JexlContext jexlContext; @@ -148,57 +151,75 @@ public class HBaseSink extends RichSinkFunction { metricGroup.meter("numMediaChunksOutPerSecond", new MeterView(mediaChunksCounter)); isAsync = configuration.getBoolean(Configs.SINK_ASYNC); if (isAsync) { - AsyncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getAsyncHBaseConnection(); - asyncTable = AsyncHBaseConnection.getTable(TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET))); - asyncIndexTimeTable = AsyncHBaseConnection.getTable(TableName.valueOf("default:index_time_" + configuration.get(Configs.SINK_HOS_BUCKET))); - asyncIndexFilenameTable = AsyncHBaseConnection.getTable(TableName.valueOf("default:index_filename_" + configuration.get(Configs.SINK_HOS_BUCKET))); + asyncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getAsyncHBaseConnection(); + asyncTable = asyncHBaseConnection.getTable(TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET))); + asyncIndexTimeTable = asyncHBaseConnection.getTable(TableName.valueOf("default:index_time_" + configuration.get(Configs.SINK_HOS_BUCKET))); + asyncIndexFilenameTable = asyncHBaseConnection.getTable(TableName.valueOf("default:index_filename_" + configuration.get(Configs.SINK_HOS_BUCKET))); } else { syncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getSyncHBaseConnection(); table = syncHBaseConnection.getTable(TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET))); indexTimeTable = syncHBaseConnection.getTable(TableName.valueOf("default:index_time_" + configuration.get(Configs.SINK_HOS_BUCKET))); indexFilenameTable = syncHBaseConnection.getTable(TableName.valueOf("default:index_filename_" + configuration.get(Configs.SINK_HOS_BUCKET))); } - maxBatchSize = configuration.getLong(Configs.SINK_BATCH_SIZE); - maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT); - dataPutList = new ArrayList<>(); - indexTimePutList = new ArrayList<>(); - indexFilenamePutList = new ArrayList<>(); - chunkSize = 0; - chunkCount = 0; - rateLimitThreshold = configuration.getLong(Configs.SINK_RATE_LIMIT_THRESHOLD); - rateLimitExpression = configuration.getString(Configs.SINK_RATE_LIMIT_EXCLUSION_EXPRESSION); timestamp = System.currentTimeMillis(); - count = 0; - JexlEngine jexlEngine = new JexlBuilder().create(); - jexlExpression = jexlEngine.createExpression(rateLimitExpression); - jexlContext = new MapContext(); + if (configuration.get(Configs.SINK_BATCH)) { + maxBatchSize = configuration.getLong(Configs.SINK_BATCH_SIZE); + maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT); + dataPutList = new ArrayList<>(); + indexTimePutList = new ArrayList<>(); + indexFilenamePutList = new ArrayList<>(); + chunkSize = 0; + executorService = Executors.newScheduledThreadPool(1); + long period = configuration.getInteger(Configs.SINK_BATCH_TIME); + executorService.scheduleWithFixedDelay(() -> { + if (System.currentTimeMillis() - timestamp > (period * 1000)) { + if (!dataPutList.isEmpty()) { + synchronized (this) { + sendBatchData(); + } + } + } + }, period, period, TimeUnit.SECONDS); + } + if (rateLimitThreshold > 0) { + rateLimitThreshold = configuration.getLong(Configs.SINK_RATE_LIMIT_THRESHOLD); + rateLimitExpression = configuration.getString(Configs.SINK_RATE_LIMIT_EXCLUSION_EXPRESSION); + count = 0; + JexlEngine jexlEngine = new JexlBuilder().create(); + jexlExpression = jexlEngine.createExpression(rateLimitExpression); + jexlContext = new MapContext(); + } } @Override public void invoke(FileChunk fileChunk, Context context) { - chunksInCounter.inc(); - bytesInCounter.inc(fileChunk.getLength()); - if (rateLimitThreshold > 0) { - count++; - if (System.currentTimeMillis() - timestamp < 1000 && count > rateLimitThreshold) { - if (checkFileChunk(fileChunk)) { - sendFileChunk(fileChunk); - } else { - rateLimitDropChunksCounter.inc(); - } - } else if (System.currentTimeMillis() - timestamp >= 1000) { - if (checkFileChunk(fileChunk)) { - sendFileChunk(fileChunk); - } else { - rateLimitDropChunksCounter.inc(); - timestamp = System.currentTimeMillis(); + synchronized (this) { + long currentTimeMillis = System.currentTimeMillis(); + chunksInCounter.inc(); + bytesInCounter.inc(fileChunk.getLength()); + if (rateLimitThreshold > 0) { + count++; + if (currentTimeMillis - timestamp < 1000 && count > rateLimitThreshold) { + if (checkFileChunk(fileChunk)) { + sendFileChunk(fileChunk); + } else { + rateLimitDropChunksCounter.inc(); + } + } else if (currentTimeMillis - timestamp >= 1000) { + if (checkFileChunk(fileChunk)) { + sendFileChunk(fileChunk); + } else { + rateLimitDropChunksCounter.inc(); + } + timestamp = currentTimeMillis; count = 0; + } else { + sendFileChunk(fileChunk); } } else { + timestamp = currentTimeMillis; sendFileChunk(fileChunk); } - } else { - sendFileChunk(fileChunk); } } @@ -208,7 +229,10 @@ public class HBaseSink extends RichSinkFunction { IoUtil.close(indexTimeTable); IoUtil.close(indexFilenameTable); IoUtil.close(syncHBaseConnection); - IoUtil.close(AsyncHBaseConnection); + IoUtil.close(asyncHBaseConnection); + if (executorService != null) { + executorService.shutdown(); + } } private void sendFileChunk(FileChunk fileChunk) { @@ -254,72 +278,52 @@ public class HBaseSink extends RichSinkFunction { metaPut.addColumn(HBaseColumnConstants.BYTE_FAMILY_META, HBaseColumnConstants.BYTE_COLUMN_LAST_MODIFIED, Bytes.toBytes(timestamp)); dataPutList.add(metaPut); } - chunkCount++; chunkSize += chunkLength; chunksOutCounter.inc(); bytesOutCounter.inc(chunkLength); calculateFileChunkMetrics(fileChunk); - if (chunkSize >= maxBatchSize || chunkCount >= maxBatchCount) { - if (isAsync) { - if (dataPutList.size() > 0) { - List> futures = asyncTable.batch(dataPutList); - CompletableFuture.supplyAsync(() -> { - for (CompletableFuture completableFuture : futures) { - completableFuture.whenCompleteAsync((result, error) -> { - if (error != null) { - LOG.error("put chunk to hbase error. ", error.getMessage()); - errorChunksCounter.inc(); - } - }); - } - return null; - }); - dataPutList.clear(); - } - if (indexTimePutList.size() > 0) { - asyncIndexTimeTable.batch(indexTimePutList); - indexTimePutList.clear(); - } - if (indexFilenamePutList.size() > 0) { - asyncIndexFilenameTable.batch(indexFilenamePutList); - indexFilenamePutList.clear(); - } - } else { - if (dataPutList.size() > 0) { - try { - table.batch(dataPutList, null); - } catch (IOException | InterruptedException e) { - LOG.error("put chunk to hbase data table error. ", e.getMessage()); - errorChunksCounter.inc(dataPutList.size()); - } finally { - dataPutList.clear(); - } - } - if (indexTimePutList.size() > 0) { - try { - indexTimeTable.batch(indexTimePutList, null); - } catch (IOException | InterruptedException e) { - LOG.error("put chunk to hbase index time table error. ", e.getMessage()); - } finally { - indexTimePutList.clear(); - } - } - if (indexFilenamePutList.size() > 0) { - try { - indexFilenameTable.batch(indexFilenamePutList, null); - } catch (IOException | InterruptedException e) { - LOG.error("put chunk to hbase index filename table error. ", e.getMessage()); - } finally { - indexFilenamePutList.clear(); - } - } - } - chunkSize = 0; - chunkCount = 0; + if (chunkSize >= maxBatchSize || dataPutList.size() >= maxBatchCount) { + sendBatchData(); } } } + private void sendBatchData() { + if (isAsync) { + List> futures = asyncTable.batch(dataPutList); + CompletableFuture.supplyAsync(() -> { + for (CompletableFuture completableFuture : futures) { + completableFuture.whenCompleteAsync((result, error) -> { + if (error != null) { + LOG.error("Put chunk to hbase error. ", error.getMessage()); + errorChunksCounter.inc(); + } + }); + } + return null; + }); + dataPutList.clear(); + asyncIndexTimeTable.batch(indexTimePutList); + indexTimePutList.clear(); + asyncIndexFilenameTable.batch(indexFilenamePutList); + indexFilenamePutList.clear(); + } else { + try { + table.batch(dataPutList, null); + indexTimeTable.batch(indexTimePutList, null); + indexFilenameTable.batch(indexFilenamePutList, null); + } catch (IOException | InterruptedException e) { + LOG.error("Put chunk to hbase error. ", e.getMessage()); + errorChunksCounter.inc(dataPutList.size()); + } finally { + dataPutList.clear(); + indexTimePutList.clear(); + indexFilenamePutList.clear(); + } + } + chunkSize = 0; + } + private boolean checkFileChunk(FileChunk fileChunk) { if (StrUtil.isNotEmpty(rateLimitExpression)) { jexlContext.set(fileChunk.getClass().getSimpleName(), fileChunk); diff --git a/src/main/java/com/zdjizhi/sink/HosSink.java b/src/main/java/com/zdjizhi/sink/HosSink.java index ac25022..b79794e 100644 --- a/src/main/java/com/zdjizhi/sink/HosSink.java +++ b/src/main/java/com/zdjizhi/sink/HosSink.java @@ -28,6 +28,9 @@ import org.apache.http.util.EntityUtils; import java.io.IOException; import java.net.ConnectException; import java.util.*; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; import static com.zdjizhi.utils.HttpHeaderConstants.*; import static com.zdjizhi.utils.PublicConstants.*; @@ -73,16 +76,16 @@ public class HosSink extends RichSinkFunction { private String token; private volatile String bathPutUrl; private HashMap hosMessage; - private String objectsMeta = ""; - private String objectsOffset = ""; + private String objectsMeta; + private String objectsOffset; private List byteList; private long maxBatchSize; private long maxBatchCount; - private long chunkSize = 0; - private int chunkCount = 0; + private long chunkSize; + private ScheduledExecutorService executorService; private long rateLimitThreshold; private String rateLimitExpression; - private long timestamp; + private volatile long timestamp; private long count; private JexlExpression jexlExpression; private JexlContext jexlContext; @@ -167,47 +170,67 @@ public class HosSink extends RichSinkFunction { } else { syncHttpClient = HttpClientUtil.getInstance(configuration).getSyncHttpClient(); } - bathPutUrl = URLUtil.normalize(endpoint + "/hos/" + configuration.get(Configs.SINK_HOS_BUCKET) + "/" + PublicUtil.getUUID()) + "?multiFile"; - maxBatchSize = configuration.getLong(Configs.SINK_BATCH_SIZE); - maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT); - hosMessage = new HashMap<>(); - objectsMeta = ""; - objectsOffset = ""; - byteList = new ArrayList<>(); - rateLimitThreshold = configuration.getLong(Configs.SINK_RATE_LIMIT_THRESHOLD); - rateLimitExpression = configuration.getString(Configs.SINK_RATE_LIMIT_EXCLUSION_EXPRESSION); timestamp = System.currentTimeMillis(); - count = 0; - JexlEngine jexlEngine = new JexlBuilder().create(); - jexlExpression = jexlEngine.createExpression(rateLimitExpression); - jexlContext = new MapContext(); + if (configuration.get(Configs.SINK_BATCH)) { + bathPutUrl = URLUtil.normalize(endpoint + "/hos/" + configuration.get(Configs.SINK_HOS_BUCKET) + "/" + PublicUtil.getUUID()) + "?multiFile"; + maxBatchSize = configuration.getLong(Configs.SINK_BATCH_SIZE); + maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT); + hosMessage = new HashMap<>(); + byteList = new ArrayList<>(); + objectsMeta = ""; + objectsOffset = ""; + chunkSize = 0; + executorService = Executors.newScheduledThreadPool(1); + long period = configuration.getInteger(Configs.SINK_BATCH_TIME); + executorService.scheduleWithFixedDelay(() -> { + if (System.currentTimeMillis() - timestamp > (period * 1000)) { + if (!byteList.isEmpty()) { + synchronized (this) { + sendBatchData(); + } + } + } + }, period, period, TimeUnit.SECONDS); + } + if (rateLimitThreshold > 0) { + rateLimitThreshold = configuration.getLong(Configs.SINK_RATE_LIMIT_THRESHOLD); + rateLimitExpression = configuration.getString(Configs.SINK_RATE_LIMIT_EXCLUSION_EXPRESSION); + count = 0; + JexlEngine jexlEngine = new JexlBuilder().create(); + jexlExpression = jexlEngine.createExpression(rateLimitExpression); + jexlContext = new MapContext(); + } } @Override public void invoke(FileChunk fileChunk, Context context) { - chunksInCounter.inc(); - bytesInCounter.inc(fileChunk.getLength()); - if (rateLimitThreshold > 0) { - count++; - if (System.currentTimeMillis() - timestamp < 1000 && count > rateLimitThreshold) { - if (checkFileChunk(fileChunk)) { - sendFileChunk(fileChunk); - } else { - rateLimitDropChunksCounter.inc(); - } - } else if (System.currentTimeMillis() - timestamp >= 1000) { - if (checkFileChunk(fileChunk)) { - sendFileChunk(fileChunk); - } else { - rateLimitDropChunksCounter.inc(); - timestamp = System.currentTimeMillis(); + synchronized (this) { + long currentTimeMillis = System.currentTimeMillis(); + chunksInCounter.inc(); + bytesInCounter.inc(fileChunk.getLength()); + if (rateLimitThreshold > 0) { + count++; + if (currentTimeMillis - timestamp < 1000 && count > rateLimitThreshold) { + if (checkFileChunk(fileChunk)) { + sendFileChunk(fileChunk); + } else { + rateLimitDropChunksCounter.inc(); + } + } else if (currentTimeMillis - timestamp >= 1000) { + if (checkFileChunk(fileChunk)) { + sendFileChunk(fileChunk); + } else { + rateLimitDropChunksCounter.inc(); + } + timestamp = currentTimeMillis; count = 0; + } else { + sendFileChunk(fileChunk); } } else { + timestamp = currentTimeMillis; sendFileChunk(fileChunk); } - } else { - sendFileChunk(fileChunk); } } @@ -215,6 +238,9 @@ public class HosSink extends RichSinkFunction { public void close() { IoUtil.close(syncHttpClient); IoUtil.close(asyncHttpClient); + if (executorService != null) { + executorService.shutdown(); + } } private void sendFileChunk(FileChunk fileChunk) { @@ -236,7 +262,7 @@ public class HosSink extends RichSinkFunction { } hosMessage.put(HOS_PART_CHUNK_COUNT, fileChunk.getChunkCount() + ""); Map metaMap = fileChunk.getMeta(); - if (metaMap != null && metaMap.size() > 0) { + if (metaMap != null && !metaMap.isEmpty()) { for (String meta : metaMap.keySet()) { hosMessage.put(HOS_META_PREFIX + StrUtil.toSymbolCase(meta, CharUtil.DASHED), metaMap.get(meta) + ""); } @@ -245,28 +271,12 @@ public class HosSink extends RichSinkFunction { hosMessage.clear(); objectsOffset += chunkLength + ";"; byteList.add(data); - chunkCount++; chunkSize += chunkLength; chunksOutCounter.inc(); bytesOutCounter.inc(chunkLength); calculateFileChunkMetrics(fileChunk); - if (chunkSize >= maxBatchSize || chunkCount >= maxBatchCount) { - HttpPut httpPut = new HttpPut(bathPutUrl); - httpPut.setHeader(TOKEN, token); - httpPut.setHeader(HOS_UPLOAD_TYPE, UPLOAD_TYPE_APPENDV2); - httpPut.setHeader(HOS_COMBINE_MODE, fileChunk.getCombineMode()); - httpPut.setHeader(HOS_OBJECTS_META, objectsMeta); - httpPut.setHeader(HOS_OBJECTS_OFFSET, objectsOffset); - byte[][] bytes = new byte[byteList.size()][]; - byteList.toArray(bytes); - byte[] newData = ArrayUtil.addAll(bytes); - httpPut.setEntity(new ByteArrayEntity(newData)); - byteList.clear(); - executeRequest(httpPut); - objectsMeta = ""; - objectsOffset = ""; - chunkSize = 0; - chunkCount = 0; + if (chunkSize >= maxBatchSize || byteList.size() >= maxBatchCount) { + sendBatchData(); } } else { String url = URLUtil.normalize(endpoint + "/hos/" + configuration.get(Configs.SINK_HOS_BUCKET) + "/" + fileChunk.getUuid()); @@ -292,7 +302,7 @@ public class HosSink extends RichSinkFunction { } httpPut.setHeader(HOS_PART_CHUNK_COUNT, fileChunk.getChunkCount() + ""); Map metaMap = fileChunk.getMeta(); - if (metaMap != null && metaMap.size() > 0) { + if (metaMap != null && !metaMap.isEmpty()) { for (String meta : metaMap.keySet()) { httpPut.setHeader(HOS_META_PREFIX + StrUtil.toSymbolCase(meta, CharUtil.DASHED), metaMap.get(meta) + ""); } @@ -309,6 +319,24 @@ public class HosSink extends RichSinkFunction { } } + private void sendBatchData() { + HttpPut httpPut = new HttpPut(bathPutUrl); + httpPut.setHeader(TOKEN, token); + httpPut.setHeader(HOS_UPLOAD_TYPE, UPLOAD_TYPE_APPENDV2); + httpPut.setHeader(HOS_COMBINE_MODE, COMBINE_MODE_SEEK); + httpPut.setHeader(HOS_OBJECTS_META, objectsMeta); + httpPut.setHeader(HOS_OBJECTS_OFFSET, objectsOffset); + byte[][] bytes = new byte[byteList.size()][]; + byteList.toArray(bytes); + byte[] newData = ArrayUtil.addAll(bytes); + httpPut.setEntity(new ByteArrayEntity(newData)); + executeRequest(httpPut); + objectsMeta = ""; + objectsOffset = ""; + byteList.clear(); + chunkSize = 0; + } + private void executeRequest(HttpPut httpPut) { if (isAsync) { asyncHttpClient.execute(httpPut, new FutureCallback() { diff --git a/src/main/java/com/zdjizhi/sink/OssSinkByCaffeineCache.java b/src/main/java/com/zdjizhi/sink/OssSinkByCaffeineCache.java index 056d793..7d61967 100644 --- a/src/main/java/com/zdjizhi/sink/OssSinkByCaffeineCache.java +++ b/src/main/java/com/zdjizhi/sink/OssSinkByCaffeineCache.java @@ -73,7 +73,6 @@ public class OssSinkByCaffeineCache extends RichSinkFunction { private CloseableHttpClient syncHttpClient; private CloseableHttpAsyncClient asyncHttpClient; private List endpointList; - private CaffeineCacheUtil caffeineCacheUtil; private Cache cache; public OssSinkByCaffeineCache(Configuration configuration) { @@ -92,8 +91,7 @@ public class OssSinkByCaffeineCache extends RichSinkFunction { } else { syncHttpClient = HttpClientUtil.getInstance(configuration).getSyncHttpClient(); } - caffeineCacheUtil = CaffeineCacheUtil.getInstance(configuration); - cache = caffeineCacheUtil.getCaffeineCache(); + cache = CaffeineCacheUtil.getInstance(configuration).getCaffeineCache(); metricGroup.gauge("cacheLength", (Gauge) () -> cache.estimatedSize()); lessThan1KBChunksCounter = metricGroup.counter("lessThan1KBChunksCount"); between1KBAnd5KBChunksCounter = metricGroup.counter("between1KBAnd5KBChunksCount"); @@ -183,8 +181,8 @@ public class OssSinkByCaffeineCache extends RichSinkFunction { } FileChunk data = cache.getIfPresent(uuid + "_data"); if (data != null) { - sendFile(data, meta); cache.invalidate(uuid + "_data"); + sendFile(data, meta); } else { cache.put(fileChunk.getUuid() + "_meta", fileChunk); } @@ -193,8 +191,8 @@ public class OssSinkByCaffeineCache extends RichSinkFunction { bytesInCounter.inc(fileChunk.getLength()); FileChunk meta = cache.getIfPresent(uuid + "_meta"); if (meta != null) { - sendFile(fileChunk, meta.getMeta()); cache.invalidate(uuid + "_meta"); + sendFile(fileChunk, meta.getMeta()); } else { cache.put(fileChunk.getUuid() + "_data", fileChunk); } @@ -205,7 +203,6 @@ public class OssSinkByCaffeineCache extends RichSinkFunction { public void close() { IoUtil.close(syncHttpClient); IoUtil.close(asyncHttpClient); - caffeineCacheUtil.close(); } private void sendFile(FileChunk fileChunk, Map metaMap) { @@ -322,6 +319,15 @@ public class OssSinkByCaffeineCache extends RichSinkFunction { } else if (fileId.contains("_2")) { responseFilesCounter.inc(); } + if (fileChunk.getChunk() == null) { + nullChunksCounter.inc(); + if ("eml".equals(fileType)) { + nullEmlChunksCounter.inc(); + } else if ("txt".equals(fileType)) { + nullTxtChunksCounter.inc(); + } + LOG.info("send file data is null. " + fileChunk.toString()); + } if (fileChunk.getOffset() == 0 && fileChunk.getLastChunkFlag() == 1) { completeFilesCounter.inc(); if ("eml".equals(fileType)) { @@ -329,15 +335,6 @@ public class OssSinkByCaffeineCache extends RichSinkFunction { } else if ("txt".equals(fileType)) { completeTxtFilesCounter.inc(); } - if (fileChunk.getChunk() == null) { - nullChunksCounter.inc(); - if ("eml".equals(fileType)) { - nullEmlChunksCounter.inc(); - } else if ("txt".equals(fileType)) { - nullTxtChunksCounter.inc(); - } - LOG.info("send file data is null. " + fileChunk.toString()); - } if (fileId.contains("_1")) { completeRequestFilesCounter.inc(); } else if (fileId.contains("_2")) { diff --git a/src/main/java/com/zdjizhi/sink/OssSinkByEhcache.java b/src/main/java/com/zdjizhi/sink/OssSinkByEhcache.java deleted file mode 100644 index 7c4209e..0000000 --- a/src/main/java/com/zdjizhi/sink/OssSinkByEhcache.java +++ /dev/null @@ -1,396 +0,0 @@ -package com.zdjizhi.sink; - -import cn.hutool.core.io.IoUtil; -import cn.hutool.core.util.RandomUtil; -import cn.hutool.core.util.URLUtil; -import cn.hutool.log.Log; -import cn.hutool.log.LogFactory; -import com.zdjizhi.config.Configs; -import com.zdjizhi.pojo.FileChunk; -import com.zdjizhi.utils.EhcacheUtil; -import com.zdjizhi.utils.FormatUtils; -import com.zdjizhi.utils.HttpClientUtil; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.metrics.Counter; -import org.apache.flink.metrics.MeterView; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; -import org.apache.http.HttpResponse; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpPost; -import org.apache.http.concurrent.FutureCallback; -import org.apache.http.entity.ByteArrayEntity; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.nio.client.CloseableHttpAsyncClient; -import org.apache.http.util.EntityUtils; -import org.ehcache.Cache; -import org.ehcache.CacheManager; - -import java.io.IOException; -import java.util.Arrays; -import java.util.List; -import java.util.Map; - -public class OssSinkByEhcache extends RichSinkFunction { - private static final Log LOG = LogFactory.get(); - - private final Configuration configuration; - public transient Counter chunksInCounter; - public transient Counter chunksOutCounter; - public transient Counter bytesInCounter; - public transient Counter bytesOutCounter; - public transient Counter errorChunksCounter; - public transient Counter fileMetasCounter; - public transient Counter requestFileMetasCounter; - public transient Counter responseFileMetasCounter; - public transient Counter requestFilesCounter; - public transient Counter responseFilesCounter; - public transient Counter emlChunksCounter; - public transient Counter txtChunksCounter; - public transient Counter completeFilesCounter; - public transient Counter completeEmlFilesCounter; - public transient Counter completeTxtFilesCounter; - public transient Counter completeRequestFilesCounter; - public transient Counter completeResponseFilesCounter; - public transient Counter nullChunksCounter; - public transient Counter nullTxtChunksCounter; - public transient Counter nullEmlChunksCounter; - public transient Counter lessThan1KBChunksCounter; - public transient Counter between1KBAnd5KBChunksCounter; - public transient Counter between5KBAnd10KBChunksCounter; - public transient Counter between10KBAnd100KBChunksCounter; - public transient Counter between100KBAnd1MBChunksCounter; - public transient Counter greaterThan1MBChunksCounter; - public transient Counter lessThan10KBEmlChunksCounter; - public transient Counter between1MBAnd10MBEmlChunksCounter; - public transient Counter between10KBAnd100KBEmlChunksCounter; - public transient Counter between100KBAnd1MBEmlChunksCounter; - public transient Counter greaterThan10MBEmlChunksCounter; - public transient Counter lessThan10KBTxtChunksCounter; - public transient Counter between1MBAnd10MBTxtChunksCounter; - public transient Counter between10KBAnd100KBTxtChunksCounter; - public transient Counter between100KBAnd1MBTxtChunksCounter; - public transient Counter greaterThan10MBTxtChunksCounter; - private boolean isAsync; - private CloseableHttpClient syncHttpClient; - private CloseableHttpAsyncClient asyncHttpClient; - private List endpointList; - private EhcacheUtil ehcacheUtil; - private Cache dataCache; - private Cache metaCache; - - public OssSinkByEhcache(Configuration configuration) { - this.configuration = configuration; - } - - @Override - public void open(Configuration parameters) throws Exception { - super.open(parameters); - MetricGroup metricGroup = getRuntimeContext().getMetricGroup().addGroup("file_chunk_combiner", "sink_oss"); - endpointList = Arrays.asList(configuration.get(Configs.SINK_OSS_ENDPOINT).split(",")); - isAsync = configuration.getBoolean(Configs.SINK_OSS_ASYNC); - if (isAsync) { - asyncHttpClient = HttpClientUtil.getInstance(configuration).getAsyncHttpClient(); - asyncHttpClient.start(); - } else { - syncHttpClient = HttpClientUtil.getInstance(configuration).getSyncHttpClient(); - } - ehcacheUtil = EhcacheUtil.getInstance(); - CacheManager ehcacheManager = EhcacheUtil.getInstance().getEhcacheManager(); - dataCache = ehcacheManager.getCache("data", String.class, FileChunk.class); - metaCache = ehcacheManager.getCache("meta", String.class, FileChunk.class); - lessThan1KBChunksCounter = metricGroup.counter("lessThan1KBChunksCount"); - between1KBAnd5KBChunksCounter = metricGroup.counter("between1KBAnd5KBChunksCount"); - between5KBAnd10KBChunksCounter = metricGroup.counter("between5KBAnd10KBChunksCount"); - between10KBAnd100KBChunksCounter = metricGroup.counter("between10KBAnd100KBChunksCount"); - between100KBAnd1MBChunksCounter = metricGroup.counter("between100KBAnd1MBChunksCount"); - greaterThan1MBChunksCounter = metricGroup.counter("greaterThan1MBChunksCount"); - metricGroup.meter("numLessThan1KBFilesOutPerSecond", new MeterView(lessThan1KBChunksCounter)); - metricGroup.meter("numBetween1KBAnd5KBFilesOutPerSecond", new MeterView(between1KBAnd5KBChunksCounter)); - metricGroup.meter("numBetween5KBAnd10KBFilesOutPerSecond", new MeterView(between5KBAnd10KBChunksCounter)); - metricGroup.meter("numBetween10KBAnd100KBFilesOutPerSecond", new MeterView(between10KBAnd100KBChunksCounter)); - metricGroup.meter("numBetween100KBAnd1MBFilesOutPerSecond", new MeterView(between100KBAnd1MBChunksCounter)); - metricGroup.meter("numGreaterThan1MBFilesOutPerSecond", new MeterView(greaterThan1MBChunksCounter)); - lessThan10KBEmlChunksCounter = metricGroup.counter("lessThan10KBEmlChunksCount"); - between10KBAnd100KBEmlChunksCounter = metricGroup.counter("between10KBAnd100KBEmlChunksCount"); - between100KBAnd1MBEmlChunksCounter = metricGroup.counter("between100KBAnd1MBEmlChunksCount"); - between1MBAnd10MBEmlChunksCounter = metricGroup.counter("between1MBAnd10MBEmlChunksCount"); - greaterThan10MBEmlChunksCounter = metricGroup.counter("greaterThan10MBEmlChunksCount"); - metricGroup.meter("numLessThan10KBEmlFilesOutPerSecond", new MeterView(lessThan10KBEmlChunksCounter)); - metricGroup.meter("numBetween10KBAnd100KBEmlFilesOutPerSecond", new MeterView(between10KBAnd100KBEmlChunksCounter)); - metricGroup.meter("numBetween100KBAnd1MBEmlFilesOutPerSecond", new MeterView(between100KBAnd1MBEmlChunksCounter)); - metricGroup.meter("numBetween1MBAnd10MBEmlFilesOutPerSecond", new MeterView(between1MBAnd10MBEmlChunksCounter)); - metricGroup.meter("numGreaterThan10MBEmlFilesOutPerSecond", new MeterView(greaterThan10MBEmlChunksCounter)); - lessThan10KBTxtChunksCounter = metricGroup.counter("lessThan10KBTxtChunksCount"); - between10KBAnd100KBTxtChunksCounter = metricGroup.counter("between10KBAnd100KBTxtChunksCount"); - between100KBAnd1MBTxtChunksCounter = metricGroup.counter("between100KBAnd1MBTxtChunksCount"); - between1MBAnd10MBTxtChunksCounter = metricGroup.counter("between1MBAnd10MBTxtChunksCount"); - greaterThan10MBTxtChunksCounter = metricGroup.counter("greaterThan10MBTxtChunksCount"); - metricGroup.meter("numLessThan10KBTxtChunksOutPerSecond", new MeterView(lessThan10KBTxtChunksCounter)); - metricGroup.meter("numBetween10KBAnd100KBTxtChunksOutPerSecond", new MeterView(between10KBAnd100KBTxtChunksCounter)); - metricGroup.meter("numBetween100KBAnd1MBTxtChunksOutPerSecond", new MeterView(between100KBAnd1MBTxtChunksCounter)); - metricGroup.meter("numBetween1MBAnd10MBTxtChunksOutPerSecond", new MeterView(between1MBAnd10MBTxtChunksCounter)); - metricGroup.meter("numGreaterThan10MBTxtChunksOutPerSecond", new MeterView(greaterThan10MBTxtChunksCounter)); - emlChunksCounter = metricGroup.counter("emlChunksCount"); - txtChunksCounter = metricGroup.counter("txtChunksCount"); - metricGroup.meter("numEmlChunksOutPerSecond", new MeterView(emlChunksCounter)); - metricGroup.meter("numTxtChunksOutPerSecond", new MeterView(txtChunksCounter)); - fileMetasCounter = metricGroup.counter("fileMetasCount"); - metricGroup.meter("numFileMetasInPerSecond", new MeterView(fileMetasCounter)); - requestFileMetasCounter = metricGroup.counter("requestFileMetasCount"); - responseFileMetasCounter = metricGroup.counter("responseFileMetasCount"); - requestFilesCounter = metricGroup.counter("requestFilesCount"); - responseFilesCounter = metricGroup.counter("responseFilesCount"); - metricGroup.meter("numRequestFileMetasInPerSecond", new MeterView(requestFileMetasCounter)); - metricGroup.meter("numResponseFileMetasInPerSecond", new MeterView(responseFileMetasCounter)); - metricGroup.meter("numRequestFilesOutPerSecond", new MeterView(requestFilesCounter)); - metricGroup.meter("numResponseFilesOutPerSecond", new MeterView(responseFilesCounter)); - errorChunksCounter = metricGroup.counter("errorChunksCount"); - chunksInCounter = metricGroup.counter("chunksInCount"); - chunksOutCounter = metricGroup.counter("chunksOutCount"); - bytesInCounter = metricGroup.counter("bytesInCount"); - bytesOutCounter = metricGroup.counter("bytesOutCount"); - metricGroup.meter("numChunksInPerSecond", new MeterView(chunksInCounter)); - metricGroup.meter("numChunksOutPerSecond", new MeterView(chunksOutCounter)); - metricGroup.meter("numBytesInPerSecond", new MeterView(bytesInCounter)); - metricGroup.meter("numBytesOutPerSecond", new MeterView(bytesOutCounter)); - metricGroup.meter("numErrorChunksPerSecond", new MeterView(errorChunksCounter)); - completeFilesCounter = metricGroup.counter("completeFilesCount"); - completeEmlFilesCounter = metricGroup.counter("completeEmlFilesCount"); - completeTxtFilesCounter = metricGroup.counter("completeTxtFilesCount"); - completeRequestFilesCounter = metricGroup.counter("completeRequestFilesCount"); - completeResponseFilesCounter = metricGroup.counter("completeResponseFilesCount"); - metricGroup.meter("numCompleteFilesOutPerSecond", new MeterView(completeFilesCounter)); - metricGroup.meter("numCompleteEmlFilesOutPerSecond", new MeterView(completeEmlFilesCounter)); - metricGroup.meter("numCompleteTxtFilesOutPerSecond", new MeterView(completeTxtFilesCounter)); - metricGroup.meter("numCompleteRequestFilesOutPerSecond", new MeterView(completeRequestFilesCounter)); - metricGroup.meter("numCompleteResponseFilesOutPerSecond", new MeterView(completeResponseFilesCounter)); - nullChunksCounter = metricGroup.counter("nullChunksCount"); - nullEmlChunksCounter = metricGroup.counter("nullTxtChunksCount"); - nullTxtChunksCounter = metricGroup.counter("nullEmlChunksCount"); - metricGroup.meter("numNullFilesOutPerSecond", new MeterView(nullChunksCounter)); - metricGroup.meter("numNullEmlFilesOutPerSecond", new MeterView(nullEmlChunksCounter)); - metricGroup.meter("numNullTxtFilesOutPerSecond", new MeterView(nullTxtChunksCounter)); - } - - @Override - public void invoke(FileChunk fileChunk, Context context) { - String uuid = fileChunk.getUuid(); - if (fileChunk.getMeta() != null) { //日志 - fileMetasCounter.inc(); - Map meta = fileChunk.getMeta(); - String fileId = meta.get("fileId").toString(); - if (fileId.contains("_1")) { - requestFileMetasCounter.inc(); - } else if (fileId.contains("_2")) { - responseFileMetasCounter.inc(); - } - FileChunk data = dataCache.get(uuid); - if (data != null) { - sendFile(data, meta); - dataCache.remove(uuid); - } else { - metaCache.put(fileChunk.getUuid(), fileChunk); - } - } else { //文件 - chunksInCounter.inc(); - bytesInCounter.inc(fileChunk.getLength()); - FileChunk meta = metaCache.get(uuid); - if (meta != null) { - sendFile(fileChunk, meta.getMeta()); - metaCache.remove(uuid); - } else { - dataCache.put(fileChunk.getUuid(), fileChunk); - } - } - } - - @Override - public void close() { - IoUtil.close(syncHttpClient); - IoUtil.close(asyncHttpClient); - ehcacheUtil.close(); - } - - private void sendFile(FileChunk fileChunk, Map metaMap) { - String url = ""; - try { - byte[] data; - String fileType = fileChunk.getFileType(); - if (fileChunk.getChunk() != null) { - data = fileChunk.getChunk(); - } else { - data = "".getBytes(); - } - String fileId = metaMap != null && metaMap.containsKey("fileId") ? metaMap.get("fileId").toString() : ""; - String policyId = metaMap != null && metaMap.containsKey("policyId") ? metaMap.get("policyId").toString() : "0"; - String serverIP = metaMap != null && metaMap.containsKey("serverIP") ? metaMap.get("serverIP").toString() : ""; - String serverPort = metaMap != null && metaMap.containsKey("serverPort") ? metaMap.get("serverPort").toString() : ""; - String clientIP = metaMap != null && metaMap.containsKey("clientIP") ? metaMap.get("clientIP").toString() : ""; - String clientPort = metaMap != null && metaMap.containsKey("clientPort") ? metaMap.get("clientPort").toString() : ""; - String domain = metaMap != null && metaMap.containsKey("httpHost") ? FormatUtils.getTopPrivateDomain(metaMap.get("httpHost").toString()) : ""; - String subscriberId = metaMap != null && metaMap.containsKey("subscriberId") ? metaMap.get("subscriberId").toString() : ""; - String foundTime = metaMap != null && metaMap.containsKey("foundTime") ? metaMap.get("foundTime").toString() : "0"; - url = URLUtil.normalize(endpointList.get(RandomUtil.randomInt(endpointList.size())) + "/v3/upload?" + - "cfg_id=" + policyId + - "&file_id=" + fileId + - "&file_type=" + fileType + - "&found_time=" + foundTime + - "&s_ip=" + serverIP + - "&s_port=" + serverPort + - "&d_ip=" + clientIP + - "&d_port=" + clientPort + - "&domain=" + domain + - "&account=" + subscriberId); - HttpPost httpPost = new HttpPost(url); - httpPost.setEntity(new ByteArrayEntity(data)); - executeRequest(httpPost, url); - chunksOutCounter.inc(); - bytesOutCounter.inc(data.length); - calculateFileChunkMetrics(fileChunk, fileId); - } catch (Exception e) { - LOG.error("post file error. current url: " + url, e); - errorChunksCounter.inc(); - } - } - - private void executeRequest(HttpPost httpPost, String url) { - if (isAsync) { - asyncHttpClient.execute(httpPost, new FutureCallback() { - @Override - public void completed(HttpResponse httpResponse) { - try { - String responseEntity = EntityUtils.toString(httpResponse.getEntity(), "UTF-8"); - if (httpResponse.getStatusLine().getStatusCode() == 200) { - if (!responseEntity.contains("\"code\":200")) { - LOG.error("post file error. current url: {}, msg: {}", url, responseEntity); - errorChunksCounter.inc(); - } - } else { - LOG.error("post file error. current url: {}, code: {}, msg: {}", url, httpResponse.getStatusLine().getStatusCode(), responseEntity); - errorChunksCounter.inc(); - } - } catch (IOException e) { - LOG.error("post file error. current url: " + url, e); - errorChunksCounter.inc(); - } - } - - @Override - public void failed(Exception ex) { - LOG.error("post file error. current url: " + url, ex); - errorChunksCounter.inc(); - } - - @Override - public void cancelled() { - - } - }); - } else { - CloseableHttpResponse response = null; - try { - response = syncHttpClient.execute(httpPost); - String responseEntity = EntityUtils.toString(response.getEntity(), "UTF-8"); - if (response.getStatusLine().getStatusCode() == 200) { - if (!responseEntity.contains("\"code\":200")) { - LOG.error("post file error. current url: {}, msg: {}", url, responseEntity); - errorChunksCounter.inc(); - } - } else { - LOG.error("post file error. current url: {}, code: {}, msg: {}", url, response.getStatusLine().getStatusCode(), responseEntity); - errorChunksCounter.inc(); - } - } catch (IOException e) { - LOG.error("post file error. current url: " + url, e); - errorChunksCounter.inc(); - } finally { - IoUtil.close(response); - } - } - } - - private void calculateFileChunkMetrics(FileChunk fileChunk, String fileId) { - String fileType = fileChunk.getFileType(); - long length = fileChunk.getLength(); - calculateChunkSize(length); - if ("eml".equals(fileType)) { - emlChunksCounter.inc(); - calculateEmlChunkSize(length); - } else if ("txt".equals(fileType)) { - txtChunksCounter.inc(); - calculateTxtChunkSize(length); - } - if (fileId.contains("_1")) { - requestFilesCounter.inc(); - } else if (fileId.contains("_2")) { - responseFilesCounter.inc(); - } - if (fileChunk.getOffset() == 0 && fileChunk.getLastChunkFlag() == 1) { - completeFilesCounter.inc(); - if ("eml".equals(fileType)) { - completeEmlFilesCounter.inc(); - } else if ("txt".equals(fileType)) { - completeTxtFilesCounter.inc(); - } - if (fileChunk.getChunk() == null) { - nullChunksCounter.inc(); - if ("eml".equals(fileType)) { - nullEmlChunksCounter.inc(); - } else if ("txt".equals(fileType)) { - nullTxtChunksCounter.inc(); - } - LOG.info("send file data is null. " + fileChunk.toString()); - } - if (fileId.contains("_1")) { - completeRequestFilesCounter.inc(); - } else if (fileId.contains("_2")) { - completeResponseFilesCounter.inc(); - } - } - } - - private void calculateChunkSize(long length) { - if (length <= 1024) { - lessThan1KBChunksCounter.inc(); - } else if (length <= 5 * 1024) { - between1KBAnd5KBChunksCounter.inc(); - } else if (length <= 10 * 1024) { - between5KBAnd10KBChunksCounter.inc(); - } else if (length <= 100 * 1024) { - between10KBAnd100KBChunksCounter.inc(); - } else if (length <= 1024 * 1024) { - between100KBAnd1MBChunksCounter.inc(); - } else { - greaterThan1MBChunksCounter.inc(); - } - } - - private void calculateEmlChunkSize(long length) { - if (length <= 10 * 1024) { - lessThan10KBEmlChunksCounter.inc(); - } else if (length <= 100 * 1024) { - between10KBAnd100KBEmlChunksCounter.inc(); - } else if (length <= 1024 * 1024) { - between100KBAnd1MBEmlChunksCounter.inc(); - } else if (length <= 10 * 1024 * 1024) { - between1MBAnd10MBEmlChunksCounter.inc(); - } else { - greaterThan10MBEmlChunksCounter.inc(); - } - } - - private void calculateTxtChunkSize(long length) { - if (length <= 10 * 1024) { - lessThan10KBTxtChunksCounter.inc(); - } else if (length <= 100 * 1024) { - between10KBAnd100KBTxtChunksCounter.inc(); - } else if (length <= 1024 * 1024) { - between100KBAnd1MBTxtChunksCounter.inc(); - } else if (length <= 10 * 1024 * 1024) { - between1MBAnd10MBTxtChunksCounter.inc(); - } else { - greaterThan10MBTxtChunksCounter.inc(); - } - } -} diff --git a/src/main/java/com/zdjizhi/trigger/IdleTimeTrigger.java b/src/main/java/com/zdjizhi/trigger/IdleTimeTrigger.java new file mode 100644 index 0000000..422b5d3 --- /dev/null +++ b/src/main/java/com/zdjizhi/trigger/IdleTimeTrigger.java @@ -0,0 +1,66 @@ +package com.zdjizhi.trigger; + +import org.apache.flink.api.common.functions.ReduceFunction; +import org.apache.flink.api.common.state.ReducingState; +import org.apache.flink.api.common.state.ReducingStateDescriptor; +import org.apache.flink.api.common.typeutils.base.LongSerializer; +import org.apache.flink.streaming.api.windowing.triggers.Trigger; +import org.apache.flink.streaming.api.windowing.triggers.TriggerResult; +import org.apache.flink.streaming.api.windowing.windows.TimeWindow; + +public class IdleTimeTrigger extends Trigger { + private static final long serialVersionUID = 1L; + + private final long maxIdleTime; + + private IdleTimeTrigger(long maxIdleTime) { + this.maxIdleTime = maxIdleTime; + } + + public static IdleTimeTrigger of(long maxIdleTime) { + return new IdleTimeTrigger<>(maxIdleTime); + } + + private final ReducingStateDescriptor processingTimeStateDesc = + new ReducingStateDescriptor<>("processTimer", new ReduceMax(), LongSerializer.INSTANCE); + + @Override + public TriggerResult onElement(Object element, long timestamp, TimeWindow window, TriggerContext ctx) throws Exception { + ReducingState fireTimestamp = ctx.getPartitionedState(processingTimeStateDesc); + fireTimestamp.clear(); + long nextFireTimestamp = ctx.getCurrentProcessingTime() + maxIdleTime; + ctx.registerProcessingTimeTimer(nextFireTimestamp); + fireTimestamp.add(nextFireTimestamp); + return TriggerResult.CONTINUE; + } + + @Override + public TriggerResult onProcessingTime(long time, TimeWindow window, TriggerContext ctx) throws Exception { + ReducingState fireTimestamp = ctx.getPartitionedState(processingTimeStateDesc); + if (fireTimestamp.get() != null && fireTimestamp.get() == time) { + fireTimestamp.clear(); + return TriggerResult.FIRE; + } + return TriggerResult.CONTINUE; + } + + @Override + public TriggerResult onEventTime(long time, TimeWindow window, TriggerContext ctx) { + return TriggerResult.CONTINUE; + } + + @Override + public void clear(TimeWindow window, TriggerContext ctx) { + ReducingState fireTimestamp = ctx.getPartitionedState(processingTimeStateDesc); + fireTimestamp.clear(); + } + + private static class ReduceMax implements ReduceFunction { + private static final long serialVersionUID = 1L; + + @Override + public Long reduce(Long value1, Long value2) { + return Math.max(value1, value2); + } + } +} diff --git a/src/main/resources/common.properties b/src/main/resources/common.properties index d08d918..b710c00 100644 --- a/src/main/resources/common.properties +++ b/src/main/resources/common.properties @@ -38,9 +38,10 @@ sink.parallelism=1 #ѡhososshbase sink.type=hos sink.async=false -sink.batch=false +sink.batch=true sink.batch.count=1000 sink.batch.size=1048576 +sink.batch.time=10 #sink.filter.expression= #sink.rate.limit.threshold=0 #sink.rate.limit.exclusion.expression=FileChunk.fileType == "eml" diff --git a/src/main/resources/ehcache.xml b/src/main/resources/ehcache.xml deleted file mode 100644 index 85ee37e..0000000 --- a/src/main/resources/ehcache.xml +++ /dev/null @@ -1,36 +0,0 @@ - - - - - - java.lang.String - com.zdjizhi.pojo.FileChunk - - 600 - - - 100000 - - - - - - - java.lang.String - com.zdjizhi.pojo.FileChunk - - 1200 - - - 100000 - - - - - \ No newline at end of file