[TSG-20820]修复file-chunk-combiner的sink使用批量时导致文件无法下载的问题。

This commit is contained in:
houjinchuan
2024-05-08 10:56:17 +08:00
parent af97ab0a6d
commit 2e8b8f98e6
12 changed files with 273 additions and 769 deletions

View File

@@ -6,7 +6,7 @@
<groupId>com.zdjizhi</groupId> <groupId>com.zdjizhi</groupId>
<artifactId>file-chunk-combiner</artifactId> <artifactId>file-chunk-combiner</artifactId>
<version>1.3.0</version> <version>1.3.1</version>
<repositories> <repositories>
<repository> <repository>

View File

@@ -17,7 +17,6 @@ import com.zdjizhi.trigger.LastChunkTrigger;
import com.zdjizhi.trigger.MultipleTrigger; import com.zdjizhi.trigger.MultipleTrigger;
import org.apache.flink.api.common.eventtime.WatermarkStrategy; import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo; import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.utils.ParameterTool; import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration; import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.*; import org.apache.flink.streaming.api.datastream.*;
@@ -174,35 +173,6 @@ public class FileChunkCombiner {
.filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_proxy_file_meta")) .filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_proxy_file_meta"))
.name("Filter: Map") .name("Filter: Map")
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM)); .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM));
windowStream
.filter(new FileChunkFilterFunction(configuration.getString(Configs.SINK_OSS_FILTER_EXPRESSION), "sink_oss"))
.name("Filter: Oss")
.setParallelism(configuration.get(Configs.COMBINER_WINDOW_PARALLELISM))
.union(fileMetaSessionSingleOutputStreamOperator, fileMetaProxySingleOutputStreamOperator)
.keyBy(new FileChunkKeySelector())
.addSink(new OssSinkByEhcache(configuration))
.name("Oss")
.setParallelism(configuration.get(Configs.SINK_PARALLELISM));
break;
case "oss-caffeine":
fileMetaSessionSingleOutputStreamOperator = environment.addSource(FileMetaKafkaConsumer.stringConsumer(configuration, configuration.get(Configs.KAFKA_FILE_META_SESSION_TOPIC)))
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
.name(configuration.get(Configs.KAFKA_FILE_META_SESSION_TOPIC))
.flatMap(new ParseSessionFileMetaFlatMapFunction())
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
.name("Map: Parse Session File Meta")
.filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_session_file_meta"))
.name("Filter: Map")
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM));
fileMetaProxySingleOutputStreamOperator = environment.addSource(FileMetaKafkaConsumer.stringConsumer(configuration, configuration.get(Configs.KAFKA_FILE_META_PROXY_TOPIC)))
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
.name(configuration.get(Configs.KAFKA_FILE_META_PROXY_TOPIC))
.flatMap(new ParseProxyFileMetaFlatMapFunction())
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
.name("Map: Parse Proxy File Meta")
.filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_proxy_file_meta"))
.name("Filter: Map")
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM));
windowStream windowStream
.filter(new FileChunkFilterFunction(configuration.getString(Configs.SINK_OSS_FILTER_EXPRESSION), "sink_oss")) .filter(new FileChunkFilterFunction(configuration.getString(Configs.SINK_OSS_FILTER_EXPRESSION), "sink_oss"))
.name("Filter: Oss") .name("Filter: Oss")
@@ -213,38 +183,6 @@ public class FileChunkCombiner {
.name("Oss") .name("Oss")
.setParallelism(configuration.get(Configs.SINK_PARALLELISM)); .setParallelism(configuration.get(Configs.SINK_PARALLELISM));
break; break;
case "test":
fileMetaSessionSingleOutputStreamOperator = environment.addSource(FileMetaKafkaConsumer.stringConsumer(configuration, configuration.get(Configs.KAFKA_FILE_META_SESSION_TOPIC)))
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
.name(configuration.get(Configs.KAFKA_FILE_META_SESSION_TOPIC))
.flatMap(new ParseSessionFileMetaFlatMapFunction())
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
.name("Map: Parse Session File Meta")
.filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_session_file_meta"))
.name("Filter: Map")
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM));
fileMetaProxySingleOutputStreamOperator = environment.addSource(FileMetaKafkaConsumer.stringConsumer(configuration, configuration.get(Configs.KAFKA_FILE_META_PROXY_TOPIC)))
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
.name(configuration.get(Configs.KAFKA_FILE_META_PROXY_TOPIC))
.flatMap(new ParseProxyFileMetaFlatMapFunction())
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
.name("Map: Parse Proxy File Meta")
.filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_proxy_file_meta"))
.name("Filter: Map")
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM));
KeyedStream<FileChunk, String> fileMetaStringKeyedStream = fileMetaSessionSingleOutputStreamOperator
.union(fileMetaProxySingleOutputStreamOperator)
.keyBy((KeySelector<FileChunk, String>) FileChunk::getUuid);
windowStream
.filter(new FileChunkFilterFunction(configuration.getString(Configs.SINK_OSS_FILTER_EXPRESSION), "sink_oss"))
.name("Filter: Oss")
.setParallelism(configuration.get(Configs.COMBINER_WINDOW_PARALLELISM))
.keyBy((KeySelector<FileChunk, String>) FileChunk::getUuid)
.connect(fileMetaStringKeyedStream)
.process(new TestKeyedCoProcessFunction(configuration))
.setParallelism(configuration.get(Configs.COMBINER_WINDOW_PARALLELISM))
.name("Oss");
break;
} }
} }
environment.execute(configuration.get(Configs.FLINK_JOB_NAME)); environment.execute(configuration.get(Configs.FLINK_JOB_NAME));

View File

@@ -84,6 +84,9 @@ public class Configs {
public static final ConfigOption<Long> SINK_BATCH_SIZE = ConfigOptions.key("sink.batch.size") public static final ConfigOption<Long> SINK_BATCH_SIZE = ConfigOptions.key("sink.batch.size")
.longType() .longType()
.defaultValue(Long.MAX_VALUE); .defaultValue(Long.MAX_VALUE);
public static final ConfigOption<Integer> SINK_BATCH_TIME = ConfigOptions.key("sink.batch.time")
.intType()
.defaultValue(5);
public static final ConfigOption<String> SINK_FILTER_EXPRESSION = ConfigOptions.key("sink.filter.expression") public static final ConfigOption<String> SINK_FILTER_EXPRESSION = ConfigOptions.key("sink.filter.expression")
.stringType() .stringType()
.defaultValue(""); .defaultValue("");

View File

@@ -120,8 +120,8 @@ public class CombineChunkProcessWindowFunction extends ProcessWindowFunction<Fil
waitingToCombineChunkList.add(currentFileChunk.getChunk()); waitingToCombineChunkList.add(currentFileChunk.getChunk());
} }
} else {// 期望offset小于当前offset说明缺块 } else {// 期望offset小于当前offset说明缺块
if (waitingToCombineChunkList.size() > 0) {//将可合并的chunk合并清空集合 if (!waitingToCombineChunkList.isEmpty()) {//将可合并的chunk合并清空集合
FileChunk fileChunk = combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), originalFileChunkList.get(0).getTimestamp(), null); FileChunk fileChunk = combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), currentFileChunk.getTimestamp(), null);
if (fileChunk != null) { if (fileChunk != null) {
combinedFileChunkList.add(fileChunk); combinedFileChunkList.add(fileChunk);
} }
@@ -139,8 +139,8 @@ public class CombineChunkProcessWindowFunction extends ProcessWindowFunction<Fil
} }
} }
} }
if (waitingToCombineChunkList.size() > 0) { if (!waitingToCombineChunkList.isEmpty()) {
FileChunk fileChunk = combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), originalFileChunkList.get(0).getTimestamp(), null); FileChunk fileChunk = combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), currentFileChunk.getTimestamp(), null);
if (fileChunk != null) { if (fileChunk != null) {
combinedFileChunkList.add(fileChunk); combinedFileChunkList.add(fileChunk);
} }

View File

@@ -1,101 +0,0 @@
package com.zdjizhi.function;
import cn.hutool.core.io.IoUtil;
import com.zdjizhi.config.Configs;
import com.zdjizhi.pojo.FileChunk;
import com.zdjizhi.utils.HBaseConnectionUtil;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.metrics.Counter;
import org.apache.flink.metrics.MeterView;
import org.apache.flink.metrics.MetricGroup;
import org.apache.flink.streaming.api.functions.co.KeyedCoProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Keyed co-process function that writes both of its inputs into one HBase table as batched
 * {@link Put}s and never emits anything downstream.
 *
 * <p>Input 1 (file chunks) is stored in column {@code meta:data}, input 2 (file metas) in column
 * {@code meta:meta}; the row key of both is the chunk UUID. The table is
 * {@code "default:" + Configs.SINK_HOS_BUCKET}. Whether the synchronous or the asynchronous HBase
 * client is used is decided by {@code Configs.SINK_ASYNC}.
 *
 * <p>Each input has its own buffer, flushed as soon as it holds {@code Configs.SINK_BATCH_COUNT}
 * puts. Whatever is still buffered is flushed when the function is closed so that the last
 * partial batch is not lost.
 */
public class TestKeyedCoProcessFunction extends KeyedCoProcessFunction<String, FileChunk, FileChunk, FileChunk> {

    // Column coordinates, encoded once with a fixed charset instead of the platform default.
    private static final byte[] FAMILY_META = "meta".getBytes(StandardCharsets.UTF_8);
    private static final byte[] QUALIFIER_DATA = "data".getBytes(StandardCharsets.UTF_8);
    private static final byte[] QUALIFIER_META = "meta".getBytes(StandardCharsets.UTF_8);

    private final Configuration configuration;
    public transient Counter chunksInCounter;
    public transient Counter fileMetasInCounter;
    private boolean isAsync;
    private Connection syncHBaseConnection;
    private AsyncConnection asyncHBaseConnection;
    private Table table;
    private AsyncTable<AdvancedScanResultConsumer> asyncTable;
    private List<Put> dataPutList;
    private List<Put> metaPutList;
    private long maxBatchCount;

    /**
     * @param configuration job configuration; read in {@link #open(Configuration)} for the client
     *                      mode, the target bucket/table name and the batch count
     */
    public TestKeyedCoProcessFunction(Configuration configuration) {
        this.configuration = configuration;
    }

    /**
     * Registers the input counters/meters, opens the HBase table with the configured client and
     * creates the two empty put buffers.
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        MetricGroup metricGroup = getRuntimeContext().getMetricGroup().addGroup("file_chunk_combiner", "add_file_meta");
        chunksInCounter = metricGroup.counter("chunksInCount");
        fileMetasInCounter = metricGroup.counter("fileMetasInCount");
        metricGroup.meter("numChunksInPerSecond", new MeterView(chunksInCounter));
        metricGroup.meter("numFileMetasInPerSecond", new MeterView(fileMetasInCounter));

        isAsync = configuration.getBoolean(Configs.SINK_ASYNC);
        TableName tableName = TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET));
        if (isAsync) {
            asyncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getAsyncHBaseConnection();
            asyncTable = asyncHBaseConnection.getTable(tableName);
        } else {
            syncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getSyncHBaseConnection();
            table = syncHBaseConnection.getTable(tableName);
        }
        maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT);
        // Created last: non-null buffers imply that the matching table handle exists.
        dataPutList = new ArrayList<>();
        metaPutList = new ArrayList<>();
    }

    /**
     * Buffers one file chunk as a {@code meta:data} put and flushes the chunk buffer once it has
     * reached the batch count.
     *
     * @throws IOException          if the synchronous batch write fails
     * @throws InterruptedException if the synchronous batch write is interrupted
     */
    @Override
    public void processElement1(FileChunk value, Context ctx, Collector<FileChunk> out) throws IOException, InterruptedException {
        chunksInCounter.inc();
        Put dataPut = new Put(value.getUuid().getBytes(StandardCharsets.UTF_8));
        dataPut.addColumn(FAMILY_META, QUALIFIER_DATA, value.toString().getBytes(StandardCharsets.UTF_8));
        dataPutList.add(dataPut);
        if (dataPutList.size() >= maxBatchCount) {
            flush(dataPutList, false);
        }
    }

    /**
     * Buffers one file meta as a {@code meta:meta} put and flushes the meta buffer once it has
     * reached the batch count.
     *
     * @throws IOException          if the synchronous batch write fails
     * @throws InterruptedException if the synchronous batch write is interrupted
     */
    @Override
    public void processElement2(FileChunk value, Context ctx, Collector<FileChunk> out) throws IOException, InterruptedException {
        fileMetasInCounter.inc();
        Put metaPut = new Put(value.getUuid().getBytes(StandardCharsets.UTF_8));
        metaPut.addColumn(FAMILY_META, QUALIFIER_META, value.getMeta().toString().getBytes(StandardCharsets.UTF_8));
        metaPutList.add(metaPut);
        if (metaPutList.size() >= maxBatchCount) {
            flush(metaPutList, false);
        }
    }

    /**
     * Flushes any puts that never reached the batch count, then releases the table and the
     * connections. The resources are released even when the final flush fails; the failure is
     * still propagated so the lost writes do not go unnoticed.
     */
    @Override
    public void close() throws Exception {
        try {
            // Buffers are null when open() failed before creating them.
            if (dataPutList != null) {
                flush(dataPutList, true);
            }
            if (metaPutList != null) {
                flush(metaPutList, true);
            }
        } finally {
            IoUtil.close(table);
            IoUtil.close(syncHBaseConnection);
            IoUtil.close(asyncHBaseConnection);
        }
    }

    /**
     * Writes the buffered puts with the configured client and empties the buffer.
     *
     * @param puts            buffer to write; nothing is sent when it is empty
     * @param awaitCompletion when the asynchronous client is in use, block until the batch has
     *                        finished; needed before the connection is closed, otherwise the
     *                        in-flight writes could be dropped
     */
    private void flush(List<Put> puts, boolean awaitCompletion) throws IOException, InterruptedException {
        if (puts.isEmpty()) {
            return;
        }
        if (isAsync) {
            if (awaitCompletion) {
                asyncTable.batchAll(puts).join();
            } else {
                asyncTable.batch(puts);
            }
        } else {
            table.batch(puts, null);
        }
        puts.clear();
    }
}

View File

@@ -23,6 +23,9 @@ import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException; import java.io.IOException;
import java.util.*; import java.util.*;
import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import static com.zdjizhi.utils.PublicConstants.*; import static com.zdjizhi.utils.PublicConstants.*;
import static com.zdjizhi.utils.HBaseColumnConstants.*; import static com.zdjizhi.utils.HBaseColumnConstants.*;
@@ -61,7 +64,7 @@ public class HBaseSink extends RichSinkFunction<FileChunk> {
public transient Counter mediaChunksCounter; public transient Counter mediaChunksCounter;
private boolean isAsync; private boolean isAsync;
private Connection syncHBaseConnection; private Connection syncHBaseConnection;
private AsyncConnection AsyncHBaseConnection; private AsyncConnection asyncHBaseConnection;
private Table table; private Table table;
private Table indexTimeTable; private Table indexTimeTable;
private Table indexFilenameTable; private Table indexFilenameTable;
@@ -72,12 +75,12 @@ public class HBaseSink extends RichSinkFunction<FileChunk> {
private List<Put> indexTimePutList; private List<Put> indexTimePutList;
private List<Put> indexFilenamePutList; private List<Put> indexFilenamePutList;
private long chunkSize; private long chunkSize;
private int chunkCount;
private long maxBatchSize; private long maxBatchSize;
private long maxBatchCount; private long maxBatchCount;
private ScheduledExecutorService executorService;
private long rateLimitThreshold; private long rateLimitThreshold;
private String rateLimitExpression; private String rateLimitExpression;
private long timestamp; private volatile long timestamp;
private long count; private long count;
private JexlExpression jexlExpression; private JexlExpression jexlExpression;
private JexlContext jexlContext; private JexlContext jexlContext;
@@ -148,59 +151,77 @@ public class HBaseSink extends RichSinkFunction<FileChunk> {
metricGroup.meter("numMediaChunksOutPerSecond", new MeterView(mediaChunksCounter)); metricGroup.meter("numMediaChunksOutPerSecond", new MeterView(mediaChunksCounter));
isAsync = configuration.getBoolean(Configs.SINK_ASYNC); isAsync = configuration.getBoolean(Configs.SINK_ASYNC);
if (isAsync) { if (isAsync) {
AsyncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getAsyncHBaseConnection(); asyncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getAsyncHBaseConnection();
asyncTable = AsyncHBaseConnection.getTable(TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET))); asyncTable = asyncHBaseConnection.getTable(TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET)));
asyncIndexTimeTable = AsyncHBaseConnection.getTable(TableName.valueOf("default:index_time_" + configuration.get(Configs.SINK_HOS_BUCKET))); asyncIndexTimeTable = asyncHBaseConnection.getTable(TableName.valueOf("default:index_time_" + configuration.get(Configs.SINK_HOS_BUCKET)));
asyncIndexFilenameTable = AsyncHBaseConnection.getTable(TableName.valueOf("default:index_filename_" + configuration.get(Configs.SINK_HOS_BUCKET))); asyncIndexFilenameTable = asyncHBaseConnection.getTable(TableName.valueOf("default:index_filename_" + configuration.get(Configs.SINK_HOS_BUCKET)));
} else { } else {
syncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getSyncHBaseConnection(); syncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getSyncHBaseConnection();
table = syncHBaseConnection.getTable(TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET))); table = syncHBaseConnection.getTable(TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET)));
indexTimeTable = syncHBaseConnection.getTable(TableName.valueOf("default:index_time_" + configuration.get(Configs.SINK_HOS_BUCKET))); indexTimeTable = syncHBaseConnection.getTable(TableName.valueOf("default:index_time_" + configuration.get(Configs.SINK_HOS_BUCKET)));
indexFilenameTable = syncHBaseConnection.getTable(TableName.valueOf("default:index_filename_" + configuration.get(Configs.SINK_HOS_BUCKET))); indexFilenameTable = syncHBaseConnection.getTable(TableName.valueOf("default:index_filename_" + configuration.get(Configs.SINK_HOS_BUCKET)));
} }
timestamp = System.currentTimeMillis();
if (configuration.get(Configs.SINK_BATCH)) {
maxBatchSize = configuration.getLong(Configs.SINK_BATCH_SIZE); maxBatchSize = configuration.getLong(Configs.SINK_BATCH_SIZE);
maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT); maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT);
dataPutList = new ArrayList<>(); dataPutList = new ArrayList<>();
indexTimePutList = new ArrayList<>(); indexTimePutList = new ArrayList<>();
indexFilenamePutList = new ArrayList<>(); indexFilenamePutList = new ArrayList<>();
chunkSize = 0; chunkSize = 0;
chunkCount = 0; executorService = Executors.newScheduledThreadPool(1);
long period = configuration.getInteger(Configs.SINK_BATCH_TIME);
executorService.scheduleWithFixedDelay(() -> {
if (System.currentTimeMillis() - timestamp > (period * 1000)) {
if (!dataPutList.isEmpty()) {
synchronized (this) {
sendBatchData();
}
}
}
}, period, period, TimeUnit.SECONDS);
}
if (rateLimitThreshold > 0) {
rateLimitThreshold = configuration.getLong(Configs.SINK_RATE_LIMIT_THRESHOLD); rateLimitThreshold = configuration.getLong(Configs.SINK_RATE_LIMIT_THRESHOLD);
rateLimitExpression = configuration.getString(Configs.SINK_RATE_LIMIT_EXCLUSION_EXPRESSION); rateLimitExpression = configuration.getString(Configs.SINK_RATE_LIMIT_EXCLUSION_EXPRESSION);
timestamp = System.currentTimeMillis();
count = 0; count = 0;
JexlEngine jexlEngine = new JexlBuilder().create(); JexlEngine jexlEngine = new JexlBuilder().create();
jexlExpression = jexlEngine.createExpression(rateLimitExpression); jexlExpression = jexlEngine.createExpression(rateLimitExpression);
jexlContext = new MapContext(); jexlContext = new MapContext();
} }
}
@Override @Override
public void invoke(FileChunk fileChunk, Context context) { public void invoke(FileChunk fileChunk, Context context) {
synchronized (this) {
long currentTimeMillis = System.currentTimeMillis();
chunksInCounter.inc(); chunksInCounter.inc();
bytesInCounter.inc(fileChunk.getLength()); bytesInCounter.inc(fileChunk.getLength());
if (rateLimitThreshold > 0) { if (rateLimitThreshold > 0) {
count++; count++;
if (System.currentTimeMillis() - timestamp < 1000 && count > rateLimitThreshold) { if (currentTimeMillis - timestamp < 1000 && count > rateLimitThreshold) {
if (checkFileChunk(fileChunk)) { if (checkFileChunk(fileChunk)) {
sendFileChunk(fileChunk); sendFileChunk(fileChunk);
} else { } else {
rateLimitDropChunksCounter.inc(); rateLimitDropChunksCounter.inc();
} }
} else if (System.currentTimeMillis() - timestamp >= 1000) { } else if (currentTimeMillis - timestamp >= 1000) {
if (checkFileChunk(fileChunk)) { if (checkFileChunk(fileChunk)) {
sendFileChunk(fileChunk); sendFileChunk(fileChunk);
} else { } else {
rateLimitDropChunksCounter.inc(); rateLimitDropChunksCounter.inc();
timestamp = System.currentTimeMillis(); }
timestamp = currentTimeMillis;
count = 0; count = 0;
}
} else { } else {
sendFileChunk(fileChunk); sendFileChunk(fileChunk);
} }
} else { } else {
timestamp = currentTimeMillis;
sendFileChunk(fileChunk); sendFileChunk(fileChunk);
} }
} }
}
@Override @Override
public void close() { public void close() {
@@ -208,7 +229,10 @@ public class HBaseSink extends RichSinkFunction<FileChunk> {
IoUtil.close(indexTimeTable); IoUtil.close(indexTimeTable);
IoUtil.close(indexFilenameTable); IoUtil.close(indexFilenameTable);
IoUtil.close(syncHBaseConnection); IoUtil.close(syncHBaseConnection);
IoUtil.close(AsyncHBaseConnection); IoUtil.close(asyncHBaseConnection);
if (executorService != null) {
executorService.shutdown();
}
} }
private void sendFileChunk(FileChunk fileChunk) { private void sendFileChunk(FileChunk fileChunk) {
@@ -254,20 +278,24 @@ public class HBaseSink extends RichSinkFunction<FileChunk> {
metaPut.addColumn(HBaseColumnConstants.BYTE_FAMILY_META, HBaseColumnConstants.BYTE_COLUMN_LAST_MODIFIED, Bytes.toBytes(timestamp)); metaPut.addColumn(HBaseColumnConstants.BYTE_FAMILY_META, HBaseColumnConstants.BYTE_COLUMN_LAST_MODIFIED, Bytes.toBytes(timestamp));
dataPutList.add(metaPut); dataPutList.add(metaPut);
} }
chunkCount++;
chunkSize += chunkLength; chunkSize += chunkLength;
chunksOutCounter.inc(); chunksOutCounter.inc();
bytesOutCounter.inc(chunkLength); bytesOutCounter.inc(chunkLength);
calculateFileChunkMetrics(fileChunk); calculateFileChunkMetrics(fileChunk);
if (chunkSize >= maxBatchSize || chunkCount >= maxBatchCount) { if (chunkSize >= maxBatchSize || dataPutList.size() >= maxBatchCount) {
sendBatchData();
}
}
}
private void sendBatchData() {
if (isAsync) { if (isAsync) {
if (dataPutList.size() > 0) {
List<CompletableFuture<Object>> futures = asyncTable.batch(dataPutList); List<CompletableFuture<Object>> futures = asyncTable.batch(dataPutList);
CompletableFuture.supplyAsync(() -> { CompletableFuture.supplyAsync(() -> {
for (CompletableFuture<Object> completableFuture : futures) { for (CompletableFuture<Object> completableFuture : futures) {
completableFuture.whenCompleteAsync((result, error) -> { completableFuture.whenCompleteAsync((result, error) -> {
if (error != null) { if (error != null) {
LOG.error("put chunk to hbase error. ", error.getMessage()); LOG.error("Put chunk to hbase error. ", error.getMessage());
errorChunksCounter.inc(); errorChunksCounter.inc();
} }
}); });
@@ -275,49 +303,25 @@ public class HBaseSink extends RichSinkFunction<FileChunk> {
return null; return null;
}); });
dataPutList.clear(); dataPutList.clear();
}
if (indexTimePutList.size() > 0) {
asyncIndexTimeTable.batch(indexTimePutList); asyncIndexTimeTable.batch(indexTimePutList);
indexTimePutList.clear(); indexTimePutList.clear();
}
if (indexFilenamePutList.size() > 0) {
asyncIndexFilenameTable.batch(indexFilenamePutList); asyncIndexFilenameTable.batch(indexFilenamePutList);
indexFilenamePutList.clear(); indexFilenamePutList.clear();
}
} else { } else {
if (dataPutList.size() > 0) {
try { try {
table.batch(dataPutList, null); table.batch(dataPutList, null);
indexTimeTable.batch(indexTimePutList, null);
indexFilenameTable.batch(indexFilenamePutList, null);
} catch (IOException | InterruptedException e) { } catch (IOException | InterruptedException e) {
LOG.error("put chunk to hbase data table error. ", e.getMessage()); LOG.error("Put chunk to hbase error. ", e.getMessage());
errorChunksCounter.inc(dataPutList.size()); errorChunksCounter.inc(dataPutList.size());
} finally { } finally {
dataPutList.clear(); dataPutList.clear();
}
}
if (indexTimePutList.size() > 0) {
try {
indexTimeTable.batch(indexTimePutList, null);
} catch (IOException | InterruptedException e) {
LOG.error("put chunk to hbase index time table error. ", e.getMessage());
} finally {
indexTimePutList.clear(); indexTimePutList.clear();
}
}
if (indexFilenamePutList.size() > 0) {
try {
indexFilenameTable.batch(indexFilenamePutList, null);
} catch (IOException | InterruptedException e) {
LOG.error("put chunk to hbase index filename table error. ", e.getMessage());
} finally {
indexFilenamePutList.clear(); indexFilenamePutList.clear();
} }
} }
}
chunkSize = 0; chunkSize = 0;
chunkCount = 0;
}
}
} }
private boolean checkFileChunk(FileChunk fileChunk) { private boolean checkFileChunk(FileChunk fileChunk) {

View File

@@ -28,6 +28,9 @@ import org.apache.http.util.EntityUtils;
import java.io.IOException; import java.io.IOException;
import java.net.ConnectException; import java.net.ConnectException;
import java.util.*; import java.util.*;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import static com.zdjizhi.utils.HttpHeaderConstants.*; import static com.zdjizhi.utils.HttpHeaderConstants.*;
import static com.zdjizhi.utils.PublicConstants.*; import static com.zdjizhi.utils.PublicConstants.*;
@@ -73,16 +76,16 @@ public class HosSink extends RichSinkFunction<FileChunk> {
private String token; private String token;
private volatile String bathPutUrl; private volatile String bathPutUrl;
private HashMap<String, String> hosMessage; private HashMap<String, String> hosMessage;
private String objectsMeta = ""; private String objectsMeta;
private String objectsOffset = ""; private String objectsOffset;
private List<byte[]> byteList; private List<byte[]> byteList;
private long maxBatchSize; private long maxBatchSize;
private long maxBatchCount; private long maxBatchCount;
private long chunkSize = 0; private long chunkSize;
private int chunkCount = 0; private ScheduledExecutorService executorService;
private long rateLimitThreshold; private long rateLimitThreshold;
private String rateLimitExpression; private String rateLimitExpression;
private long timestamp; private volatile long timestamp;
private long count; private long count;
private JexlExpression jexlExpression; private JexlExpression jexlExpression;
private JexlContext jexlContext; private JexlContext jexlContext;
@@ -167,54 +170,77 @@ public class HosSink extends RichSinkFunction<FileChunk> {
} else { } else {
syncHttpClient = HttpClientUtil.getInstance(configuration).getSyncHttpClient(); syncHttpClient = HttpClientUtil.getInstance(configuration).getSyncHttpClient();
} }
timestamp = System.currentTimeMillis();
if (configuration.get(Configs.SINK_BATCH)) {
bathPutUrl = URLUtil.normalize(endpoint + "/hos/" + configuration.get(Configs.SINK_HOS_BUCKET) + "/" + PublicUtil.getUUID()) + "?multiFile"; bathPutUrl = URLUtil.normalize(endpoint + "/hos/" + configuration.get(Configs.SINK_HOS_BUCKET) + "/" + PublicUtil.getUUID()) + "?multiFile";
maxBatchSize = configuration.getLong(Configs.SINK_BATCH_SIZE); maxBatchSize = configuration.getLong(Configs.SINK_BATCH_SIZE);
maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT); maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT);
hosMessage = new HashMap<>(); hosMessage = new HashMap<>();
byteList = new ArrayList<>();
objectsMeta = ""; objectsMeta = "";
objectsOffset = ""; objectsOffset = "";
byteList = new ArrayList<>(); chunkSize = 0;
executorService = Executors.newScheduledThreadPool(1);
long period = configuration.getInteger(Configs.SINK_BATCH_TIME);
executorService.scheduleWithFixedDelay(() -> {
if (System.currentTimeMillis() - timestamp > (period * 1000)) {
if (!byteList.isEmpty()) {
synchronized (this) {
sendBatchData();
}
}
}
}, period, period, TimeUnit.SECONDS);
}
if (rateLimitThreshold > 0) {
rateLimitThreshold = configuration.getLong(Configs.SINK_RATE_LIMIT_THRESHOLD); rateLimitThreshold = configuration.getLong(Configs.SINK_RATE_LIMIT_THRESHOLD);
rateLimitExpression = configuration.getString(Configs.SINK_RATE_LIMIT_EXCLUSION_EXPRESSION); rateLimitExpression = configuration.getString(Configs.SINK_RATE_LIMIT_EXCLUSION_EXPRESSION);
timestamp = System.currentTimeMillis();
count = 0; count = 0;
JexlEngine jexlEngine = new JexlBuilder().create(); JexlEngine jexlEngine = new JexlBuilder().create();
jexlExpression = jexlEngine.createExpression(rateLimitExpression); jexlExpression = jexlEngine.createExpression(rateLimitExpression);
jexlContext = new MapContext(); jexlContext = new MapContext();
} }
}
@Override @Override
public void invoke(FileChunk fileChunk, Context context) { public void invoke(FileChunk fileChunk, Context context) {
synchronized (this) {
long currentTimeMillis = System.currentTimeMillis();
chunksInCounter.inc(); chunksInCounter.inc();
bytesInCounter.inc(fileChunk.getLength()); bytesInCounter.inc(fileChunk.getLength());
if (rateLimitThreshold > 0) { if (rateLimitThreshold > 0) {
count++; count++;
if (System.currentTimeMillis() - timestamp < 1000 && count > rateLimitThreshold) { if (currentTimeMillis - timestamp < 1000 && count > rateLimitThreshold) {
if (checkFileChunk(fileChunk)) { if (checkFileChunk(fileChunk)) {
sendFileChunk(fileChunk); sendFileChunk(fileChunk);
} else { } else {
rateLimitDropChunksCounter.inc(); rateLimitDropChunksCounter.inc();
} }
} else if (System.currentTimeMillis() - timestamp >= 1000) { } else if (currentTimeMillis - timestamp >= 1000) {
if (checkFileChunk(fileChunk)) { if (checkFileChunk(fileChunk)) {
sendFileChunk(fileChunk); sendFileChunk(fileChunk);
} else { } else {
rateLimitDropChunksCounter.inc(); rateLimitDropChunksCounter.inc();
timestamp = System.currentTimeMillis(); }
timestamp = currentTimeMillis;
count = 0; count = 0;
}
} else { } else {
sendFileChunk(fileChunk); sendFileChunk(fileChunk);
} }
} else { } else {
timestamp = currentTimeMillis;
sendFileChunk(fileChunk); sendFileChunk(fileChunk);
} }
} }
}
@Override @Override
public void close() { public void close() {
IoUtil.close(syncHttpClient); IoUtil.close(syncHttpClient);
IoUtil.close(asyncHttpClient); IoUtil.close(asyncHttpClient);
if (executorService != null) {
executorService.shutdown();
}
} }
private void sendFileChunk(FileChunk fileChunk) { private void sendFileChunk(FileChunk fileChunk) {
@@ -236,7 +262,7 @@ public class HosSink extends RichSinkFunction<FileChunk> {
} }
hosMessage.put(HOS_PART_CHUNK_COUNT, fileChunk.getChunkCount() + ""); hosMessage.put(HOS_PART_CHUNK_COUNT, fileChunk.getChunkCount() + "");
Map<String, Object> metaMap = fileChunk.getMeta(); Map<String, Object> metaMap = fileChunk.getMeta();
if (metaMap != null && metaMap.size() > 0) { if (metaMap != null && !metaMap.isEmpty()) {
for (String meta : metaMap.keySet()) { for (String meta : metaMap.keySet()) {
hosMessage.put(HOS_META_PREFIX + StrUtil.toSymbolCase(meta, CharUtil.DASHED), metaMap.get(meta) + ""); hosMessage.put(HOS_META_PREFIX + StrUtil.toSymbolCase(meta, CharUtil.DASHED), metaMap.get(meta) + "");
} }
@@ -245,28 +271,12 @@ public class HosSink extends RichSinkFunction<FileChunk> {
hosMessage.clear(); hosMessage.clear();
objectsOffset += chunkLength + ";"; objectsOffset += chunkLength + ";";
byteList.add(data); byteList.add(data);
chunkCount++;
chunkSize += chunkLength; chunkSize += chunkLength;
chunksOutCounter.inc(); chunksOutCounter.inc();
bytesOutCounter.inc(chunkLength); bytesOutCounter.inc(chunkLength);
calculateFileChunkMetrics(fileChunk); calculateFileChunkMetrics(fileChunk);
if (chunkSize >= maxBatchSize || chunkCount >= maxBatchCount) { if (chunkSize >= maxBatchSize || byteList.size() >= maxBatchCount) {
HttpPut httpPut = new HttpPut(bathPutUrl); sendBatchData();
httpPut.setHeader(TOKEN, token);
httpPut.setHeader(HOS_UPLOAD_TYPE, UPLOAD_TYPE_APPENDV2);
httpPut.setHeader(HOS_COMBINE_MODE, fileChunk.getCombineMode());
httpPut.setHeader(HOS_OBJECTS_META, objectsMeta);
httpPut.setHeader(HOS_OBJECTS_OFFSET, objectsOffset);
byte[][] bytes = new byte[byteList.size()][];
byteList.toArray(bytes);
byte[] newData = ArrayUtil.addAll(bytes);
httpPut.setEntity(new ByteArrayEntity(newData));
byteList.clear();
executeRequest(httpPut);
objectsMeta = "";
objectsOffset = "";
chunkSize = 0;
chunkCount = 0;
} }
} else { } else {
String url = URLUtil.normalize(endpoint + "/hos/" + configuration.get(Configs.SINK_HOS_BUCKET) + "/" + fileChunk.getUuid()); String url = URLUtil.normalize(endpoint + "/hos/" + configuration.get(Configs.SINK_HOS_BUCKET) + "/" + fileChunk.getUuid());
@@ -292,7 +302,7 @@ public class HosSink extends RichSinkFunction<FileChunk> {
} }
httpPut.setHeader(HOS_PART_CHUNK_COUNT, fileChunk.getChunkCount() + ""); httpPut.setHeader(HOS_PART_CHUNK_COUNT, fileChunk.getChunkCount() + "");
Map<String, Object> metaMap = fileChunk.getMeta(); Map<String, Object> metaMap = fileChunk.getMeta();
if (metaMap != null && metaMap.size() > 0) { if (metaMap != null && !metaMap.isEmpty()) {
for (String meta : metaMap.keySet()) { for (String meta : metaMap.keySet()) {
httpPut.setHeader(HOS_META_PREFIX + StrUtil.toSymbolCase(meta, CharUtil.DASHED), metaMap.get(meta) + ""); httpPut.setHeader(HOS_META_PREFIX + StrUtil.toSymbolCase(meta, CharUtil.DASHED), metaMap.get(meta) + "");
} }
@@ -309,6 +319,24 @@ public class HosSink extends RichSinkFunction<FileChunk> {
} }
} }
/**
 * Flushes every chunk buffered for batch upload as one HTTP PUT to {@code bathPutUrl}.
 *
 * <p>The request carries the token, the append-v2 upload type, the seek combine mode and the
 * accumulated {@code objectsMeta} / {@code objectsOffset} headers; its body is the
 * concatenation of all buffered byte arrays in insertion order. After the request has been
 * handed to {@code executeRequest}, the batch state is reset so the next batch starts empty.
 *
 * <p>Calling this method with nothing buffered is a no-op.
 */
private void sendBatchData() {
    // Nothing buffered: do not issue a PUT with empty meta/offset headers and an empty body.
    if (byteList.isEmpty()) {
        return;
    }
    HttpPut httpPut = new HttpPut(bathPutUrl);
    httpPut.setHeader(TOKEN, token);
    httpPut.setHeader(HOS_UPLOAD_TYPE, UPLOAD_TYPE_APPENDV2);
    httpPut.setHeader(HOS_COMBINE_MODE, COMBINE_MODE_SEEK);
    httpPut.setHeader(HOS_OBJECTS_META, objectsMeta);
    httpPut.setHeader(HOS_OBJECTS_OFFSET, objectsOffset);
    // Concatenate the buffered chunks into one payload; the copy makes it safe to clear
    // the buffer below while the request may still be in flight.
    byte[][] bytes = new byte[byteList.size()][];
    byteList.toArray(bytes);
    byte[] newData = ArrayUtil.addAll(bytes);
    httpPut.setEntity(new ByteArrayEntity(newData));
    executeRequest(httpPut);
    // Reset the batch accumulators.
    objectsMeta = "";
    objectsOffset = "";
    byteList.clear();
    chunkSize = 0;
}
private void executeRequest(HttpPut httpPut) { private void executeRequest(HttpPut httpPut) {
if (isAsync) { if (isAsync) {
asyncHttpClient.execute(httpPut, new FutureCallback<HttpResponse>() { asyncHttpClient.execute(httpPut, new FutureCallback<HttpResponse>() {

View File

@@ -73,7 +73,6 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
private CloseableHttpClient syncHttpClient; private CloseableHttpClient syncHttpClient;
private CloseableHttpAsyncClient asyncHttpClient; private CloseableHttpAsyncClient asyncHttpClient;
private List<String> endpointList; private List<String> endpointList;
private CaffeineCacheUtil caffeineCacheUtil;
private Cache<String, FileChunk> cache; private Cache<String, FileChunk> cache;
public OssSinkByCaffeineCache(Configuration configuration) { public OssSinkByCaffeineCache(Configuration configuration) {
@@ -92,8 +91,7 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
} else { } else {
syncHttpClient = HttpClientUtil.getInstance(configuration).getSyncHttpClient(); syncHttpClient = HttpClientUtil.getInstance(configuration).getSyncHttpClient();
} }
caffeineCacheUtil = CaffeineCacheUtil.getInstance(configuration); cache = CaffeineCacheUtil.getInstance(configuration).getCaffeineCache();
cache = caffeineCacheUtil.getCaffeineCache();
metricGroup.gauge("cacheLength", (Gauge<Long>) () -> cache.estimatedSize()); metricGroup.gauge("cacheLength", (Gauge<Long>) () -> cache.estimatedSize());
lessThan1KBChunksCounter = metricGroup.counter("lessThan1KBChunksCount"); lessThan1KBChunksCounter = metricGroup.counter("lessThan1KBChunksCount");
between1KBAnd5KBChunksCounter = metricGroup.counter("between1KBAnd5KBChunksCount"); between1KBAnd5KBChunksCounter = metricGroup.counter("between1KBAnd5KBChunksCount");
@@ -183,8 +181,8 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
} }
FileChunk data = cache.getIfPresent(uuid + "_data"); FileChunk data = cache.getIfPresent(uuid + "_data");
if (data != null) { if (data != null) {
sendFile(data, meta);
cache.invalidate(uuid + "_data"); cache.invalidate(uuid + "_data");
sendFile(data, meta);
} else { } else {
cache.put(fileChunk.getUuid() + "_meta", fileChunk); cache.put(fileChunk.getUuid() + "_meta", fileChunk);
} }
@@ -193,8 +191,8 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
bytesInCounter.inc(fileChunk.getLength()); bytesInCounter.inc(fileChunk.getLength());
FileChunk meta = cache.getIfPresent(uuid + "_meta"); FileChunk meta = cache.getIfPresent(uuid + "_meta");
if (meta != null) { if (meta != null) {
sendFile(fileChunk, meta.getMeta());
cache.invalidate(uuid + "_meta"); cache.invalidate(uuid + "_meta");
sendFile(fileChunk, meta.getMeta());
} else { } else {
cache.put(fileChunk.getUuid() + "_data", fileChunk); cache.put(fileChunk.getUuid() + "_data", fileChunk);
} }
@@ -205,7 +203,6 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
public void close() { public void close() {
IoUtil.close(syncHttpClient); IoUtil.close(syncHttpClient);
IoUtil.close(asyncHttpClient); IoUtil.close(asyncHttpClient);
caffeineCacheUtil.close();
} }
private void sendFile(FileChunk fileChunk, Map<String, Object> metaMap) { private void sendFile(FileChunk fileChunk, Map<String, Object> metaMap) {
@@ -322,13 +319,6 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
} else if (fileId.contains("_2")) { } else if (fileId.contains("_2")) {
responseFilesCounter.inc(); responseFilesCounter.inc();
} }
if (fileChunk.getOffset() == 0 && fileChunk.getLastChunkFlag() == 1) {
completeFilesCounter.inc();
if ("eml".equals(fileType)) {
completeEmlFilesCounter.inc();
} else if ("txt".equals(fileType)) {
completeTxtFilesCounter.inc();
}
if (fileChunk.getChunk() == null) { if (fileChunk.getChunk() == null) {
nullChunksCounter.inc(); nullChunksCounter.inc();
if ("eml".equals(fileType)) { if ("eml".equals(fileType)) {
@@ -338,6 +328,13 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
} }
LOG.info("send file data is null. " + fileChunk.toString()); LOG.info("send file data is null. " + fileChunk.toString());
} }
if (fileChunk.getOffset() == 0 && fileChunk.getLastChunkFlag() == 1) {
completeFilesCounter.inc();
if ("eml".equals(fileType)) {
completeEmlFilesCounter.inc();
} else if ("txt".equals(fileType)) {
completeTxtFilesCounter.inc();
}
if (fileId.contains("_1")) { if (fileId.contains("_1")) {
completeRequestFilesCounter.inc(); completeRequestFilesCounter.inc();
} else if (fileId.contains("_2")) { } else if (fileId.contains("_2")) {

View File

@@ -1,396 +0,0 @@
package com.zdjizhi.sink;
import cn.hutool.core.io.IoUtil;
import cn.hutool.core.util.RandomUtil;
import cn.hutool.core.util.URLUtil;
import cn.hutool.log.Log;
import cn.hutool.log.LogFactory;
import com.zdjizhi.config.Configs;
import com.zdjizhi.pojo.FileChunk;
import com.zdjizhi.utils.EhcacheUtil;
import com.zdjizhi.utils.FormatUtils;
import com.zdjizhi.utils.HttpClientUtil;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.metrics.Counter;
import org.apache.flink.metrics.MeterView;
import org.apache.flink.metrics.MetricGroup;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.concurrent.FutureCallback;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.nio.client.CloseableHttpAsyncClient;
import org.apache.http.util.EntityUtils;
import org.ehcache.Cache;
import org.ehcache.CacheManager;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
public class OssSinkByEhcache extends RichSinkFunction<FileChunk> {
private static final Log LOG = LogFactory.get();
private final Configuration configuration;
public transient Counter chunksInCounter;
public transient Counter chunksOutCounter;
public transient Counter bytesInCounter;
public transient Counter bytesOutCounter;
public transient Counter errorChunksCounter;
public transient Counter fileMetasCounter;
public transient Counter requestFileMetasCounter;
public transient Counter responseFileMetasCounter;
public transient Counter requestFilesCounter;
public transient Counter responseFilesCounter;
public transient Counter emlChunksCounter;
public transient Counter txtChunksCounter;
public transient Counter completeFilesCounter;
public transient Counter completeEmlFilesCounter;
public transient Counter completeTxtFilesCounter;
public transient Counter completeRequestFilesCounter;
public transient Counter completeResponseFilesCounter;
public transient Counter nullChunksCounter;
public transient Counter nullTxtChunksCounter;
public transient Counter nullEmlChunksCounter;
public transient Counter lessThan1KBChunksCounter;
public transient Counter between1KBAnd5KBChunksCounter;
public transient Counter between5KBAnd10KBChunksCounter;
public transient Counter between10KBAnd100KBChunksCounter;
public transient Counter between100KBAnd1MBChunksCounter;
public transient Counter greaterThan1MBChunksCounter;
public transient Counter lessThan10KBEmlChunksCounter;
public transient Counter between1MBAnd10MBEmlChunksCounter;
public transient Counter between10KBAnd100KBEmlChunksCounter;
public transient Counter between100KBAnd1MBEmlChunksCounter;
public transient Counter greaterThan10MBEmlChunksCounter;
public transient Counter lessThan10KBTxtChunksCounter;
public transient Counter between1MBAnd10MBTxtChunksCounter;
public transient Counter between10KBAnd100KBTxtChunksCounter;
public transient Counter between100KBAnd1MBTxtChunksCounter;
public transient Counter greaterThan10MBTxtChunksCounter;
private boolean isAsync;
private CloseableHttpClient syncHttpClient;
private CloseableHttpAsyncClient asyncHttpClient;
private List<String> endpointList;
private EhcacheUtil ehcacheUtil;
private Cache<String, FileChunk> dataCache;
private Cache<String, FileChunk> metaCache;
/**
 * Creates the sink. The configuration is only stored here; it is read later in {@code open}.
 *
 * @param configuration job configuration holding the OSS sink settings
 */
public OssSinkByEhcache(Configuration configuration) {
this.configuration = configuration;
}
/**
 * Prepares the sink: selects the async or sync HTTP client according to
 * {@code Configs.SINK_OSS_ASYNC}, resolves the "data" and "meta" caches from the Ehcache
 * manager, and registers all counters and meters on the
 * {@code addGroup("file_chunk_combiner", "sink_oss")} metric group.
 *
 * @param parameters passed through to {@code super.open}
 * @throws Exception if the parent {@code open} or any initialisation step fails
 */
@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);
    MetricGroup metricGroup = getRuntimeContext().getMetricGroup().addGroup("file_chunk_combiner", "sink_oss");

    // Endpoints are configured as a comma-separated list.
    endpointList = Arrays.asList(configuration.get(Configs.SINK_OSS_ENDPOINT).split(","));
    isAsync = configuration.getBoolean(Configs.SINK_OSS_ASYNC);
    if (isAsync) {
        asyncHttpClient = HttpClientUtil.getInstance(configuration).getAsyncHttpClient();
        asyncHttpClient.start();
    } else {
        syncHttpClient = HttpClientUtil.getInstance(configuration).getSyncHttpClient();
    }

    // Reuse the handle that close() will later shut down instead of fetching it twice.
    ehcacheUtil = EhcacheUtil.getInstance();
    CacheManager ehcacheManager = ehcacheUtil.getEhcacheManager();
    dataCache = ehcacheManager.getCache("data", String.class, FileChunk.class);
    metaCache = ehcacheManager.getCache("meta", String.class, FileChunk.class);

    // Size buckets over all chunks.
    lessThan1KBChunksCounter = metricGroup.counter("lessThan1KBChunksCount");
    between1KBAnd5KBChunksCounter = metricGroup.counter("between1KBAnd5KBChunksCount");
    between5KBAnd10KBChunksCounter = metricGroup.counter("between5KBAnd10KBChunksCount");
    between10KBAnd100KBChunksCounter = metricGroup.counter("between10KBAnd100KBChunksCount");
    between100KBAnd1MBChunksCounter = metricGroup.counter("between100KBAnd1MBChunksCount");
    greaterThan1MBChunksCounter = metricGroup.counter("greaterThan1MBChunksCount");
    metricGroup.meter("numLessThan1KBFilesOutPerSecond", new MeterView(lessThan1KBChunksCounter));
    metricGroup.meter("numBetween1KBAnd5KBFilesOutPerSecond", new MeterView(between1KBAnd5KBChunksCounter));
    metricGroup.meter("numBetween5KBAnd10KBFilesOutPerSecond", new MeterView(between5KBAnd10KBChunksCounter));
    metricGroup.meter("numBetween10KBAnd100KBFilesOutPerSecond", new MeterView(between10KBAnd100KBChunksCounter));
    metricGroup.meter("numBetween100KBAnd1MBFilesOutPerSecond", new MeterView(between100KBAnd1MBChunksCounter));
    metricGroup.meter("numGreaterThan1MBFilesOutPerSecond", new MeterView(greaterThan1MBChunksCounter));

    // Size buckets for "eml" chunks.
    lessThan10KBEmlChunksCounter = metricGroup.counter("lessThan10KBEmlChunksCount");
    between10KBAnd100KBEmlChunksCounter = metricGroup.counter("between10KBAnd100KBEmlChunksCount");
    between100KBAnd1MBEmlChunksCounter = metricGroup.counter("between100KBAnd1MBEmlChunksCount");
    between1MBAnd10MBEmlChunksCounter = metricGroup.counter("between1MBAnd10MBEmlChunksCount");
    greaterThan10MBEmlChunksCounter = metricGroup.counter("greaterThan10MBEmlChunksCount");
    metricGroup.meter("numLessThan10KBEmlFilesOutPerSecond", new MeterView(lessThan10KBEmlChunksCounter));
    metricGroup.meter("numBetween10KBAnd100KBEmlFilesOutPerSecond", new MeterView(between10KBAnd100KBEmlChunksCounter));
    metricGroup.meter("numBetween100KBAnd1MBEmlFilesOutPerSecond", new MeterView(between100KBAnd1MBEmlChunksCounter));
    metricGroup.meter("numBetween1MBAnd10MBEmlFilesOutPerSecond", new MeterView(between1MBAnd10MBEmlChunksCounter));
    metricGroup.meter("numGreaterThan10MBEmlFilesOutPerSecond", new MeterView(greaterThan10MBEmlChunksCounter));

    // Size buckets for "txt" chunks.
    lessThan10KBTxtChunksCounter = metricGroup.counter("lessThan10KBTxtChunksCount");
    between10KBAnd100KBTxtChunksCounter = metricGroup.counter("between10KBAnd100KBTxtChunksCount");
    between100KBAnd1MBTxtChunksCounter = metricGroup.counter("between100KBAnd1MBTxtChunksCount");
    between1MBAnd10MBTxtChunksCounter = metricGroup.counter("between1MBAnd10MBTxtChunksCount");
    greaterThan10MBTxtChunksCounter = metricGroup.counter("greaterThan10MBTxtChunksCount");
    metricGroup.meter("numLessThan10KBTxtChunksOutPerSecond", new MeterView(lessThan10KBTxtChunksCounter));
    metricGroup.meter("numBetween10KBAnd100KBTxtChunksOutPerSecond", new MeterView(between10KBAnd100KBTxtChunksCounter));
    metricGroup.meter("numBetween100KBAnd1MBTxtChunksOutPerSecond", new MeterView(between100KBAnd1MBTxtChunksCounter));
    metricGroup.meter("numBetween1MBAnd10MBTxtChunksOutPerSecond", new MeterView(between1MBAnd10MBTxtChunksCounter));
    metricGroup.meter("numGreaterThan10MBTxtChunksOutPerSecond", new MeterView(greaterThan10MBTxtChunksCounter));

    // Per-file-type chunk totals.
    emlChunksCounter = metricGroup.counter("emlChunksCount");
    txtChunksCounter = metricGroup.counter("txtChunksCount");
    metricGroup.meter("numEmlChunksOutPerSecond", new MeterView(emlChunksCounter));
    metricGroup.meter("numTxtChunksOutPerSecond", new MeterView(txtChunksCounter));

    // Incoming metadata records and request/response split.
    fileMetasCounter = metricGroup.counter("fileMetasCount");
    metricGroup.meter("numFileMetasInPerSecond", new MeterView(fileMetasCounter));
    requestFileMetasCounter = metricGroup.counter("requestFileMetasCount");
    responseFileMetasCounter = metricGroup.counter("responseFileMetasCount");
    requestFilesCounter = metricGroup.counter("requestFilesCount");
    responseFilesCounter = metricGroup.counter("responseFilesCount");
    metricGroup.meter("numRequestFileMetasInPerSecond", new MeterView(requestFileMetasCounter));
    metricGroup.meter("numResponseFileMetasInPerSecond", new MeterView(responseFileMetasCounter));
    metricGroup.meter("numRequestFilesOutPerSecond", new MeterView(requestFilesCounter));
    metricGroup.meter("numResponseFilesOutPerSecond", new MeterView(responseFilesCounter));

    // Throughput and error totals.
    errorChunksCounter = metricGroup.counter("errorChunksCount");
    chunksInCounter = metricGroup.counter("chunksInCount");
    chunksOutCounter = metricGroup.counter("chunksOutCount");
    bytesInCounter = metricGroup.counter("bytesInCount");
    bytesOutCounter = metricGroup.counter("bytesOutCount");
    metricGroup.meter("numChunksInPerSecond", new MeterView(chunksInCounter));
    metricGroup.meter("numChunksOutPerSecond", new MeterView(chunksOutCounter));
    metricGroup.meter("numBytesInPerSecond", new MeterView(bytesInCounter));
    metricGroup.meter("numBytesOutPerSecond", new MeterView(bytesOutCounter));
    metricGroup.meter("numErrorChunksPerSecond", new MeterView(errorChunksCounter));

    // Files delivered as a single chunk (offset 0 and last-chunk flag set).
    completeFilesCounter = metricGroup.counter("completeFilesCount");
    completeEmlFilesCounter = metricGroup.counter("completeEmlFilesCount");
    completeTxtFilesCounter = metricGroup.counter("completeTxtFilesCount");
    completeRequestFilesCounter = metricGroup.counter("completeRequestFilesCount");
    completeResponseFilesCounter = metricGroup.counter("completeResponseFilesCount");
    metricGroup.meter("numCompleteFilesOutPerSecond", new MeterView(completeFilesCounter));
    metricGroup.meter("numCompleteEmlFilesOutPerSecond", new MeterView(completeEmlFilesCounter));
    metricGroup.meter("numCompleteTxtFilesOutPerSecond", new MeterView(completeTxtFilesCounter));
    metricGroup.meter("numCompleteRequestFilesOutPerSecond", new MeterView(completeRequestFilesCounter));
    metricGroup.meter("numCompleteResponseFilesOutPerSecond", new MeterView(completeResponseFilesCounter));

    // Chunks sent without payload. Each counter is registered under its own name
    // (the eml/txt names were previously swapped, so the two metrics reported each other).
    nullChunksCounter = metricGroup.counter("nullChunksCount");
    nullEmlChunksCounter = metricGroup.counter("nullEmlChunksCount");
    nullTxtChunksCounter = metricGroup.counter("nullTxtChunksCount");
    metricGroup.meter("numNullFilesOutPerSecond", new MeterView(nullChunksCounter));
    metricGroup.meter("numNullEmlFilesOutPerSecond", new MeterView(nullEmlChunksCounter));
    metricGroup.meter("numNullTxtFilesOutPerSecond", new MeterView(nullTxtChunksCounter));
}
/**
 * Pairs a file's metadata record with its data record and uploads the file once both
 * have been seen.
 *
 * <p>A record whose {@code getMeta()} is non-null is treated as the metadata record; any
 * other record is treated as file data. Whichever arrives first is parked in the matching
 * cache under the file's uuid; when its counterpart arrives the file is sent and the
 * cached entry is removed.
 *
 * @param fileChunk the incoming metadata or data record
 * @param context   sink context (unused)
 */
@Override
public void invoke(FileChunk fileChunk, Context context) {
    String uuid = fileChunk.getUuid();
    Map<String, Object> meta = fileChunk.getMeta();
    if (meta != null) { // metadata (log) record
        fileMetasCounter.inc();
        // sendFile tolerates a missing "fileId", so a missing key must not fail here either.
        Object fileIdValue = meta.get("fileId");
        String fileId = fileIdValue == null ? "" : fileIdValue.toString();
        if (fileId.contains("_1")) {
            requestFileMetasCounter.inc();
        } else if (fileId.contains("_2")) {
            responseFileMetasCounter.inc();
        }
        FileChunk data = dataCache.get(uuid);
        if (data != null) {
            sendFile(data, meta);
            dataCache.remove(uuid);
        } else {
            metaCache.put(uuid, fileChunk);
        }
    } else { // file data record
        chunksInCounter.inc();
        bytesInCounter.inc(fileChunk.getLength());
        FileChunk cachedMeta = metaCache.get(uuid);
        if (cachedMeta != null) {
            sendFile(fileChunk, cachedMeta.getMeta());
            metaCache.remove(uuid);
        } else {
            dataCache.put(uuid, fileChunk);
        }
    }
}
/**
 * Releases the HTTP clients and the Ehcache handle.
 *
 * <p>Safe to call when {@code open} failed part-way: only one of the two HTTP clients is
 * ever created, and the cache handle may still be unset.
 */
@Override
public void close() {
    IoUtil.close(syncHttpClient);
    IoUtil.close(asyncHttpClient);
    // open() may have thrown before the cache handle was assigned; avoid an NPE that
    // would mask the original failure.
    if (ehcacheUtil != null) {
        ehcacheUtil.close();
    }
}
/**
 * Uploads one file to a randomly chosen OSS endpoint via {@code POST /v3/upload}.
 *
 * <p>The query string is assembled from the file type and the entries of {@code metaMap};
 * absent entries fall back to {@code ""} (or {@code "0"} for the policy id and found time).
 * A chunk without payload is sent with an empty body. Any exception is logged and counted
 * in {@code errorChunksCounter}; nothing is rethrown.
 *
 * @param fileChunk the file data to upload
 * @param metaMap   metadata of the file, may be {@code null}
 */
private void sendFile(FileChunk fileChunk, Map<String, Object> metaMap) {
    String url = "";
    try {
        byte[] chunk = fileChunk.getChunk();
        String fileType = fileChunk.getFileType();
        byte[] data = chunk != null ? chunk : new byte[0];

        String fileId = metaOrDefault(metaMap, "fileId", "");
        String policyId = metaOrDefault(metaMap, "policyId", "0");
        String serverIP = metaOrDefault(metaMap, "serverIP", "");
        String serverPort = metaOrDefault(metaMap, "serverPort", "");
        String clientIP = metaOrDefault(metaMap, "clientIP", "");
        String clientPort = metaOrDefault(metaMap, "clientPort", "");
        String domain = "";
        if (metaMap != null && metaMap.containsKey("httpHost")) {
            domain = FormatUtils.getTopPrivateDomain(metaMap.get("httpHost").toString());
        }
        String subscriberId = metaOrDefault(metaMap, "subscriberId", "");
        String foundTime = metaOrDefault(metaMap, "foundTime", "0");

        String endpoint = endpointList.get(RandomUtil.randomInt(endpointList.size()));
        StringBuilder target = new StringBuilder(endpoint)
                .append("/v3/upload?")
                .append("cfg_id=").append(policyId)
                .append("&file_id=").append(fileId)
                .append("&file_type=").append(fileType)
                .append("&found_time=").append(foundTime)
                .append("&s_ip=").append(serverIP)
                .append("&s_port=").append(serverPort)
                .append("&d_ip=").append(clientIP)
                .append("&d_port=").append(clientPort)
                .append("&domain=").append(domain)
                .append("&account=").append(subscriberId);
        url = URLUtil.normalize(target.toString());

        HttpPost httpPost = new HttpPost(url);
        httpPost.setEntity(new ByteArrayEntity(data));
        executeRequest(httpPost, url);

        chunksOutCounter.inc();
        bytesOutCounter.inc(data.length);
        calculateFileChunkMetrics(fileChunk, fileId);
    } catch (Exception e) {
        LOG.error("post file error. current url: " + url, e);
        errorChunksCounter.inc();
    }
}

/**
 * Returns the string form of {@code metaMap}'s entry for {@code key}, or {@code fallback}
 * when the map is {@code null} or does not contain the key.
 */
private String metaOrDefault(Map<String, Object> metaMap, String key, String fallback) {
    if (metaMap == null || !metaMap.containsKey(key)) {
        return fallback;
    }
    return metaMap.get(key).toString();
}
/**
 * Executes the upload request with the async or the sync HTTP client, depending on
 * {@code isAsync}, and validates the response.
 *
 * <p>Failures are logged and counted in {@code errorChunksCounter}. In sync mode the
 * response is always closed.
 *
 * @param httpPost the prepared upload request
 * @param url      the request URL, used only in log messages
 */
private void executeRequest(HttpPost httpPost, String url) {
    if (!isAsync) {
        CloseableHttpResponse response = null;
        try {
            response = syncHttpClient.execute(httpPost);
            checkUploadResponse(response, url);
        } catch (IOException e) {
            LOG.error("post file error. current url: " + url, e);
            errorChunksCounter.inc();
        } finally {
            IoUtil.close(response);
        }
        return;
    }
    asyncHttpClient.execute(httpPost, new FutureCallback<HttpResponse>() {
        @Override
        public void completed(HttpResponse httpResponse) {
            try {
                checkUploadResponse(httpResponse, url);
            } catch (IOException e) {
                LOG.error("post file error. current url: " + url, e);
                errorChunksCounter.inc();
            }
        }

        @Override
        public void failed(Exception ex) {
            LOG.error("post file error. current url: " + url, ex);
            errorChunksCounter.inc();
        }

        @Override
        public void cancelled() {
            // No action is taken on cancellation.
        }
    });
}

/**
 * Logs and counts an error unless the response has HTTP status 200 and its body contains
 * {@code "code":200}.
 *
 * @throws IOException if the response body cannot be read
 */
private void checkUploadResponse(HttpResponse response, String url) throws IOException {
    String responseEntity = EntityUtils.toString(response.getEntity(), "UTF-8");
    int statusCode = response.getStatusLine().getStatusCode();
    if (statusCode != 200) {
        LOG.error("post file error. current url: {}, code: {}, msg: {}", url, statusCode, responseEntity);
        errorChunksCounter.inc();
        return;
    }
    if (!responseEntity.contains("\"code\":200")) {
        LOG.error("post file error. current url: {}, msg: {}", url, responseEntity);
        errorChunksCounter.inc();
    }
}
/**
 * Updates the per-type, per-direction and size-bucket metrics for a chunk that was sent.
 *
 * <p>A {@code fileId} containing {@code "_1"} is counted as a request file, one containing
 * {@code "_2"} as a response file. The "complete file" counters, including the null-payload
 * counters, are only updated when the chunk has offset 0 and the last-chunk flag set to 1.
 *
 * @param fileChunk the chunk that was sent
 * @param fileId    file id taken from the metadata, must not be {@code null}
 */
private void calculateFileChunkMetrics(FileChunk fileChunk, String fileId) {
    final String type = fileChunk.getFileType();
    final long size = fileChunk.getLength();
    final boolean eml = "eml".equals(type);
    final boolean txt = !eml && "txt".equals(type);

    calculateChunkSize(size);
    if (eml) {
        emlChunksCounter.inc();
        calculateEmlChunkSize(size);
    } else if (txt) {
        txtChunksCounter.inc();
        calculateTxtChunkSize(size);
    }

    final boolean request = fileId.contains("_1");
    final boolean response = !request && fileId.contains("_2");
    if (request) {
        requestFilesCounter.inc();
    } else if (response) {
        responseFilesCounter.inc();
    }

    // Everything below applies only to files delivered as one single chunk.
    if (!(fileChunk.getOffset() == 0 && fileChunk.getLastChunkFlag() == 1)) {
        return;
    }
    completeFilesCounter.inc();
    if (eml) {
        completeEmlFilesCounter.inc();
    } else if (txt) {
        completeTxtFilesCounter.inc();
    }
    if (fileChunk.getChunk() == null) {
        nullChunksCounter.inc();
        if (eml) {
            nullEmlChunksCounter.inc();
        } else if (txt) {
            nullTxtChunksCounter.inc();
        }
        LOG.info("send file data is null. " + fileChunk.toString());
    }
    if (request) {
        completeRequestFilesCounter.inc();
    } else if (response) {
        completeResponseFilesCounter.inc();
    }
}
/**
 * Increments the size-bucket counter matching {@code length} (in bytes). Upper bounds are
 * inclusive: 1 KB, 5 KB, 10 KB, 100 KB, 1 MB; anything larger goes to the last bucket.
 */
private void calculateChunkSize(long length) {
    final long kb = 1024;
    final Counter bucket;
    if (length <= kb) {
        bucket = lessThan1KBChunksCounter;
    } else if (length <= 5 * kb) {
        bucket = between1KBAnd5KBChunksCounter;
    } else if (length <= 10 * kb) {
        bucket = between5KBAnd10KBChunksCounter;
    } else if (length <= 100 * kb) {
        bucket = between10KBAnd100KBChunksCounter;
    } else if (length <= kb * kb) {
        bucket = between100KBAnd1MBChunksCounter;
    } else {
        bucket = greaterThan1MBChunksCounter;
    }
    bucket.inc();
}
/**
 * Increments the "eml" size-bucket counter matching {@code length} (in bytes). Upper bounds
 * are inclusive: 10 KB, 100 KB, 1 MB, 10 MB; anything larger goes to the last bucket.
 */
private void calculateEmlChunkSize(long length) {
    final long kb = 1024;
    final long mb = kb * kb;
    if (length <= 10 * kb) {
        lessThan10KBEmlChunksCounter.inc();
        return;
    }
    if (length <= 100 * kb) {
        between10KBAnd100KBEmlChunksCounter.inc();
        return;
    }
    if (length <= mb) {
        between100KBAnd1MBEmlChunksCounter.inc();
        return;
    }
    if (length <= 10 * mb) {
        between1MBAnd10MBEmlChunksCounter.inc();
        return;
    }
    greaterThan10MBEmlChunksCounter.inc();
}
/**
 * Increments the "txt" size-bucket counter matching {@code length} (in bytes). Upper bounds
 * are inclusive: 10 KB, 100 KB, 1 MB, 10 MB; anything larger goes to the last bucket.
 */
private void calculateTxtChunkSize(long length) {
    final long kb = 1024;
    final long mb = kb * kb;
    final Counter bucket;
    if (length > 10 * mb) {
        bucket = greaterThan10MBTxtChunksCounter;
    } else if (length > mb) {
        bucket = between1MBAnd10MBTxtChunksCounter;
    } else if (length > 100 * kb) {
        bucket = between100KBAnd1MBTxtChunksCounter;
    } else if (length > 10 * kb) {
        bucket = between10KBAnd100KBTxtChunksCounter;
    } else {
        bucket = lessThan10KBTxtChunksCounter;
    }
    bucket.inc();
}
}

View File

@@ -0,0 +1,66 @@
package com.zdjizhi.trigger;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.state.ReducingState;
import org.apache.flink.api.common.state.ReducingStateDescriptor;
import org.apache.flink.api.common.typeutils.base.LongSerializer;
import org.apache.flink.streaming.api.windowing.triggers.Trigger;
import org.apache.flink.streaming.api.windowing.triggers.TriggerResult;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
public class IdleTimeTrigger<W extends TimeWindow> extends Trigger<Object, TimeWindow> {
private static final long serialVersionUID = 1L;
private final long maxIdleTime;
private IdleTimeTrigger(long maxIdleTime) {
this.maxIdleTime = maxIdleTime;
}
public static <W extends TimeWindow> IdleTimeTrigger<TimeWindow> of(long maxIdleTime) {
return new IdleTimeTrigger<>(maxIdleTime);
}
private final ReducingStateDescriptor<Long> processingTimeStateDesc =
new ReducingStateDescriptor<>("processTimer", new ReduceMax(), LongSerializer.INSTANCE);
@Override
public TriggerResult onElement(Object element, long timestamp, TimeWindow window, TriggerContext ctx) throws Exception {
ReducingState<Long> fireTimestamp = ctx.getPartitionedState(processingTimeStateDesc);
fireTimestamp.clear();
long nextFireTimestamp = ctx.getCurrentProcessingTime() + maxIdleTime;
ctx.registerProcessingTimeTimer(nextFireTimestamp);
fireTimestamp.add(nextFireTimestamp);
return TriggerResult.CONTINUE;
}
@Override
public TriggerResult onProcessingTime(long time, TimeWindow window, TriggerContext ctx) throws Exception {
ReducingState<Long> fireTimestamp = ctx.getPartitionedState(processingTimeStateDesc);
if (fireTimestamp.get() != null && fireTimestamp.get() == time) {
fireTimestamp.clear();
return TriggerResult.FIRE;
}
return TriggerResult.CONTINUE;
}
@Override
public TriggerResult onEventTime(long time, TimeWindow window, TriggerContext ctx) {
return TriggerResult.CONTINUE;
}
@Override
public void clear(TimeWindow window, TriggerContext ctx) {
ReducingState<Long> fireTimestamp = ctx.getPartitionedState(processingTimeStateDesc);
fireTimestamp.clear();
}
private static class ReduceMax implements ReduceFunction<Long> {
private static final long serialVersionUID = 1L;
@Override
public Long reduce(Long value1, Long value2) {
return Math.max(value1, value2);
}
}
}

View File

@@ -38,9 +38,10 @@ sink.parallelism=1
#sink type options: hos, oss, hbase #sink type options: hos, oss, hbase
sink.type=hos sink.type=hos
sink.async=false sink.async=false
sink.batch=false sink.batch=true
sink.batch.count=1000 sink.batch.count=1000
sink.batch.size=1048576 sink.batch.size=1048576
sink.batch.time=10
#sink.filter.expression= #sink.filter.expression=
#sink.rate.limit.threshold=0 #sink.rate.limit.threshold=0
#sink.rate.limit.exclusion.expression=FileChunk.fileType == "eml" #sink.rate.limit.exclusion.expression=FileChunk.fileType == "eml"

View File

@@ -1,36 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://www.ehcache.org/v3"
xmlns:jsr107="http://www.ehcache.org/v3/jsr107"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.ehcache.org/v3
http://www.ehcache.org/schema/ehcache-core-3.10.xsd
http://www.ehcache.org/v3/jsr107
http://www.ehcache.org/schema/ehcache-107-ext-3.10.xsd">
<!-- <persistence directory="D:\myCache"/>-->
<cache alias="data">
<key-type>java.lang.String</key-type>
<value-type>com.zdjizhi.pojo.FileChunk</value-type>
<expiry>
<ttl unit="seconds">600</ttl>
</expiry>
<resources>
<heap unit="entries">100000</heap>
<!-- <offheap unit="GB">15</offheap>-->
<!-- <disk persistent="true" unit="MB">500</disk>-->
</resources>
</cache>
<cache alias="meta">
<key-type>java.lang.String</key-type>
<value-type>com.zdjizhi.pojo.FileChunk</value-type>
<expiry>
<ttl unit="seconds">1200</ttl>
</expiry>
<resources>
<heap unit="entries">100000</heap>
<!-- <offheap unit="GB">5</offheap>-->
<!-- <disk persistent="true" unit="MB">500</disk>-->
</resources>
</cache>
</config>