[TSG-20820]修复file-chunk-combiner的sink使用批量时导致文件无法下载的问题。

This commit is contained in:
houjinchuan
2024-05-08 10:56:17 +08:00
parent af97ab0a6d
commit 2e8b8f98e6
12 changed files with 273 additions and 769 deletions

View File

@@ -17,7 +17,6 @@ import com.zdjizhi.trigger.LastChunkTrigger;
import com.zdjizhi.trigger.MultipleTrigger;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.*;
@@ -174,35 +173,6 @@ public class FileChunkCombiner {
.filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_proxy_file_meta"))
.name("Filter: Map")
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM));
windowStream
.filter(new FileChunkFilterFunction(configuration.getString(Configs.SINK_OSS_FILTER_EXPRESSION), "sink_oss"))
.name("Filter: Oss")
.setParallelism(configuration.get(Configs.COMBINER_WINDOW_PARALLELISM))
.union(fileMetaSessionSingleOutputStreamOperator, fileMetaProxySingleOutputStreamOperator)
.keyBy(new FileChunkKeySelector())
.addSink(new OssSinkByEhcache(configuration))
.name("Oss")
.setParallelism(configuration.get(Configs.SINK_PARALLELISM));
break;
case "oss-caffeine":
fileMetaSessionSingleOutputStreamOperator = environment.addSource(FileMetaKafkaConsumer.stringConsumer(configuration, configuration.get(Configs.KAFKA_FILE_META_SESSION_TOPIC)))
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
.name(configuration.get(Configs.KAFKA_FILE_META_SESSION_TOPIC))
.flatMap(new ParseSessionFileMetaFlatMapFunction())
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
.name("Map: Parse Session File Meta")
.filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_session_file_meta"))
.name("Filter: Map")
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM));
fileMetaProxySingleOutputStreamOperator = environment.addSource(FileMetaKafkaConsumer.stringConsumer(configuration, configuration.get(Configs.KAFKA_FILE_META_PROXY_TOPIC)))
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
.name(configuration.get(Configs.KAFKA_FILE_META_PROXY_TOPIC))
.flatMap(new ParseProxyFileMetaFlatMapFunction())
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
.name("Map: Parse Proxy File Meta")
.filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_proxy_file_meta"))
.name("Filter: Map")
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM));
windowStream
.filter(new FileChunkFilterFunction(configuration.getString(Configs.SINK_OSS_FILTER_EXPRESSION), "sink_oss"))
.name("Filter: Oss")
@@ -213,38 +183,6 @@ public class FileChunkCombiner {
.name("Oss")
.setParallelism(configuration.get(Configs.SINK_PARALLELISM));
break;
case "test":
fileMetaSessionSingleOutputStreamOperator = environment.addSource(FileMetaKafkaConsumer.stringConsumer(configuration, configuration.get(Configs.KAFKA_FILE_META_SESSION_TOPIC)))
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
.name(configuration.get(Configs.KAFKA_FILE_META_SESSION_TOPIC))
.flatMap(new ParseSessionFileMetaFlatMapFunction())
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
.name("Map: Parse Session File Meta")
.filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_session_file_meta"))
.name("Filter: Map")
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM));
fileMetaProxySingleOutputStreamOperator = environment.addSource(FileMetaKafkaConsumer.stringConsumer(configuration, configuration.get(Configs.KAFKA_FILE_META_PROXY_TOPIC)))
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
.name(configuration.get(Configs.KAFKA_FILE_META_PROXY_TOPIC))
.flatMap(new ParseProxyFileMetaFlatMapFunction())
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
.name("Map: Parse Proxy File Meta")
.filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_proxy_file_meta"))
.name("Filter: Map")
.setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM));
KeyedStream<FileChunk, String> fileMetaStringKeyedStream = fileMetaSessionSingleOutputStreamOperator
.union(fileMetaProxySingleOutputStreamOperator)
.keyBy((KeySelector<FileChunk, String>) FileChunk::getUuid);
windowStream
.filter(new FileChunkFilterFunction(configuration.getString(Configs.SINK_OSS_FILTER_EXPRESSION), "sink_oss"))
.name("Filter: Oss")
.setParallelism(configuration.get(Configs.COMBINER_WINDOW_PARALLELISM))
.keyBy((KeySelector<FileChunk, String>) FileChunk::getUuid)
.connect(fileMetaStringKeyedStream)
.process(new TestKeyedCoProcessFunction(configuration))
.setParallelism(configuration.get(Configs.COMBINER_WINDOW_PARALLELISM))
.name("Oss");
break;
}
}
environment.execute(configuration.get(Configs.FLINK_JOB_NAME));

View File

@@ -84,6 +84,9 @@ public class Configs {
// Batch flush threshold that the sinks compare against the running sum of buffered chunk lengths
// (presumably bytes — confirm). With the Long.MAX_VALUE default this limit is effectively never reached.
public static final ConfigOption<Long> SINK_BATCH_SIZE = ConfigOptions.key("sink.batch.size")
.longType()
.defaultValue(Long.MAX_VALUE);
// Interval, in seconds, of the sinks' scheduled flush task: the task runs every N seconds and sends the
// pending batch once more than N seconds have passed since the sink last refreshed its timestamp.
public static final ConfigOption<Integer> SINK_BATCH_TIME = ConfigOptions.key("sink.batch.time")
.intType()
.defaultValue(5);
// Filter expression for the sink; defaults to the empty string.
// NOTE(review): the consumer of this option is not visible in this view — confirm how an empty value is treated.
public static final ConfigOption<String> SINK_FILTER_EXPRESSION = ConfigOptions.key("sink.filter.expression")
.stringType()
.defaultValue("");

View File

@@ -120,8 +120,8 @@ public class CombineChunkProcessWindowFunction extends ProcessWindowFunction<Fil
waitingToCombineChunkList.add(currentFileChunk.getChunk());
}
} else {// 期望offset小于当前offset说明缺块
if (waitingToCombineChunkList.size() > 0) {//将可合并的chunk合并清空集合
FileChunk fileChunk = combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), originalFileChunkList.get(0).getTimestamp(), null);
if (!waitingToCombineChunkList.isEmpty()) {//将可合并的chunk合并清空集合
FileChunk fileChunk = combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), currentFileChunk.getTimestamp(), null);
if (fileChunk != null) {
combinedFileChunkList.add(fileChunk);
}
@@ -139,8 +139,8 @@ public class CombineChunkProcessWindowFunction extends ProcessWindowFunction<Fil
}
}
}
if (waitingToCombineChunkList.size() > 0) {
FileChunk fileChunk = combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), originalFileChunkList.get(0).getTimestamp(), null);
if (!waitingToCombineChunkList.isEmpty()) {
FileChunk fileChunk = combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), currentFileChunk.getTimestamp(), null);
if (fileChunk != null) {
combinedFileChunkList.add(fileChunk);
}

View File

@@ -1,101 +0,0 @@
package com.zdjizhi.function;
import cn.hutool.core.io.IoUtil;
import com.zdjizhi.config.Configs;
import com.zdjizhi.pojo.FileChunk;
import com.zdjizhi.utils.HBaseConnectionUtil;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.metrics.Counter;
import org.apache.flink.metrics.MeterView;
import org.apache.flink.metrics.MetricGroup;
import org.apache.flink.streaming.api.functions.co.KeyedCoProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Co-process function that writes both of its keyed inputs to one HBase table.
 *
 * <p>Input 1 carries file chunks and is stored under {@code meta:data}; input 2 carries file
 * metadata and is stored under {@code meta:meta}. The row key is the chunk UUID in both cases.
 * Puts are buffered per input and written as a batch once {@code SINK_BATCH_COUNT} entries have
 * accumulated; whatever is still buffered is written when the function is closed. Nothing is
 * emitted downstream.
 */
public class TestKeyedCoProcessFunction extends KeyedCoProcessFunction<String, FileChunk, FileChunk, FileChunk> {
    // Explicit charset so row keys and cell values do not depend on the JVM's platform default.
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;
    private static final byte[] FAMILY_META = "meta".getBytes(UTF_8);
    private static final byte[] QUALIFIER_DATA = "data".getBytes(UTF_8);
    private static final byte[] QUALIFIER_META = "meta".getBytes(UTF_8);

    private final Configuration configuration;
    public transient Counter chunksInCounter;
    public transient Counter fileMetasInCounter;
    private boolean isAsync;
    private Connection syncHBaseConnection;
    private AsyncConnection asyncHBaseConnection;
    private Table table;
    private AsyncTable<AdvancedScanResultConsumer> asyncTable;
    private List<Put> dataPutList;
    private List<Put> metaPutList;
    private long maxBatchCount;

    /**
     * @param configuration job configuration; read in {@link #open(Configuration)} for the sink
     *                      mode, target bucket and batch count
     */
    public TestKeyedCoProcessFunction(Configuration configuration) {
        this.configuration = configuration;
    }

    /**
     * Registers the input counters and meters, opens the sync or async HBase table selected by
     * {@code SINK_ASYNC}, and creates the two put buffers.
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        MetricGroup metricGroup = getRuntimeContext().getMetricGroup().addGroup("file_chunk_combiner", "add_file_meta");
        chunksInCounter = metricGroup.counter("chunksInCount");
        fileMetasInCounter = metricGroup.counter("fileMetasInCount");
        metricGroup.meter("numChunksInPerSecond", new MeterView(chunksInCounter));
        metricGroup.meter("numFileMetasInPerSecond", new MeterView(fileMetasInCounter));
        isAsync = configuration.getBoolean(Configs.SINK_ASYNC);
        TableName tableName = TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET));
        if (isAsync) {
            asyncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getAsyncHBaseConnection();
            asyncTable = asyncHBaseConnection.getTable(tableName);
        } else {
            syncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getSyncHBaseConnection();
            table = syncHBaseConnection.getTable(tableName);
        }
        maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT);
        dataPutList = new ArrayList<>();
        metaPutList = new ArrayList<>();
    }

    /** Buffers one file chunk as a {@code meta:data} put and flushes the buffer when it is full. */
    @Override
    public void processElement1(FileChunk value, Context ctx, Collector<FileChunk> out) throws IOException, InterruptedException {
        chunksInCounter.inc();
        Put dataPut = new Put(value.getUuid().getBytes(UTF_8));
        dataPut.addColumn(FAMILY_META, QUALIFIER_DATA, value.toString().getBytes(UTF_8));
        dataPutList.add(dataPut);
        if (dataPutList.size() >= maxBatchCount) {
            flush(dataPutList, false);
        }
    }

    /** Buffers one file-meta record as a {@code meta:meta} put and flushes the buffer when it is full. */
    @Override
    public void processElement2(FileChunk value, Context ctx, Collector<FileChunk> out) throws IOException, InterruptedException {
        fileMetasInCounter.inc();
        Put metaPut = new Put(value.getUuid().getBytes(UTF_8));
        metaPut.addColumn(FAMILY_META, QUALIFIER_META, value.getMeta().toString().getBytes(UTF_8));
        metaPutList.add(metaPut);
        if (metaPutList.size() >= maxBatchCount) {
            flush(metaPutList, false);
        }
    }

    /**
     * Writes any puts still buffered, then releases the HBase table and connections.
     *
     * <p>Without the final flush, up to {@code maxBatchCount - 1} puts per input would be dropped
     * every time the task stops. The resources are released even when the flush fails.
     *
     * @throws java.io.UncheckedIOException if the final synchronous batch write fails
     */
    @Override
    public void close() {
        try {
            flush(dataPutList, true);
            flush(metaPutList, true);
        } catch (IOException e) {
            throw new java.io.UncheckedIOException("Failed to flush buffered puts on close", e);
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the caller; the remaining puts cannot be written.
            Thread.currentThread().interrupt();
        } finally {
            IoUtil.close(table);
            IoUtil.close(syncHBaseConnection);
            IoUtil.close(asyncHBaseConnection);
        }
    }

    /**
     * Sends the buffered puts as one batch and empties the buffer.
     *
     * @param puts  buffer to send; a {@code null} or empty buffer is a no-op
     * @param await in async mode, whether to block until every put has completed; used on close
     *              so the connection is not torn down under in-flight writes
     */
    private void flush(List<Put> puts, boolean await) throws IOException, InterruptedException {
        if (puts == null || puts.isEmpty()) {
            return;
        }
        if (isAsync) {
            List<java.util.concurrent.CompletableFuture<Object>> futures = asyncTable.batch(puts);
            if (await) {
                java.util.concurrent.CompletableFuture
                        .allOf(futures.toArray(new java.util.concurrent.CompletableFuture[0]))
                        .join();
            }
        } else {
            table.batch(puts, null);
        }
        puts.clear();
    }
}

View File

@@ -23,6 +23,9 @@ import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import static com.zdjizhi.utils.PublicConstants.*;
import static com.zdjizhi.utils.HBaseColumnConstants.*;
@@ -61,7 +64,7 @@ public class HBaseSink extends RichSinkFunction<FileChunk> {
public transient Counter mediaChunksCounter;
private boolean isAsync;
private Connection syncHBaseConnection;
private AsyncConnection AsyncHBaseConnection;
private AsyncConnection asyncHBaseConnection;
private Table table;
private Table indexTimeTable;
private Table indexFilenameTable;
@@ -72,12 +75,12 @@ public class HBaseSink extends RichSinkFunction<FileChunk> {
private List<Put> indexTimePutList;
private List<Put> indexFilenamePutList;
private long chunkSize;
private int chunkCount;
private long maxBatchSize;
private long maxBatchCount;
private ScheduledExecutorService executorService;
private long rateLimitThreshold;
private String rateLimitExpression;
private long timestamp;
private volatile long timestamp;
private long count;
private JexlExpression jexlExpression;
private JexlContext jexlContext;
@@ -148,57 +151,75 @@ public class HBaseSink extends RichSinkFunction<FileChunk> {
metricGroup.meter("numMediaChunksOutPerSecond", new MeterView(mediaChunksCounter));
isAsync = configuration.getBoolean(Configs.SINK_ASYNC);
if (isAsync) {
AsyncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getAsyncHBaseConnection();
asyncTable = AsyncHBaseConnection.getTable(TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET)));
asyncIndexTimeTable = AsyncHBaseConnection.getTable(TableName.valueOf("default:index_time_" + configuration.get(Configs.SINK_HOS_BUCKET)));
asyncIndexFilenameTable = AsyncHBaseConnection.getTable(TableName.valueOf("default:index_filename_" + configuration.get(Configs.SINK_HOS_BUCKET)));
asyncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getAsyncHBaseConnection();
asyncTable = asyncHBaseConnection.getTable(TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET)));
asyncIndexTimeTable = asyncHBaseConnection.getTable(TableName.valueOf("default:index_time_" + configuration.get(Configs.SINK_HOS_BUCKET)));
asyncIndexFilenameTable = asyncHBaseConnection.getTable(TableName.valueOf("default:index_filename_" + configuration.get(Configs.SINK_HOS_BUCKET)));
} else {
syncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getSyncHBaseConnection();
table = syncHBaseConnection.getTable(TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET)));
indexTimeTable = syncHBaseConnection.getTable(TableName.valueOf("default:index_time_" + configuration.get(Configs.SINK_HOS_BUCKET)));
indexFilenameTable = syncHBaseConnection.getTable(TableName.valueOf("default:index_filename_" + configuration.get(Configs.SINK_HOS_BUCKET)));
}
maxBatchSize = configuration.getLong(Configs.SINK_BATCH_SIZE);
maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT);
dataPutList = new ArrayList<>();
indexTimePutList = new ArrayList<>();
indexFilenamePutList = new ArrayList<>();
chunkSize = 0;
chunkCount = 0;
rateLimitThreshold = configuration.getLong(Configs.SINK_RATE_LIMIT_THRESHOLD);
rateLimitExpression = configuration.getString(Configs.SINK_RATE_LIMIT_EXCLUSION_EXPRESSION);
timestamp = System.currentTimeMillis();
count = 0;
JexlEngine jexlEngine = new JexlBuilder().create();
jexlExpression = jexlEngine.createExpression(rateLimitExpression);
jexlContext = new MapContext();
if (configuration.get(Configs.SINK_BATCH)) {
    maxBatchSize = configuration.getLong(Configs.SINK_BATCH_SIZE);
    maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT);
    dataPutList = new ArrayList<>();
    indexTimePutList = new ArrayList<>();
    indexFilenamePutList = new ArrayList<>();
    chunkSize = 0;
    executorService = Executors.newScheduledThreadPool(1);
    long period = configuration.getInteger(Configs.SINK_BATCH_TIME);
    // Periodic flush: once no update of `timestamp` has happened for more than `period` seconds,
    // push out whatever is still buffered so a partially filled batch does not wait for more input.
    executorService.scheduleWithFixedDelay(() -> {
        if (System.currentTimeMillis() - timestamp > (period * 1000)) {
            // The buffer is a plain ArrayList mutated by invoke() under this same monitor, so it
            // must be inspected only while holding the lock; checking it outside could read a
            // stale size or trigger a flush of a batch that invoke() has already sent.
            synchronized (this) {
                if (!dataPutList.isEmpty()) {
                    sendBatchData();
                }
            }
        }
    }, period, period, TimeUnit.SECONDS);
}
// Load the threshold BEFORE testing it. Testing the field first and assigning it inside the
// branch means the configured value can never be the one that enables rate limiting.
rateLimitThreshold = configuration.getLong(Configs.SINK_RATE_LIMIT_THRESHOLD);
if (rateLimitThreshold > 0) {
    rateLimitExpression = configuration.getString(Configs.SINK_RATE_LIMIT_EXCLUSION_EXPRESSION);
    count = 0;
    JexlEngine jexlEngine = new JexlBuilder().create();
    jexlExpression = jexlEngine.createExpression(rateLimitExpression);
    jexlContext = new MapContext();
}
}
@Override
public void invoke(FileChunk fileChunk, Context context) {
chunksInCounter.inc();
bytesInCounter.inc(fileChunk.getLength());
if (rateLimitThreshold > 0) {
count++;
if (System.currentTimeMillis() - timestamp < 1000 && count > rateLimitThreshold) {
if (checkFileChunk(fileChunk)) {
sendFileChunk(fileChunk);
} else {
rateLimitDropChunksCounter.inc();
}
} else if (System.currentTimeMillis() - timestamp >= 1000) {
if (checkFileChunk(fileChunk)) {
sendFileChunk(fileChunk);
} else {
rateLimitDropChunksCounter.inc();
timestamp = System.currentTimeMillis();
synchronized (this) {
long currentTimeMillis = System.currentTimeMillis();
chunksInCounter.inc();
bytesInCounter.inc(fileChunk.getLength());
if (rateLimitThreshold > 0) {
count++;
if (currentTimeMillis - timestamp < 1000 && count > rateLimitThreshold) {
if (checkFileChunk(fileChunk)) {
sendFileChunk(fileChunk);
} else {
rateLimitDropChunksCounter.inc();
}
} else if (currentTimeMillis - timestamp >= 1000) {
if (checkFileChunk(fileChunk)) {
sendFileChunk(fileChunk);
} else {
rateLimitDropChunksCounter.inc();
}
timestamp = currentTimeMillis;
count = 0;
} else {
sendFileChunk(fileChunk);
}
} else {
timestamp = currentTimeMillis;
sendFileChunk(fileChunk);
}
} else {
sendFileChunk(fileChunk);
}
}
@@ -208,7 +229,10 @@ public class HBaseSink extends RichSinkFunction<FileChunk> {
IoUtil.close(indexTimeTable);
IoUtil.close(indexFilenameTable);
IoUtil.close(syncHBaseConnection);
IoUtil.close(AsyncHBaseConnection);
IoUtil.close(asyncHBaseConnection);
if (executorService != null) {
executorService.shutdown();
}
}
private void sendFileChunk(FileChunk fileChunk) {
@@ -254,72 +278,52 @@ public class HBaseSink extends RichSinkFunction<FileChunk> {
metaPut.addColumn(HBaseColumnConstants.BYTE_FAMILY_META, HBaseColumnConstants.BYTE_COLUMN_LAST_MODIFIED, Bytes.toBytes(timestamp));
dataPutList.add(metaPut);
}
chunkCount++;
chunkSize += chunkLength;
chunksOutCounter.inc();
bytesOutCounter.inc(chunkLength);
calculateFileChunkMetrics(fileChunk);
if (chunkSize >= maxBatchSize || chunkCount >= maxBatchCount) {
if (isAsync) {
if (dataPutList.size() > 0) {
List<CompletableFuture<Object>> futures = asyncTable.batch(dataPutList);
CompletableFuture.supplyAsync(() -> {
for (CompletableFuture<Object> completableFuture : futures) {
completableFuture.whenCompleteAsync((result, error) -> {
if (error != null) {
LOG.error("put chunk to hbase error. ", error.getMessage());
errorChunksCounter.inc();
}
});
}
return null;
});
dataPutList.clear();
}
if (indexTimePutList.size() > 0) {
asyncIndexTimeTable.batch(indexTimePutList);
indexTimePutList.clear();
}
if (indexFilenamePutList.size() > 0) {
asyncIndexFilenameTable.batch(indexFilenamePutList);
indexFilenamePutList.clear();
}
} else {
if (dataPutList.size() > 0) {
try {
table.batch(dataPutList, null);
} catch (IOException | InterruptedException e) {
LOG.error("put chunk to hbase data table error. ", e.getMessage());
errorChunksCounter.inc(dataPutList.size());
} finally {
dataPutList.clear();
}
}
if (indexTimePutList.size() > 0) {
try {
indexTimeTable.batch(indexTimePutList, null);
} catch (IOException | InterruptedException e) {
LOG.error("put chunk to hbase index time table error. ", e.getMessage());
} finally {
indexTimePutList.clear();
}
}
if (indexFilenamePutList.size() > 0) {
try {
indexFilenameTable.batch(indexFilenamePutList, null);
} catch (IOException | InterruptedException e) {
LOG.error("put chunk to hbase index filename table error. ", e.getMessage());
} finally {
indexFilenamePutList.clear();
}
}
}
chunkSize = 0;
chunkCount = 0;
if (chunkSize >= maxBatchSize || dataPutList.size() >= maxBatchCount) {
sendBatchData();
}
}
}
/**
 * Writes the buffered data, index-time and index-filename puts to HBase and resets the batch.
 *
 * <p>Callers must hold this sink's monitor: the buffers are plain lists shared between
 * {@code invoke} and the scheduled flush task. Empty buffers are skipped, so calling this
 * with nothing pending issues no request.
 *
 * <p>In sync mode a failure in any of the three writes is logged, counted against
 * {@code errorChunksCounter} with the size of the data batch, and all three buffers are
 * dropped. In async mode each failed data put is logged and counted individually when its
 * future completes; index writes are fire-and-forget.
 */
private void sendBatchData() {
    if (isAsync) {
        if (!dataPutList.isEmpty()) {
            List<CompletableFuture<Object>> futures = asyncTable.batch(dataPutList);
            // Registering a callback is cheap, so attach them directly instead of from a
            // separate supplyAsync task; the callbacks themselves still run asynchronously.
            for (CompletableFuture<Object> future : futures) {
                future.whenCompleteAsync((result, error) -> {
                    if (error != null) {
                        // The placeholder is required, otherwise the message argument is discarded.
                        LOG.error("Put chunk to hbase error. {}", error.getMessage());
                        errorChunksCounter.inc();
                    }
                });
            }
            dataPutList.clear();
        }
        if (!indexTimePutList.isEmpty()) {
            asyncIndexTimeTable.batch(indexTimePutList);
            indexTimePutList.clear();
        }
        if (!indexFilenamePutList.isEmpty()) {
            asyncIndexFilenameTable.batch(indexFilenamePutList);
            indexFilenamePutList.clear();
        }
    } else {
        try {
            if (!dataPutList.isEmpty()) {
                table.batch(dataPutList, null);
            }
            if (!indexTimePutList.isEmpty()) {
                indexTimeTable.batch(indexTimePutList, null);
            }
            if (!indexFilenamePutList.isEmpty()) {
                indexFilenameTable.batch(indexFilenamePutList, null);
            }
        } catch (IOException | InterruptedException e) {
            if (e instanceof InterruptedException) {
                // Restore the flag so the owning thread can still observe the interruption.
                Thread.currentThread().interrupt();
            }
            LOG.error("Put chunk to hbase error. {}", e.getMessage());
            errorChunksCounter.inc(dataPutList.size());
        } finally {
            dataPutList.clear();
            indexTimePutList.clear();
            indexFilenamePutList.clear();
        }
    }
    chunkSize = 0;
}
private boolean checkFileChunk(FileChunk fileChunk) {
if (StrUtil.isNotEmpty(rateLimitExpression)) {
jexlContext.set(fileChunk.getClass().getSimpleName(), fileChunk);

View File

@@ -28,6 +28,9 @@ import org.apache.http.util.EntityUtils;
import java.io.IOException;
import java.net.ConnectException;
import java.util.*;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import static com.zdjizhi.utils.HttpHeaderConstants.*;
import static com.zdjizhi.utils.PublicConstants.*;
@@ -73,16 +76,16 @@ public class HosSink extends RichSinkFunction<FileChunk> {
private String token;
private volatile String bathPutUrl;
private HashMap<String, String> hosMessage;
private String objectsMeta = "";
private String objectsOffset = "";
private String objectsMeta;
private String objectsOffset;
private List<byte[]> byteList;
private long maxBatchSize;
private long maxBatchCount;
private long chunkSize = 0;
private int chunkCount = 0;
private long chunkSize;
private ScheduledExecutorService executorService;
private long rateLimitThreshold;
private String rateLimitExpression;
private long timestamp;
private volatile long timestamp;
private long count;
private JexlExpression jexlExpression;
private JexlContext jexlContext;
@@ -167,47 +170,67 @@ public class HosSink extends RichSinkFunction<FileChunk> {
} else {
syncHttpClient = HttpClientUtil.getInstance(configuration).getSyncHttpClient();
}
bathPutUrl = URLUtil.normalize(endpoint + "/hos/" + configuration.get(Configs.SINK_HOS_BUCKET) + "/" + PublicUtil.getUUID()) + "?multiFile";
maxBatchSize = configuration.getLong(Configs.SINK_BATCH_SIZE);
maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT);
hosMessage = new HashMap<>();
objectsMeta = "";
objectsOffset = "";
byteList = new ArrayList<>();
rateLimitThreshold = configuration.getLong(Configs.SINK_RATE_LIMIT_THRESHOLD);
rateLimitExpression = configuration.getString(Configs.SINK_RATE_LIMIT_EXCLUSION_EXPRESSION);
timestamp = System.currentTimeMillis();
count = 0;
JexlEngine jexlEngine = new JexlBuilder().create();
jexlExpression = jexlEngine.createExpression(rateLimitExpression);
jexlContext = new MapContext();
if (configuration.get(Configs.SINK_BATCH)) {
    bathPutUrl = URLUtil.normalize(endpoint + "/hos/" + configuration.get(Configs.SINK_HOS_BUCKET) + "/" + PublicUtil.getUUID()) + "?multiFile";
    maxBatchSize = configuration.getLong(Configs.SINK_BATCH_SIZE);
    maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT);
    hosMessage = new HashMap<>();
    byteList = new ArrayList<>();
    objectsMeta = "";
    objectsOffset = "";
    chunkSize = 0;
    executorService = Executors.newScheduledThreadPool(1);
    long period = configuration.getInteger(Configs.SINK_BATCH_TIME);
    // Periodic flush: once no update of `timestamp` has happened for more than `period` seconds,
    // push out whatever is still buffered so a partially filled batch does not wait for more input.
    executorService.scheduleWithFixedDelay(() -> {
        if (System.currentTimeMillis() - timestamp > (period * 1000)) {
            // The buffer is a plain ArrayList mutated by invoke() under this same monitor, so it
            // must be inspected only while holding the lock; checking it outside could read a
            // stale size or trigger a flush of a batch that invoke() has already sent.
            synchronized (this) {
                if (!byteList.isEmpty()) {
                    sendBatchData();
                }
            }
        }
    }, period, period, TimeUnit.SECONDS);
}
// Load the threshold BEFORE testing it. Testing the field first and assigning it inside the
// branch means the configured value can never be the one that enables rate limiting.
rateLimitThreshold = configuration.getLong(Configs.SINK_RATE_LIMIT_THRESHOLD);
if (rateLimitThreshold > 0) {
    rateLimitExpression = configuration.getString(Configs.SINK_RATE_LIMIT_EXCLUSION_EXPRESSION);
    count = 0;
    JexlEngine jexlEngine = new JexlBuilder().create();
    jexlExpression = jexlEngine.createExpression(rateLimitExpression);
    jexlContext = new MapContext();
}
}
@Override
public void invoke(FileChunk fileChunk, Context context) {
chunksInCounter.inc();
bytesInCounter.inc(fileChunk.getLength());
if (rateLimitThreshold > 0) {
count++;
if (System.currentTimeMillis() - timestamp < 1000 && count > rateLimitThreshold) {
if (checkFileChunk(fileChunk)) {
sendFileChunk(fileChunk);
} else {
rateLimitDropChunksCounter.inc();
}
} else if (System.currentTimeMillis() - timestamp >= 1000) {
if (checkFileChunk(fileChunk)) {
sendFileChunk(fileChunk);
} else {
rateLimitDropChunksCounter.inc();
timestamp = System.currentTimeMillis();
synchronized (this) {
long currentTimeMillis = System.currentTimeMillis();
chunksInCounter.inc();
bytesInCounter.inc(fileChunk.getLength());
if (rateLimitThreshold > 0) {
count++;
if (currentTimeMillis - timestamp < 1000 && count > rateLimitThreshold) {
if (checkFileChunk(fileChunk)) {
sendFileChunk(fileChunk);
} else {
rateLimitDropChunksCounter.inc();
}
} else if (currentTimeMillis - timestamp >= 1000) {
if (checkFileChunk(fileChunk)) {
sendFileChunk(fileChunk);
} else {
rateLimitDropChunksCounter.inc();
}
timestamp = currentTimeMillis;
count = 0;
} else {
sendFileChunk(fileChunk);
}
} else {
timestamp = currentTimeMillis;
sendFileChunk(fileChunk);
}
} else {
sendFileChunk(fileChunk);
}
}
@@ -215,6 +238,9 @@ public class HosSink extends RichSinkFunction<FileChunk> {
public void close() {
IoUtil.close(syncHttpClient);
IoUtil.close(asyncHttpClient);
if (executorService != null) {
executorService.shutdown();
}
}
private void sendFileChunk(FileChunk fileChunk) {
@@ -236,7 +262,7 @@ public class HosSink extends RichSinkFunction<FileChunk> {
}
hosMessage.put(HOS_PART_CHUNK_COUNT, fileChunk.getChunkCount() + "");
Map<String, Object> metaMap = fileChunk.getMeta();
if (metaMap != null && metaMap.size() > 0) {
if (metaMap != null && !metaMap.isEmpty()) {
for (String meta : metaMap.keySet()) {
hosMessage.put(HOS_META_PREFIX + StrUtil.toSymbolCase(meta, CharUtil.DASHED), metaMap.get(meta) + "");
}
@@ -245,28 +271,12 @@ public class HosSink extends RichSinkFunction<FileChunk> {
hosMessage.clear();
objectsOffset += chunkLength + ";";
byteList.add(data);
chunkCount++;
chunkSize += chunkLength;
chunksOutCounter.inc();
bytesOutCounter.inc(chunkLength);
calculateFileChunkMetrics(fileChunk);
if (chunkSize >= maxBatchSize || chunkCount >= maxBatchCount) {
HttpPut httpPut = new HttpPut(bathPutUrl);
httpPut.setHeader(TOKEN, token);
httpPut.setHeader(HOS_UPLOAD_TYPE, UPLOAD_TYPE_APPENDV2);
httpPut.setHeader(HOS_COMBINE_MODE, fileChunk.getCombineMode());
httpPut.setHeader(HOS_OBJECTS_META, objectsMeta);
httpPut.setHeader(HOS_OBJECTS_OFFSET, objectsOffset);
byte[][] bytes = new byte[byteList.size()][];
byteList.toArray(bytes);
byte[] newData = ArrayUtil.addAll(bytes);
httpPut.setEntity(new ByteArrayEntity(newData));
byteList.clear();
executeRequest(httpPut);
objectsMeta = "";
objectsOffset = "";
chunkSize = 0;
chunkCount = 0;
if (chunkSize >= maxBatchSize || byteList.size() >= maxBatchCount) {
sendBatchData();
}
} else {
String url = URLUtil.normalize(endpoint + "/hos/" + configuration.get(Configs.SINK_HOS_BUCKET) + "/" + fileChunk.getUuid());
@@ -292,7 +302,7 @@ public class HosSink extends RichSinkFunction<FileChunk> {
}
httpPut.setHeader(HOS_PART_CHUNK_COUNT, fileChunk.getChunkCount() + "");
Map<String, Object> metaMap = fileChunk.getMeta();
if (metaMap != null && metaMap.size() > 0) {
if (metaMap != null && !metaMap.isEmpty()) {
for (String meta : metaMap.keySet()) {
httpPut.setHeader(HOS_META_PREFIX + StrUtil.toSymbolCase(meta, CharUtil.DASHED), metaMap.get(meta) + "");
}
@@ -309,6 +319,24 @@ public class HosSink extends RichSinkFunction<FileChunk> {
}
}
/**
 * Sends all buffered chunks to HOS as one multi-file append request and resets the batch.
 *
 * <p>The request carries the accumulated per-object meta and offset headers and the
 * concatenation of the buffered chunk bodies, and always uses the seek combine mode.
 * Callers must hold this sink's monitor: the buffers are shared between {@code invoke}
 * and the scheduled flush task.
 */
private void sendBatchData() {
    // The scheduled flush can be granted the lock just after invoke() has sent the same batch;
    // without this guard it would then issue a PUT with an empty body and empty headers.
    if (byteList.isEmpty()) {
        return;
    }
    HttpPut httpPut = new HttpPut(bathPutUrl);
    httpPut.setHeader(TOKEN, token);
    httpPut.setHeader(HOS_UPLOAD_TYPE, UPLOAD_TYPE_APPENDV2);
    httpPut.setHeader(HOS_COMBINE_MODE, COMBINE_MODE_SEEK);
    httpPut.setHeader(HOS_OBJECTS_META, objectsMeta);
    httpPut.setHeader(HOS_OBJECTS_OFFSET, objectsOffset);
    // Concatenate the buffered chunk bodies, in arrival order, into the single request entity.
    byte[][] bytes = byteList.toArray(new byte[0][]);
    byte[] newData = ArrayUtil.addAll(bytes);
    httpPut.setEntity(new ByteArrayEntity(newData));
    executeRequest(httpPut);
    objectsMeta = "";
    objectsOffset = "";
    byteList.clear();
    chunkSize = 0;
}
private void executeRequest(HttpPut httpPut) {
if (isAsync) {
asyncHttpClient.execute(httpPut, new FutureCallback<HttpResponse>() {

View File

@@ -73,7 +73,6 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
private CloseableHttpClient syncHttpClient;
private CloseableHttpAsyncClient asyncHttpClient;
private List<String> endpointList;
private CaffeineCacheUtil caffeineCacheUtil;
private Cache<String, FileChunk> cache;
public OssSinkByCaffeineCache(Configuration configuration) {
@@ -92,8 +91,7 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
} else {
syncHttpClient = HttpClientUtil.getInstance(configuration).getSyncHttpClient();
}
caffeineCacheUtil = CaffeineCacheUtil.getInstance(configuration);
cache = caffeineCacheUtil.getCaffeineCache();
cache = CaffeineCacheUtil.getInstance(configuration).getCaffeineCache();
metricGroup.gauge("cacheLength", (Gauge<Long>) () -> cache.estimatedSize());
lessThan1KBChunksCounter = metricGroup.counter("lessThan1KBChunksCount");
between1KBAnd5KBChunksCounter = metricGroup.counter("between1KBAnd5KBChunksCount");
@@ -183,8 +181,8 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
}
FileChunk data = cache.getIfPresent(uuid + "_data");
if (data != null) {
sendFile(data, meta);
cache.invalidate(uuid + "_data");
sendFile(data, meta);
} else {
cache.put(fileChunk.getUuid() + "_meta", fileChunk);
}
@@ -193,8 +191,8 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
bytesInCounter.inc(fileChunk.getLength());
FileChunk meta = cache.getIfPresent(uuid + "_meta");
if (meta != null) {
sendFile(fileChunk, meta.getMeta());
cache.invalidate(uuid + "_meta");
sendFile(fileChunk, meta.getMeta());
} else {
cache.put(fileChunk.getUuid() + "_data", fileChunk);
}
@@ -205,7 +203,6 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
public void close() {
IoUtil.close(syncHttpClient);
IoUtil.close(asyncHttpClient);
caffeineCacheUtil.close();
}
private void sendFile(FileChunk fileChunk, Map<String, Object> metaMap) {
@@ -322,6 +319,15 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
} else if (fileId.contains("_2")) {
responseFilesCounter.inc();
}
if (fileChunk.getChunk() == null) {
nullChunksCounter.inc();
if ("eml".equals(fileType)) {
nullEmlChunksCounter.inc();
} else if ("txt".equals(fileType)) {
nullTxtChunksCounter.inc();
}
LOG.info("send file data is null. " + fileChunk.toString());
}
if (fileChunk.getOffset() == 0 && fileChunk.getLastChunkFlag() == 1) {
completeFilesCounter.inc();
if ("eml".equals(fileType)) {
@@ -329,15 +335,6 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
} else if ("txt".equals(fileType)) {
completeTxtFilesCounter.inc();
}
if (fileChunk.getChunk() == null) {
nullChunksCounter.inc();
if ("eml".equals(fileType)) {
nullEmlChunksCounter.inc();
} else if ("txt".equals(fileType)) {
nullTxtChunksCounter.inc();
}
LOG.info("send file data is null. " + fileChunk.toString());
}
if (fileId.contains("_1")) {
completeRequestFilesCounter.inc();
} else if (fileId.contains("_2")) {

View File

@@ -1,396 +0,0 @@
package com.zdjizhi.sink;
import cn.hutool.core.io.IoUtil;
import cn.hutool.core.util.RandomUtil;
import cn.hutool.core.util.URLUtil;
import cn.hutool.log.Log;
import cn.hutool.log.LogFactory;
import com.zdjizhi.config.Configs;
import com.zdjizhi.pojo.FileChunk;
import com.zdjizhi.utils.EhcacheUtil;
import com.zdjizhi.utils.FormatUtils;
import com.zdjizhi.utils.HttpClientUtil;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.metrics.Counter;
import org.apache.flink.metrics.MeterView;
import org.apache.flink.metrics.MetricGroup;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.concurrent.FutureCallback;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.nio.client.CloseableHttpAsyncClient;
import org.apache.http.util.EntityUtils;
import org.ehcache.Cache;
import org.ehcache.CacheManager;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
public class OssSinkByEhcache extends RichSinkFunction<FileChunk> {
private static final Log LOG = LogFactory.get();
// Job configuration handed in through the constructor; read in open().
private final Configuration configuration;
// All counters below are created in open() under the metric group
// "file_chunk_combiner" / "sink_oss".
// Throughput counters: chunks/bytes received by invoke() and chunks/bytes posted by sendFile().
public transient Counter chunksInCounter;
public transient Counter chunksOutCounter;
public transient Counter bytesInCounter;
public transient Counter bytesOutCounter;
// Incremented whenever posting a chunk fails or the response is not accepted.
public transient Counter errorChunksCounter;
// Metadata records received by invoke(); request/response split is based on
// the fileId containing "_1" or "_2".
public transient Counter fileMetasCounter;
public transient Counter requestFileMetasCounter;
public transient Counter responseFileMetasCounter;
// Sent chunks split by request/response, using the same fileId convention.
public transient Counter requestFilesCounter;
public transient Counter responseFilesCounter;
// Sent chunks split by file type ("eml" / "txt").
public transient Counter emlChunksCounter;
public transient Counter txtChunksCounter;
// Chunks that carry a whole file (offset == 0 and lastChunkFlag == 1).
public transient Counter completeFilesCounter;
public transient Counter completeEmlFilesCounter;
public transient Counter completeTxtFilesCounter;
public transient Counter completeRequestFilesCounter;
public transient Counter completeResponseFilesCounter;
// Chunks whose payload (getChunk()) is null.
public transient Counter nullChunksCounter;
public transient Counter nullTxtChunksCounter;
public transient Counter nullEmlChunksCounter;
// Size histogram over all sent chunks; see calculateChunkSize().
public transient Counter lessThan1KBChunksCounter;
public transient Counter between1KBAnd5KBChunksCounter;
public transient Counter between5KBAnd10KBChunksCounter;
public transient Counter between10KBAnd100KBChunksCounter;
public transient Counter between100KBAnd1MBChunksCounter;
public transient Counter greaterThan1MBChunksCounter;
// Size histogram over sent "eml" chunks; see calculateEmlChunkSize().
public transient Counter lessThan10KBEmlChunksCounter;
public transient Counter between1MBAnd10MBEmlChunksCounter;
public transient Counter between10KBAnd100KBEmlChunksCounter;
public transient Counter between100KBAnd1MBEmlChunksCounter;
public transient Counter greaterThan10MBEmlChunksCounter;
// Size histogram over sent "txt" chunks; see calculateTxtChunkSize().
public transient Counter lessThan10KBTxtChunksCounter;
public transient Counter between1MBAnd10MBTxtChunksCounter;
public transient Counter between10KBAnd100KBTxtChunksCounter;
public transient Counter between100KBAnd1MBTxtChunksCounter;
public transient Counter greaterThan10MBTxtChunksCounter;
// Taken from Configs.SINK_OSS_ASYNC in open(); selects which HTTP client is created and used.
private boolean isAsync;
// Only one of the two clients is initialised in open(), depending on isAsync.
private CloseableHttpClient syncHttpClient;
private CloseableHttpAsyncClient asyncHttpClient;
// Upload endpoints, split on ',' from Configs.SINK_OSS_ENDPOINT; one is picked at random per upload.
private List<String> endpointList;
private EhcacheUtil ehcacheUtil;
// Ehcache cache "data": file chunks that arrived before their metadata, keyed by uuid.
private Cache<String, FileChunk> dataCache;
// Ehcache cache "meta": metadata records that arrived before their file chunk, keyed by uuid.
private Cache<String, FileChunk> metaCache;
// Stores the configuration only; clients, caches and metrics are set up in open().
public OssSinkByEhcache(Configuration configuration) {
this.configuration = configuration;
}
/**
 * Initialises the sink: reads the endpoint list and sync/async mode from the
 * configuration, creates the matching HTTP client, looks up the "data" and
 * "meta" Ehcache caches, and registers every counter and per-second meter
 * under the metric group "file_chunk_combiner" / "sink_oss".
 *
 * @param parameters passed straight through to {@code super.open}
 * @throws Exception if the parent initialisation or any setup step fails
 */
@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);
    MetricGroup metricGroup = getRuntimeContext().getMetricGroup().addGroup("file_chunk_combiner", "sink_oss");

    // Upload targets and HTTP client selection.
    endpointList = Arrays.asList(configuration.get(Configs.SINK_OSS_ENDPOINT).split(","));
    isAsync = configuration.getBoolean(Configs.SINK_OSS_ASYNC);
    if (isAsync) {
        asyncHttpClient = HttpClientUtil.getInstance(configuration).getAsyncHttpClient();
        asyncHttpClient.start();
    } else {
        syncHttpClient = HttpClientUtil.getInstance(configuration).getSyncHttpClient();
    }

    // Caches used to pair a file chunk with its metadata record.
    ehcacheUtil = EhcacheUtil.getInstance();
    CacheManager ehcacheManager = EhcacheUtil.getInstance().getEhcacheManager();
    dataCache = ehcacheManager.getCache("data", String.class, FileChunk.class);
    metaCache = ehcacheManager.getCache("meta", String.class, FileChunk.class);

    // Size histogram over all chunks.
    lessThan1KBChunksCounter = metricGroup.counter("lessThan1KBChunksCount");
    between1KBAnd5KBChunksCounter = metricGroup.counter("between1KBAnd5KBChunksCount");
    between5KBAnd10KBChunksCounter = metricGroup.counter("between5KBAnd10KBChunksCount");
    between10KBAnd100KBChunksCounter = metricGroup.counter("between10KBAnd100KBChunksCount");
    between100KBAnd1MBChunksCounter = metricGroup.counter("between100KBAnd1MBChunksCount");
    greaterThan1MBChunksCounter = metricGroup.counter("greaterThan1MBChunksCount");
    metricGroup.meter("numLessThan1KBFilesOutPerSecond", new MeterView(lessThan1KBChunksCounter));
    metricGroup.meter("numBetween1KBAnd5KBFilesOutPerSecond", new MeterView(between1KBAnd5KBChunksCounter));
    metricGroup.meter("numBetween5KBAnd10KBFilesOutPerSecond", new MeterView(between5KBAnd10KBChunksCounter));
    metricGroup.meter("numBetween10KBAnd100KBFilesOutPerSecond", new MeterView(between10KBAnd100KBChunksCounter));
    metricGroup.meter("numBetween100KBAnd1MBFilesOutPerSecond", new MeterView(between100KBAnd1MBChunksCounter));
    metricGroup.meter("numGreaterThan1MBFilesOutPerSecond", new MeterView(greaterThan1MBChunksCounter));

    // Size histogram over "eml" chunks.
    lessThan10KBEmlChunksCounter = metricGroup.counter("lessThan10KBEmlChunksCount");
    between10KBAnd100KBEmlChunksCounter = metricGroup.counter("between10KBAnd100KBEmlChunksCount");
    between100KBAnd1MBEmlChunksCounter = metricGroup.counter("between100KBAnd1MBEmlChunksCount");
    between1MBAnd10MBEmlChunksCounter = metricGroup.counter("between1MBAnd10MBEmlChunksCount");
    greaterThan10MBEmlChunksCounter = metricGroup.counter("greaterThan10MBEmlChunksCount");
    metricGroup.meter("numLessThan10KBEmlFilesOutPerSecond", new MeterView(lessThan10KBEmlChunksCounter));
    metricGroup.meter("numBetween10KBAnd100KBEmlFilesOutPerSecond", new MeterView(between10KBAnd100KBEmlChunksCounter));
    metricGroup.meter("numBetween100KBAnd1MBEmlFilesOutPerSecond", new MeterView(between100KBAnd1MBEmlChunksCounter));
    metricGroup.meter("numBetween1MBAnd10MBEmlFilesOutPerSecond", new MeterView(between1MBAnd10MBEmlChunksCounter));
    metricGroup.meter("numGreaterThan10MBEmlFilesOutPerSecond", new MeterView(greaterThan10MBEmlChunksCounter));

    // Size histogram over "txt" chunks.
    lessThan10KBTxtChunksCounter = metricGroup.counter("lessThan10KBTxtChunksCount");
    between10KBAnd100KBTxtChunksCounter = metricGroup.counter("between10KBAnd100KBTxtChunksCount");
    between100KBAnd1MBTxtChunksCounter = metricGroup.counter("between100KBAnd1MBTxtChunksCount");
    between1MBAnd10MBTxtChunksCounter = metricGroup.counter("between1MBAnd10MBTxtChunksCount");
    greaterThan10MBTxtChunksCounter = metricGroup.counter("greaterThan10MBTxtChunksCount");
    metricGroup.meter("numLessThan10KBTxtChunksOutPerSecond", new MeterView(lessThan10KBTxtChunksCounter));
    metricGroup.meter("numBetween10KBAnd100KBTxtChunksOutPerSecond", new MeterView(between10KBAnd100KBTxtChunksCounter));
    metricGroup.meter("numBetween100KBAnd1MBTxtChunksOutPerSecond", new MeterView(between100KBAnd1MBTxtChunksCounter));
    metricGroup.meter("numBetween1MBAnd10MBTxtChunksOutPerSecond", new MeterView(between1MBAnd10MBTxtChunksCounter));
    metricGroup.meter("numGreaterThan10MBTxtChunksOutPerSecond", new MeterView(greaterThan10MBTxtChunksCounter));

    // Per-file-type chunk counts.
    emlChunksCounter = metricGroup.counter("emlChunksCount");
    txtChunksCounter = metricGroup.counter("txtChunksCount");
    metricGroup.meter("numEmlChunksOutPerSecond", new MeterView(emlChunksCounter));
    metricGroup.meter("numTxtChunksOutPerSecond", new MeterView(txtChunksCounter));

    // Metadata records and request/response split.
    fileMetasCounter = metricGroup.counter("fileMetasCount");
    metricGroup.meter("numFileMetasInPerSecond", new MeterView(fileMetasCounter));
    requestFileMetasCounter = metricGroup.counter("requestFileMetasCount");
    responseFileMetasCounter = metricGroup.counter("responseFileMetasCount");
    requestFilesCounter = metricGroup.counter("requestFilesCount");
    responseFilesCounter = metricGroup.counter("responseFilesCount");
    metricGroup.meter("numRequestFileMetasInPerSecond", new MeterView(requestFileMetasCounter));
    metricGroup.meter("numResponseFileMetasInPerSecond", new MeterView(responseFileMetasCounter));
    metricGroup.meter("numRequestFilesOutPerSecond", new MeterView(requestFilesCounter));
    metricGroup.meter("numResponseFilesOutPerSecond", new MeterView(responseFilesCounter));

    // Overall throughput and errors.
    errorChunksCounter = metricGroup.counter("errorChunksCount");
    chunksInCounter = metricGroup.counter("chunksInCount");
    chunksOutCounter = metricGroup.counter("chunksOutCount");
    bytesInCounter = metricGroup.counter("bytesInCount");
    bytesOutCounter = metricGroup.counter("bytesOutCount");
    metricGroup.meter("numChunksInPerSecond", new MeterView(chunksInCounter));
    metricGroup.meter("numChunksOutPerSecond", new MeterView(chunksOutCounter));
    metricGroup.meter("numBytesInPerSecond", new MeterView(bytesInCounter));
    metricGroup.meter("numBytesOutPerSecond", new MeterView(bytesOutCounter));
    metricGroup.meter("numErrorChunksPerSecond", new MeterView(errorChunksCounter));

    // Chunks that carry a complete file.
    completeFilesCounter = metricGroup.counter("completeFilesCount");
    completeEmlFilesCounter = metricGroup.counter("completeEmlFilesCount");
    completeTxtFilesCounter = metricGroup.counter("completeTxtFilesCount");
    completeRequestFilesCounter = metricGroup.counter("completeRequestFilesCount");
    completeResponseFilesCounter = metricGroup.counter("completeResponseFilesCount");
    metricGroup.meter("numCompleteFilesOutPerSecond", new MeterView(completeFilesCounter));
    metricGroup.meter("numCompleteEmlFilesOutPerSecond", new MeterView(completeEmlFilesCounter));
    metricGroup.meter("numCompleteTxtFilesOutPerSecond", new MeterView(completeTxtFilesCounter));
    metricGroup.meter("numCompleteRequestFilesOutPerSecond", new MeterView(completeRequestFilesCounter));
    metricGroup.meter("numCompleteResponseFilesOutPerSecond", new MeterView(completeResponseFilesCounter));

    // Chunks with a null payload. The eml/txt metric names were previously
    // swapped, so each counter was published under the other type's name.
    nullChunksCounter = metricGroup.counter("nullChunksCount");
    nullEmlChunksCounter = metricGroup.counter("nullEmlChunksCount");
    nullTxtChunksCounter = metricGroup.counter("nullTxtChunksCount");
    metricGroup.meter("numNullFilesOutPerSecond", new MeterView(nullChunksCounter));
    metricGroup.meter("numNullEmlFilesOutPerSecond", new MeterView(nullEmlChunksCounter));
    metricGroup.meter("numNullTxtFilesOutPerSecond", new MeterView(nullTxtChunksCounter));
}
/**
 * Pairs each incoming record with its counterpart by uuid and uploads the
 * file once both halves are present.
 *
 * A record with non-null meta is a metadata (log) record; otherwise it is a
 * file chunk. Whichever half arrives first is parked in its cache
 * (metaCache / dataCache); when the second half arrives the file is sent
 * through sendFile and the parked entry is removed.
 *
 * @param fileChunk the metadata record or file chunk to process
 * @param context   sink context (unused)
 */
@Override
public void invoke(FileChunk fileChunk, Context context) {
    String uuid = fileChunk.getUuid();
    Map<String, Object> meta = fileChunk.getMeta();
    if (meta != null) { // metadata (log) record
        fileMetasCounter.inc();
        // fileId is optional in sendFile, so a missing value must not fail
        // the sink here either; it is simply not classified.
        Object fileIdValue = meta.get("fileId");
        String fileId = fileIdValue == null ? "" : fileIdValue.toString();
        if (fileId.contains("_1")) {
            requestFileMetasCounter.inc();
        } else if (fileId.contains("_2")) {
            responseFileMetasCounter.inc();
        }
        FileChunk data = dataCache.get(uuid);
        if (data != null) {
            sendFile(data, meta);
            dataCache.remove(uuid);
        } else {
            metaCache.put(uuid, fileChunk);
        }
    } else { // file chunk
        chunksInCounter.inc();
        bytesInCounter.inc(fileChunk.getLength());
        FileChunk cachedMeta = metaCache.get(uuid);
        if (cachedMeta != null) {
            sendFile(fileChunk, cachedMeta.getMeta());
            metaCache.remove(uuid);
        } else {
            dataCache.put(uuid, fileChunk);
        }
    }
}
/**
 * Releases the HTTP clients and the Ehcache utility.
 *
 * Only one of the two HTTP clients is created in open(), so the other is
 * still null here; the original code already relied on IoUtil.close
 * accepting that.
 */
@Override
public void close() {
    IoUtil.close(syncHttpClient);
    IoUtil.close(asyncHttpClient);
    // ehcacheUtil is still null if open() failed before assigning it;
    // guard so close() does not throw a NullPointerException on top of the
    // original failure.
    if (ehcacheUtil != null) {
        ehcacheUtil.close();
    }
}
/**
 * Posts one file chunk to a randomly chosen endpoint's /v3/upload API.
 *
 * The query string is built from the metadata map; absent keys fall back
 * to "" (or "0" for policyId and foundTime). A null payload is sent as an
 * empty body. Any exception is logged and counted in errorChunksCounter.
 *
 * @param fileChunk chunk whose payload and file type are uploaded
 * @param metaMap   metadata of the file, may be null
 */
private void sendFile(FileChunk fileChunk, Map<String, Object> metaMap) {
    String url = "";
    try {
        String fileType = fileChunk.getFileType();
        byte[] data = fileChunk.getChunk() != null ? fileChunk.getChunk() : new byte[0];

        String fileId = metaText(metaMap, "fileId", "");
        String policyId = metaText(metaMap, "policyId", "0");
        String serverIP = metaText(metaMap, "serverIP", "");
        String serverPort = metaText(metaMap, "serverPort", "");
        String clientIP = metaText(metaMap, "clientIP", "");
        String clientPort = metaText(metaMap, "clientPort", "");
        // The host is reduced to its top private domain only when present.
        boolean hasHost = metaMap != null && metaMap.containsKey("httpHost");
        String domain = hasHost ? FormatUtils.getTopPrivateDomain(metaMap.get("httpHost").toString()) : "";
        String subscriberId = metaText(metaMap, "subscriberId", "");
        String foundTime = metaText(metaMap, "foundTime", "0");

        String endpoint = endpointList.get(RandomUtil.randomInt(endpointList.size()));
        StringBuilder target = new StringBuilder();
        target.append(endpoint).append("/v3/upload?")
                .append("cfg_id=").append(policyId)
                .append("&file_id=").append(fileId)
                .append("&file_type=").append(fileType)
                .append("&found_time=").append(foundTime)
                .append("&s_ip=").append(serverIP)
                .append("&s_port=").append(serverPort)
                .append("&d_ip=").append(clientIP)
                .append("&d_port=").append(clientPort)
                .append("&domain=").append(domain)
                .append("&account=").append(subscriberId);
        url = URLUtil.normalize(target.toString());

        HttpPost uploadRequest = new HttpPost(url);
        uploadRequest.setEntity(new ByteArrayEntity(data));
        executeRequest(uploadRequest, url);

        chunksOutCounter.inc();
        bytesOutCounter.inc(data.length);
        calculateFileChunkMetrics(fileChunk, fileId);
    } catch (Exception e) {
        LOG.error("post file error. current url: " + url, e);
        errorChunksCounter.inc();
    }
}

// Returns metaMap's value for key as a string, or fallback when the map is null or lacks the key.
private static String metaText(Map<String, Object> metaMap, String key, String fallback) {
    if (metaMap == null || !metaMap.containsKey(key)) {
        return fallback;
    }
    return metaMap.get(key).toString();
}
/**
 * Executes the upload request with the sync or async HTTP client,
 * depending on isAsync, and validates the response.
 *
 * Failures are logged and counted in errorChunksCounter; nothing is thrown
 * for I/O errors. A cancelled async request is ignored.
 *
 * @param httpPost prepared upload request
 * @param url      request URL, used only for log messages
 */
private void executeRequest(HttpPost httpPost, String url) {
    if (!isAsync) {
        CloseableHttpResponse syncResponse = null;
        try {
            syncResponse = syncHttpClient.execute(httpPost);
            inspectUploadResponse(syncResponse, url);
        } catch (IOException e) {
            LOG.error("post file error. current url: " + url, e);
            errorChunksCounter.inc();
        } finally {
            IoUtil.close(syncResponse);
        }
        return;
    }
    asyncHttpClient.execute(httpPost, new FutureCallback<HttpResponse>() {
        @Override
        public void completed(HttpResponse httpResponse) {
            inspectUploadResponse(httpResponse, url);
        }

        @Override
        public void failed(Exception ex) {
            LOG.error("post file error. current url: " + url, ex);
            errorChunksCounter.inc();
        }

        @Override
        public void cancelled() {
        }
    });
}

// Logs and counts an error unless the response is HTTP 200 with a body containing "code":200.
private void inspectUploadResponse(HttpResponse response, String url) {
    try {
        String body = EntityUtils.toString(response.getEntity(), "UTF-8");
        int statusCode = response.getStatusLine().getStatusCode();
        if (statusCode != 200) {
            LOG.error("post file error. current url: {}, code: {}, msg: {}", url, statusCode, body);
            errorChunksCounter.inc();
            return;
        }
        if (!body.contains("\"code\":200")) {
            LOG.error("post file error. current url: {}, msg: {}", url, body);
            errorChunksCounter.inc();
        }
    } catch (IOException e) {
        LOG.error("post file error. current url: " + url, e);
        errorChunksCounter.inc();
    }
}
/**
 * Updates the per-chunk metrics after a chunk has been sent: size
 * histograms, file-type counts, request/response counts, null-payload
 * counts and complete-file counts.
 *
 * @param fileChunk the chunk that was sent
 * @param fileId    file id from the metadata; "_1" marks a request file and
 *                  "_2" a response file
 */
private void calculateFileChunkMetrics(FileChunk fileChunk, String fileId) {
    String fileType = fileChunk.getFileType();
    long length = fileChunk.getLength();
    calculateChunkSize(length);
    if ("eml".equals(fileType)) {
        emlChunksCounter.inc();
        calculateEmlChunkSize(length);
    } else if ("txt".equals(fileType)) {
        txtChunksCounter.inc();
        calculateTxtChunkSize(length);
    }
    if (fileId.contains("_1")) {
        requestFilesCounter.inc();
    } else if (fileId.contains("_2")) {
        responseFilesCounter.inc();
    }
    // Count null payloads for every sent chunk, not only for single-chunk
    // complete files, matching OssSinkByCaffeineCache.
    if (fileChunk.getChunk() == null) {
        nullChunksCounter.inc();
        if ("eml".equals(fileType)) {
            nullEmlChunksCounter.inc();
        } else if ("txt".equals(fileType)) {
            nullTxtChunksCounter.inc();
        }
        LOG.info("send file data is null. " + fileChunk.toString());
    }
    // A chunk starting at offset 0 that is also the last chunk is a whole file.
    if (fileChunk.getOffset() == 0 && fileChunk.getLastChunkFlag() == 1) {
        completeFilesCounter.inc();
        if ("eml".equals(fileType)) {
            completeEmlFilesCounter.inc();
        } else if ("txt".equals(fileType)) {
            completeTxtFilesCounter.inc();
        }
        if (fileId.contains("_1")) {
            completeRequestFilesCounter.inc();
        } else if (fileId.contains("_2")) {
            completeResponseFilesCounter.inc();
        }
    }
}
/**
 * Increments the size-bucket counter that matches the chunk length.
 * Bucket upper bounds are inclusive: 1 KB, 5 KB, 10 KB, 100 KB, 1 MB;
 * anything larger lands in the "greater than 1 MB" bucket.
 *
 * @param length chunk length in bytes
 */
private void calculateChunkSize(long length) {
    final long kb = 1024L;
    final long mb = kb * kb;
    Counter bucket;
    if (length <= kb) {
        bucket = lessThan1KBChunksCounter;
    } else if (length <= 5 * kb) {
        bucket = between1KBAnd5KBChunksCounter;
    } else if (length <= 10 * kb) {
        bucket = between5KBAnd10KBChunksCounter;
    } else if (length <= 100 * kb) {
        bucket = between10KBAnd100KBChunksCounter;
    } else if (length <= mb) {
        bucket = between100KBAnd1MBChunksCounter;
    } else {
        bucket = greaterThan1MBChunksCounter;
    }
    bucket.inc();
}
/**
 * Increments the "eml" size-bucket counter that matches the chunk length.
 * Bucket upper bounds are inclusive: 10 KB, 100 KB, 1 MB, 10 MB; anything
 * larger lands in the "greater than 10 MB" bucket.
 *
 * @param length chunk length in bytes
 */
private void calculateEmlChunkSize(long length) {
    final long kb = 1024L;
    final long mb = kb * kb;
    if (length <= 10 * kb) {
        lessThan10KBEmlChunksCounter.inc();
        return;
    }
    if (length <= 100 * kb) {
        between10KBAnd100KBEmlChunksCounter.inc();
        return;
    }
    if (length <= mb) {
        between100KBAnd1MBEmlChunksCounter.inc();
        return;
    }
    if (length <= 10 * mb) {
        between1MBAnd10MBEmlChunksCounter.inc();
        return;
    }
    greaterThan10MBEmlChunksCounter.inc();
}
/**
 * Increments the "txt" size-bucket counter that matches the chunk length.
 * Bucket upper bounds are inclusive: 10 KB, 100 KB, 1 MB, 10 MB; anything
 * larger lands in the "greater than 10 MB" bucket.
 *
 * @param length chunk length in bytes
 */
private void calculateTxtChunkSize(long length) {
    final long kb = 1024L;
    final long mb = kb * kb;
    if (length <= 10 * kb) {
        lessThan10KBTxtChunksCounter.inc();
        return;
    }
    if (length <= 100 * kb) {
        between10KBAnd100KBTxtChunksCounter.inc();
        return;
    }
    if (length <= mb) {
        between100KBAnd1MBTxtChunksCounter.inc();
        return;
    }
    if (length <= 10 * mb) {
        between1MBAnd10MBTxtChunksCounter.inc();
        return;
    }
    greaterThan10MBTxtChunksCounter.inc();
}
}

View File

@@ -0,0 +1,66 @@
package com.zdjizhi.trigger;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.state.ReducingState;
import org.apache.flink.api.common.state.ReducingStateDescriptor;
import org.apache.flink.api.common.typeutils.base.LongSerializer;
import org.apache.flink.streaming.api.windowing.triggers.Trigger;
import org.apache.flink.streaming.api.windowing.triggers.TriggerResult;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
public class IdleTimeTrigger<W extends TimeWindow> extends Trigger<Object, TimeWindow> {
private static final long serialVersionUID = 1L;
private final long maxIdleTime;
private IdleTimeTrigger(long maxIdleTime) {
this.maxIdleTime = maxIdleTime;
}
public static <W extends TimeWindow> IdleTimeTrigger<TimeWindow> of(long maxIdleTime) {
return new IdleTimeTrigger<>(maxIdleTime);
}
private final ReducingStateDescriptor<Long> processingTimeStateDesc =
new ReducingStateDescriptor<>("processTimer", new ReduceMax(), LongSerializer.INSTANCE);
@Override
public TriggerResult onElement(Object element, long timestamp, TimeWindow window, TriggerContext ctx) throws Exception {
ReducingState<Long> fireTimestamp = ctx.getPartitionedState(processingTimeStateDesc);
fireTimestamp.clear();
long nextFireTimestamp = ctx.getCurrentProcessingTime() + maxIdleTime;
ctx.registerProcessingTimeTimer(nextFireTimestamp);
fireTimestamp.add(nextFireTimestamp);
return TriggerResult.CONTINUE;
}
@Override
public TriggerResult onProcessingTime(long time, TimeWindow window, TriggerContext ctx) throws Exception {
ReducingState<Long> fireTimestamp = ctx.getPartitionedState(processingTimeStateDesc);
if (fireTimestamp.get() != null && fireTimestamp.get() == time) {
fireTimestamp.clear();
return TriggerResult.FIRE;
}
return TriggerResult.CONTINUE;
}
@Override
public TriggerResult onEventTime(long time, TimeWindow window, TriggerContext ctx) {
return TriggerResult.CONTINUE;
}
@Override
public void clear(TimeWindow window, TriggerContext ctx) {
ReducingState<Long> fireTimestamp = ctx.getPartitionedState(processingTimeStateDesc);
fireTimestamp.clear();
}
private static class ReduceMax implements ReduceFunction<Long> {
private static final long serialVersionUID = 1L;
@Override
public Long reduce(Long value1, Long value2) {
return Math.max(value1, value2);
}
}
}