[TSG-20820] Fix an issue where files could not be downloaded when the file-chunk-combiner sink used batching.
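Reviewer context, summarizing what the hunks below show: before this change the batching sinks (HosSink, HBaseSink) flushed only when a size or count threshold was hit, so when traffic paused, the tail of a file could sit in the in-memory batch indefinitely and the stored object stayed incomplete — which is why downloads failed. The commit adds a `sink.batch` guard, a `sink.batch.time` flush interval, and a scheduled task that flushes idle buffers, serializing the record path and the timer on the same lock. A minimal sketch of the pattern, with hypothetical names (these are not the project's classes):

```java
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

// Sketch only: a purely size-triggered batch strands the last chunks of a
// file in memory when no further records arrive, so a scheduled task also
// flushes after a quiet period, as the sinks in this commit now do.
public class TimedBatchBuffer {
    private final List<byte[]> buffer = new ArrayList<>();
    private final int maxBatchCount;
    private final ScheduledExecutorService timer = Executors.newScheduledThreadPool(1);
    private volatile long lastWrite = System.currentTimeMillis();   // volatile, like the patch's timestamp

    public TimedBatchBuffer(int maxBatchCount, long periodSeconds) {
        this.maxBatchCount = maxBatchCount;
        // Flush on a timer once the buffer has been idle longer than one period.
        timer.scheduleWithFixedDelay(() -> {
            if (System.currentTimeMillis() - lastWrite > periodSeconds * 1000 && !buffer.isEmpty()) {
                synchronized (this) {   // same monitor as add(), mirroring the patch
                    flush();
                }
            }
        }, periodSeconds, periodSeconds, TimeUnit.SECONDS);
    }

    public synchronized void add(byte[] chunk) {
        buffer.add(chunk);
        lastWrite = System.currentTimeMillis();
        if (buffer.size() >= maxBatchCount) {   // the pre-existing size trigger
            flush();
        }
    }

    private void flush() {
        // Stand-in for the real sinks' sendBatchData(): write out and clear.
        System.out.println("flushing " + buffer.size() + " chunks");
        buffer.clear();
    }

    public void close() {
        timer.shutdown();   // the sinks now do this in close() as well
    }
}
```

In the real sinks the flush body is the new `sendBatchData()` and the idle check compares against a `volatile` timestamp that `invoke()` refreshes.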
pom.xml
@@ -6,7 +6,7 @@
 
     <groupId>com.zdjizhi</groupId>
    <artifactId>file-chunk-combiner</artifactId>
-    <version>1.3.0</version>
+    <version>1.3.1</version>
 
    <repositories>
        <repository>
@@ -17,7 +17,6 @@ import com.zdjizhi.trigger.LastChunkTrigger;
 import com.zdjizhi.trigger.MultipleTrigger;
 import org.apache.flink.api.common.eventtime.WatermarkStrategy;
 import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
-import org.apache.flink.api.java.functions.KeySelector;
 import org.apache.flink.api.java.utils.ParameterTool;
 import org.apache.flink.configuration.Configuration;
 import org.apache.flink.streaming.api.datastream.*;
@@ -174,35 +173,6 @@ public class FileChunkCombiner {
                         .filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_proxy_file_meta"))
                         .name("Filter: Map")
                         .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM));
-                windowStream
-                        .filter(new FileChunkFilterFunction(configuration.getString(Configs.SINK_OSS_FILTER_EXPRESSION), "sink_oss"))
-                        .name("Filter: Oss")
-                        .setParallelism(configuration.get(Configs.COMBINER_WINDOW_PARALLELISM))
-                        .union(fileMetaSessionSingleOutputStreamOperator, fileMetaProxySingleOutputStreamOperator)
-                        .keyBy(new FileChunkKeySelector())
-                        .addSink(new OssSinkByEhcache(configuration))
-                        .name("Oss")
-                        .setParallelism(configuration.get(Configs.SINK_PARALLELISM));
-                break;
-            case "oss-caffeine":
-                fileMetaSessionSingleOutputStreamOperator = environment.addSource(FileMetaKafkaConsumer.stringConsumer(configuration, configuration.get(Configs.KAFKA_FILE_META_SESSION_TOPIC)))
-                        .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
-                        .name(configuration.get(Configs.KAFKA_FILE_META_SESSION_TOPIC))
-                        .flatMap(new ParseSessionFileMetaFlatMapFunction())
-                        .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
-                        .name("Map: Parse Session File Meta")
-                        .filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_session_file_meta"))
-                        .name("Filter: Map")
-                        .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM));
-                fileMetaProxySingleOutputStreamOperator = environment.addSource(FileMetaKafkaConsumer.stringConsumer(configuration, configuration.get(Configs.KAFKA_FILE_META_PROXY_TOPIC)))
-                        .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
-                        .name(configuration.get(Configs.KAFKA_FILE_META_PROXY_TOPIC))
-                        .flatMap(new ParseProxyFileMetaFlatMapFunction())
-                        .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
-                        .name("Map: Parse Proxy File Meta")
-                        .filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_proxy_file_meta"))
-                        .name("Filter: Map")
-                        .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM));
                 windowStream
                         .filter(new FileChunkFilterFunction(configuration.getString(Configs.SINK_OSS_FILTER_EXPRESSION), "sink_oss"))
                         .name("Filter: Oss")
@@ -213,38 +183,6 @@ public class FileChunkCombiner {
                         .name("Oss")
                         .setParallelism(configuration.get(Configs.SINK_PARALLELISM));
                 break;
-            case "test":
-                fileMetaSessionSingleOutputStreamOperator = environment.addSource(FileMetaKafkaConsumer.stringConsumer(configuration, configuration.get(Configs.KAFKA_FILE_META_SESSION_TOPIC)))
-                        .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
-                        .name(configuration.get(Configs.KAFKA_FILE_META_SESSION_TOPIC))
-                        .flatMap(new ParseSessionFileMetaFlatMapFunction())
-                        .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
-                        .name("Map: Parse Session File Meta")
-                        .filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_session_file_meta"))
-                        .name("Filter: Map")
-                        .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM));
-                fileMetaProxySingleOutputStreamOperator = environment.addSource(FileMetaKafkaConsumer.stringConsumer(configuration, configuration.get(Configs.KAFKA_FILE_META_PROXY_TOPIC)))
-                        .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
-                        .name(configuration.get(Configs.KAFKA_FILE_META_PROXY_TOPIC))
-                        .flatMap(new ParseProxyFileMetaFlatMapFunction())
-                        .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM))
-                        .name("Map: Parse Proxy File Meta")
-                        .filter(new FileChunkFilterFunction(configuration.getString(Configs.FILE_META_FILTER_EXPRESSION), "map_parse_proxy_file_meta"))
-                        .name("Filter: Map")
-                        .setParallelism(configuration.get(Configs.MAP_PARSE_FILE_META_PARALLELISM));
-                KeyedStream<FileChunk, String> fileMetaStringKeyedStream = fileMetaSessionSingleOutputStreamOperator
-                        .union(fileMetaProxySingleOutputStreamOperator)
-                        .keyBy((KeySelector<FileChunk, String>) FileChunk::getUuid);
-                windowStream
-                        .filter(new FileChunkFilterFunction(configuration.getString(Configs.SINK_OSS_FILTER_EXPRESSION), "sink_oss"))
-                        .name("Filter: Oss")
-                        .setParallelism(configuration.get(Configs.COMBINER_WINDOW_PARALLELISM))
-                        .keyBy((KeySelector<FileChunk, String>) FileChunk::getUuid)
-                        .connect(fileMetaStringKeyedStream)
-                        .process(new TestKeyedCoProcessFunction(configuration))
-                        .setParallelism(configuration.get(Configs.COMBINER_WINDOW_PARALLELISM))
-                        .name("Oss");
-                break;
             }
         }
         environment.execute(configuration.get(Configs.FLINK_JOB_NAME));
@@ -84,6 +84,9 @@ public class Configs {
     public static final ConfigOption<Long> SINK_BATCH_SIZE = ConfigOptions.key("sink.batch.size")
             .longType()
             .defaultValue(Long.MAX_VALUE);
+    public static final ConfigOption<Integer> SINK_BATCH_TIME = ConfigOptions.key("sink.batch.time")
+            .intType()
+            .defaultValue(5);
     public static final ConfigOption<String> SINK_FILTER_EXPRESSION = ConfigOptions.key("sink.filter.expression")
             .stringType()
             .defaultValue("");
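A standalone sketch of how the new `sink.batch.time` option behaves, assuming Flink's `ConfigOptions` API as used above; the values mirror the defaults in this hunk:

```java
import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.ConfigOptions;
import org.apache.flink.configuration.Configuration;

// Demonstrates the default vs. an overridden flush interval (in seconds).
public class SinkBatchTimeDemo {
    // Same shape as the option added above.
    static final ConfigOption<Integer> SINK_BATCH_TIME = ConfigOptions.key("sink.batch.time")
            .intType()
            .defaultValue(5);

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        System.out.println(conf.get(SINK_BATCH_TIME)); // 5 (default)
        conf.set(SINK_BATCH_TIME, 10);                 // e.g. flush idle buffers after 10 s
        System.out.println(conf.get(SINK_BATCH_TIME)); // 10
    }
}
```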
@@ -120,8 +120,8 @@ public class CombineChunkProcessWindowFunction extends ProcessWindowFunction<Fil
                 waitingToCombineChunkList.add(currentFileChunk.getChunk());
             }
         } else { // the expected offset is smaller than the current offset: a chunk is missing
-            if (waitingToCombineChunkList.size() > 0) { // combine the combinable chunks, then clear the list
-                FileChunk fileChunk = combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), originalFileChunkList.get(0).getTimestamp(), null);
+            if (!waitingToCombineChunkList.isEmpty()) { // combine the combinable chunks, then clear the list
+                FileChunk fileChunk = combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), currentFileChunk.getTimestamp(), null);
                 if (fileChunk != null) {
                     combinedFileChunkList.add(fileChunk);
                 }
@@ -139,8 +139,8 @@ public class CombineChunkProcessWindowFunction extends ProcessWindowFunction<Fil
                 }
             }
         }
-        if (waitingToCombineChunkList.size() > 0) {
-            FileChunk fileChunk = combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), originalFileChunkList.get(0).getTimestamp(), null);
+        if (!waitingToCombineChunkList.isEmpty()) {
+            FileChunk fileChunk = combineChunk(waitingToCombineChunkList, currentFileChunk.getUuid(), currentFileChunk.getFileName(), currentFileChunk.getFileType(), startOffset, currentFileChunk.getCombineMode(), lastChunkFlag, originalFileChunkList.get(0).getMeta(), currentFileChunk.getTimestamp(), null);
             if (fileChunk != null) {
                 combinedFileChunkList.add(fileChunk);
             }
@@ -1,101 +0,0 @@
-package com.zdjizhi.function;
-
-import cn.hutool.core.io.IoUtil;
-import com.zdjizhi.config.Configs;
-import com.zdjizhi.pojo.FileChunk;
-import com.zdjizhi.utils.HBaseConnectionUtil;
-import org.apache.flink.configuration.Configuration;
-import org.apache.flink.metrics.Counter;
-import org.apache.flink.metrics.MeterView;
-import org.apache.flink.metrics.MetricGroup;
-import org.apache.flink.streaming.api.functions.co.KeyedCoProcessFunction;
-import org.apache.flink.util.Collector;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.*;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-public class TestKeyedCoProcessFunction extends KeyedCoProcessFunction<String, FileChunk, FileChunk, FileChunk> {
-
-    private final Configuration configuration;
-    public transient Counter chunksInCounter;
-    public transient Counter fileMetasInCounter;
-
-    private boolean isAsync;
-    private Connection syncHBaseConnection;
-    private AsyncConnection AsyncHBaseConnection;
-    private Table table;
-    private AsyncTable<AdvancedScanResultConsumer> asyncTable;
-    private List<Put> dataPutList;
-    private List<Put> metaPutList;
-    private long maxBatchCount;
-
-    public TestKeyedCoProcessFunction(Configuration configuration) {
-        this.configuration = configuration;
-    }
-
-    @Override
-    public void open(Configuration parameters) throws Exception {
-        super.open(parameters);
-        MetricGroup metricGroup = getRuntimeContext().getMetricGroup().addGroup("file_chunk_combiner", "add_file_meta");
-        chunksInCounter = metricGroup.counter("chunksInCount");
-        fileMetasInCounter = metricGroup.counter("fileMetasInCount");
-        metricGroup.meter("numChunksInPerSecond", new MeterView(chunksInCounter));
-        metricGroup.meter("numFileMetasInPerSecond", new MeterView(fileMetasInCounter));
-        isAsync = configuration.getBoolean(Configs.SINK_ASYNC);
-        if (isAsync) {
-            AsyncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getAsyncHBaseConnection();
-            asyncTable = AsyncHBaseConnection.getTable(TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET)));
-        } else {
-            syncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getSyncHBaseConnection();
-            table = syncHBaseConnection.getTable(TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET)));
-        }
-        maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT);
-        dataPutList = new ArrayList<>();
-        metaPutList = new ArrayList<>();
-    }
-
-    @Override
-    public void processElement1(FileChunk value, Context ctx, Collector<FileChunk> out) throws IOException, InterruptedException {
-        chunksInCounter.inc();
-        Put dataPut = new Put(value.getUuid().getBytes());
-        dataPut.addColumn("meta".getBytes(), "data".getBytes(), (value.toString()).getBytes());
-        dataPutList.add(dataPut);
-        if (dataPutList.size() >= maxBatchCount) {
-            if (isAsync) {
-                asyncTable.batch(dataPutList);
-                dataPutList.clear();
-            } else {
-                table.batch(dataPutList, null);
-                dataPutList.clear();
-            }
-        }
-    }
-
-    @Override
-    public void processElement2(FileChunk value, Context ctx, Collector<FileChunk> out) throws IOException, InterruptedException {
-        fileMetasInCounter.inc();
-        Put metaPut = new Put(value.getUuid().getBytes());
-        metaPut.addColumn("meta".getBytes(), "meta".getBytes(), (value.getMeta().toString()).getBytes());
-        metaPutList.add(metaPut);
-        if (metaPutList.size() >= maxBatchCount) {
-            if (isAsync) {
-                asyncTable.batch(metaPutList);
-                metaPutList.clear();
-            } else {
-                table.batch(metaPutList, null);
-                metaPutList.clear();
-            }
-        }
-    }
-
-    @Override
-    public void close() {
-        IoUtil.close(table);
-        IoUtil.close(syncHBaseConnection);
-        IoUtil.close(AsyncHBaseConnection);
-    }
-}
@@ -23,6 +23,9 @@ import org.apache.hadoop.hbase.util.Bytes;
 import java.io.IOException;
 import java.util.*;
 import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
 
 import static com.zdjizhi.utils.PublicConstants.*;
 import static com.zdjizhi.utils.HBaseColumnConstants.*;
@@ -61,7 +64,7 @@ public class HBaseSink extends RichSinkFunction<FileChunk> {
     public transient Counter mediaChunksCounter;
     private boolean isAsync;
     private Connection syncHBaseConnection;
-    private AsyncConnection AsyncHBaseConnection;
+    private AsyncConnection asyncHBaseConnection;
     private Table table;
     private Table indexTimeTable;
     private Table indexFilenameTable;
@@ -72,12 +75,12 @@ public class HBaseSink extends RichSinkFunction<FileChunk> {
     private List<Put> indexTimePutList;
     private List<Put> indexFilenamePutList;
     private long chunkSize;
-    private int chunkCount;
     private long maxBatchSize;
     private long maxBatchCount;
+    private ScheduledExecutorService executorService;
     private long rateLimitThreshold;
     private String rateLimitExpression;
-    private long timestamp;
+    private volatile long timestamp;
     private long count;
     private JexlExpression jexlExpression;
     private JexlContext jexlContext;
@@ -148,57 +151,75 @@ public class HBaseSink extends RichSinkFunction<FileChunk> {
         metricGroup.meter("numMediaChunksOutPerSecond", new MeterView(mediaChunksCounter));
         isAsync = configuration.getBoolean(Configs.SINK_ASYNC);
         if (isAsync) {
-            AsyncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getAsyncHBaseConnection();
-            asyncTable = AsyncHBaseConnection.getTable(TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET)));
-            asyncIndexTimeTable = AsyncHBaseConnection.getTable(TableName.valueOf("default:index_time_" + configuration.get(Configs.SINK_HOS_BUCKET)));
-            asyncIndexFilenameTable = AsyncHBaseConnection.getTable(TableName.valueOf("default:index_filename_" + configuration.get(Configs.SINK_HOS_BUCKET)));
+            asyncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getAsyncHBaseConnection();
+            asyncTable = asyncHBaseConnection.getTable(TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET)));
+            asyncIndexTimeTable = asyncHBaseConnection.getTable(TableName.valueOf("default:index_time_" + configuration.get(Configs.SINK_HOS_BUCKET)));
+            asyncIndexFilenameTable = asyncHBaseConnection.getTable(TableName.valueOf("default:index_filename_" + configuration.get(Configs.SINK_HOS_BUCKET)));
         } else {
             syncHBaseConnection = HBaseConnectionUtil.getInstance(configuration).getSyncHBaseConnection();
             table = syncHBaseConnection.getTable(TableName.valueOf("default:" + configuration.get(Configs.SINK_HOS_BUCKET)));
             indexTimeTable = syncHBaseConnection.getTable(TableName.valueOf("default:index_time_" + configuration.get(Configs.SINK_HOS_BUCKET)));
             indexFilenameTable = syncHBaseConnection.getTable(TableName.valueOf("default:index_filename_" + configuration.get(Configs.SINK_HOS_BUCKET)));
         }
-        maxBatchSize = configuration.getLong(Configs.SINK_BATCH_SIZE);
-        maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT);
-        dataPutList = new ArrayList<>();
-        indexTimePutList = new ArrayList<>();
-        indexFilenamePutList = new ArrayList<>();
-        chunkSize = 0;
-        chunkCount = 0;
-        rateLimitThreshold = configuration.getLong(Configs.SINK_RATE_LIMIT_THRESHOLD);
-        rateLimitExpression = configuration.getString(Configs.SINK_RATE_LIMIT_EXCLUSION_EXPRESSION);
         timestamp = System.currentTimeMillis();
-        count = 0;
-        JexlEngine jexlEngine = new JexlBuilder().create();
-        jexlExpression = jexlEngine.createExpression(rateLimitExpression);
-        jexlContext = new MapContext();
+        if (configuration.get(Configs.SINK_BATCH)) {
+            maxBatchSize = configuration.getLong(Configs.SINK_BATCH_SIZE);
+            maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT);
+            dataPutList = new ArrayList<>();
+            indexTimePutList = new ArrayList<>();
+            indexFilenamePutList = new ArrayList<>();
+            chunkSize = 0;
+            executorService = Executors.newScheduledThreadPool(1);
+            long period = configuration.getInteger(Configs.SINK_BATCH_TIME);
+            executorService.scheduleWithFixedDelay(() -> {
+                if (System.currentTimeMillis() - timestamp > (period * 1000)) {
+                    if (!dataPutList.isEmpty()) {
+                        synchronized (this) {
+                            sendBatchData();
+                        }
+                    }
+                }
+            }, period, period, TimeUnit.SECONDS);
+        }
+        if (rateLimitThreshold > 0) {
+            rateLimitThreshold = configuration.getLong(Configs.SINK_RATE_LIMIT_THRESHOLD);
+            rateLimitExpression = configuration.getString(Configs.SINK_RATE_LIMIT_EXCLUSION_EXPRESSION);
+            count = 0;
+            JexlEngine jexlEngine = new JexlBuilder().create();
+            jexlExpression = jexlEngine.createExpression(rateLimitExpression);
+            jexlContext = new MapContext();
+        }
     }
 
     @Override
     public void invoke(FileChunk fileChunk, Context context) {
-        chunksInCounter.inc();
-        bytesInCounter.inc(fileChunk.getLength());
-        if (rateLimitThreshold > 0) {
-            count++;
-            if (System.currentTimeMillis() - timestamp < 1000 && count > rateLimitThreshold) {
-                if (checkFileChunk(fileChunk)) {
-                    sendFileChunk(fileChunk);
-                } else {
-                    rateLimitDropChunksCounter.inc();
-                }
-            } else if (System.currentTimeMillis() - timestamp >= 1000) {
-                if (checkFileChunk(fileChunk)) {
-                    sendFileChunk(fileChunk);
-                } else {
-                    rateLimitDropChunksCounter.inc();
-                }
-                timestamp = System.currentTimeMillis();
-                count = 0;
-            } else {
-                sendFileChunk(fileChunk);
-            }
-        } else {
-            sendFileChunk(fileChunk);
+        synchronized (this) {
+            long currentTimeMillis = System.currentTimeMillis();
+            chunksInCounter.inc();
+            bytesInCounter.inc(fileChunk.getLength());
+            if (rateLimitThreshold > 0) {
+                count++;
+                if (currentTimeMillis - timestamp < 1000 && count > rateLimitThreshold) {
+                    if (checkFileChunk(fileChunk)) {
+                        sendFileChunk(fileChunk);
+                    } else {
+                        rateLimitDropChunksCounter.inc();
+                    }
+                } else if (currentTimeMillis - timestamp >= 1000) {
+                    if (checkFileChunk(fileChunk)) {
+                        sendFileChunk(fileChunk);
+                    } else {
+                        rateLimitDropChunksCounter.inc();
+                    }
+                    timestamp = currentTimeMillis;
+                    count = 0;
+                } else {
+                    sendFileChunk(fileChunk);
+                }
+            } else {
+                timestamp = currentTimeMillis;
+                sendFileChunk(fileChunk);
+            }
         }
     }
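One reviewer-level observation on the open() hunk above: as the new code reads, the guard `if (rateLimitThreshold > 0)` tests the field before it is loaded from configuration, and a Java `long` field defaults to 0, so the branch that initializes the JEXL rate limiter would never run. A hedged sketch of the presumably intended ordering — this correction is not part of the commit:

```java
// Hypothetical reordering, not in the diff: load the threshold first,
// then initialize the limiter only when it is positive.
rateLimitThreshold = configuration.getLong(Configs.SINK_RATE_LIMIT_THRESHOLD);
rateLimitExpression = configuration.getString(Configs.SINK_RATE_LIMIT_EXCLUSION_EXPRESSION);
if (rateLimitThreshold > 0) {
    count = 0;
    JexlEngine jexlEngine = new JexlBuilder().create();
    jexlExpression = jexlEngine.createExpression(rateLimitExpression);
    jexlContext = new MapContext();
}
```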
@@ -208,7 +229,10 @@ public class HBaseSink extends RichSinkFunction<FileChunk> {
         IoUtil.close(indexTimeTable);
         IoUtil.close(indexFilenameTable);
         IoUtil.close(syncHBaseConnection);
-        IoUtil.close(AsyncHBaseConnection);
+        IoUtil.close(asyncHBaseConnection);
+        if (executorService != null) {
+            executorService.shutdown();
+        }
     }
 
     private void sendFileChunk(FileChunk fileChunk) {
@@ -254,72 +278,52 @@ public class HBaseSink extends RichSinkFunction<FileChunk> {
             metaPut.addColumn(HBaseColumnConstants.BYTE_FAMILY_META, HBaseColumnConstants.BYTE_COLUMN_LAST_MODIFIED, Bytes.toBytes(timestamp));
             dataPutList.add(metaPut);
         }
-        chunkCount++;
         chunkSize += chunkLength;
         chunksOutCounter.inc();
         bytesOutCounter.inc(chunkLength);
         calculateFileChunkMetrics(fileChunk);
-        if (chunkSize >= maxBatchSize || chunkCount >= maxBatchCount) {
-            if (isAsync) {
-                if (dataPutList.size() > 0) {
-                    List<CompletableFuture<Object>> futures = asyncTable.batch(dataPutList);
-                    CompletableFuture.supplyAsync(() -> {
-                        for (CompletableFuture<Object> completableFuture : futures) {
-                            completableFuture.whenCompleteAsync((result, error) -> {
-                                if (error != null) {
-                                    LOG.error("put chunk to hbase error. ", error.getMessage());
-                                    errorChunksCounter.inc();
-                                }
-                            });
-                        }
-                        return null;
-                    });
-                    dataPutList.clear();
-                }
-                if (indexTimePutList.size() > 0) {
-                    asyncIndexTimeTable.batch(indexTimePutList);
-                    indexTimePutList.clear();
-                }
-                if (indexFilenamePutList.size() > 0) {
-                    asyncIndexFilenameTable.batch(indexFilenamePutList);
-                    indexFilenamePutList.clear();
-                }
-            } else {
-                if (dataPutList.size() > 0) {
-                    try {
-                        table.batch(dataPutList, null);
-                    } catch (IOException | InterruptedException e) {
-                        LOG.error("put chunk to hbase data table error. ", e.getMessage());
-                        errorChunksCounter.inc(dataPutList.size());
-                    } finally {
-                        dataPutList.clear();
-                    }
-                }
-                if (indexTimePutList.size() > 0) {
-                    try {
-                        indexTimeTable.batch(indexTimePutList, null);
-                    } catch (IOException | InterruptedException e) {
-                        LOG.error("put chunk to hbase index time table error. ", e.getMessage());
-                    } finally {
-                        indexTimePutList.clear();
-                    }
-                }
-                if (indexFilenamePutList.size() > 0) {
-                    try {
-                        indexFilenameTable.batch(indexFilenamePutList, null);
-                    } catch (IOException | InterruptedException e) {
-                        LOG.error("put chunk to hbase index filename table error. ", e.getMessage());
-                    } finally {
-                        indexFilenamePutList.clear();
-                    }
-                }
-            }
-            chunkSize = 0;
-            chunkCount = 0;
+        if (chunkSize >= maxBatchSize || dataPutList.size() >= maxBatchCount) {
+            sendBatchData();
             }
         }
     }
 
+    private void sendBatchData() {
+        if (isAsync) {
+            List<CompletableFuture<Object>> futures = asyncTable.batch(dataPutList);
+            CompletableFuture.supplyAsync(() -> {
+                for (CompletableFuture<Object> completableFuture : futures) {
+                    completableFuture.whenCompleteAsync((result, error) -> {
+                        if (error != null) {
+                            LOG.error("Put chunk to hbase error. ", error.getMessage());
+                            errorChunksCounter.inc();
+                        }
+                    });
+                }
+                return null;
+            });
+            dataPutList.clear();
+            asyncIndexTimeTable.batch(indexTimePutList);
+            indexTimePutList.clear();
+            asyncIndexFilenameTable.batch(indexFilenamePutList);
+            indexFilenamePutList.clear();
+        } else {
+            try {
+                table.batch(dataPutList, null);
+                indexTimeTable.batch(indexTimePutList, null);
+                indexFilenameTable.batch(indexFilenamePutList, null);
+            } catch (IOException | InterruptedException e) {
+                LOG.error("Put chunk to hbase error. ", e.getMessage());
+                errorChunksCounter.inc(dataPutList.size());
+            } finally {
+                dataPutList.clear();
+                indexTimePutList.clear();
+                indexFilenamePutList.clear();
+            }
+        }
+        chunkSize = 0;
+    }
 
     private boolean checkFileChunk(FileChunk fileChunk) {
         if (StrUtil.isNotEmpty(rateLimitExpression)) {
             jexlContext.set(fileChunk.getClass().getSimpleName(), fileChunk);
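Design note on the hunk above: the flush trigger now derives the element count from `dataPutList.size()` instead of the removed `chunkCount` field, and the flush body is extracted into `sendBatchData()` so the timer thread and the record path share one implementation. With two threads clearing the lists, a shadow counter could drift from the buffer it mirrors; deriving the trigger from the list itself keeps a single source of truth. A schematic before/after, using the field names from the diff:

```java
// Before: a shadow counter updated next to the buffer.
chunkCount++;                 // sink thread only
dataPutList.add(put);
// ...the new timer thread clears dataPutList but knows nothing about
// chunkCount, so the counter could overstate the buffer and mis-trigger.

// After: the trigger reads the buffer directly; sendBatchData() clears the
// lists and resets chunkSize in one place, whichever thread calls it.
if (chunkSize >= maxBatchSize || dataPutList.size() >= maxBatchCount) {
    sendBatchData();
}
```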
@@ -28,6 +28,9 @@ import org.apache.http.util.EntityUtils;
 import java.io.IOException;
 import java.net.ConnectException;
 import java.util.*;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
 
 import static com.zdjizhi.utils.HttpHeaderConstants.*;
 import static com.zdjizhi.utils.PublicConstants.*;
@@ -73,16 +76,16 @@ public class HosSink extends RichSinkFunction<FileChunk> {
     private String token;
     private volatile String bathPutUrl;
     private HashMap<String, String> hosMessage;
-    private String objectsMeta = "";
-    private String objectsOffset = "";
+    private String objectsMeta;
+    private String objectsOffset;
     private List<byte[]> byteList;
     private long maxBatchSize;
     private long maxBatchCount;
-    private long chunkSize = 0;
-    private int chunkCount = 0;
+    private long chunkSize;
+    private ScheduledExecutorService executorService;
     private long rateLimitThreshold;
     private String rateLimitExpression;
-    private long timestamp;
+    private volatile long timestamp;
     private long count;
     private JexlExpression jexlExpression;
     private JexlContext jexlContext;
@@ -167,47 +170,67 @@ public class HosSink extends RichSinkFunction<FileChunk> {
         } else {
             syncHttpClient = HttpClientUtil.getInstance(configuration).getSyncHttpClient();
         }
-        bathPutUrl = URLUtil.normalize(endpoint + "/hos/" + configuration.get(Configs.SINK_HOS_BUCKET) + "/" + PublicUtil.getUUID()) + "?multiFile";
-        maxBatchSize = configuration.getLong(Configs.SINK_BATCH_SIZE);
-        maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT);
-        hosMessage = new HashMap<>();
-        objectsMeta = "";
-        objectsOffset = "";
-        byteList = new ArrayList<>();
-        rateLimitThreshold = configuration.getLong(Configs.SINK_RATE_LIMIT_THRESHOLD);
-        rateLimitExpression = configuration.getString(Configs.SINK_RATE_LIMIT_EXCLUSION_EXPRESSION);
         timestamp = System.currentTimeMillis();
-        count = 0;
-        JexlEngine jexlEngine = new JexlBuilder().create();
-        jexlExpression = jexlEngine.createExpression(rateLimitExpression);
-        jexlContext = new MapContext();
+        if (configuration.get(Configs.SINK_BATCH)) {
+            bathPutUrl = URLUtil.normalize(endpoint + "/hos/" + configuration.get(Configs.SINK_HOS_BUCKET) + "/" + PublicUtil.getUUID()) + "?multiFile";
+            maxBatchSize = configuration.getLong(Configs.SINK_BATCH_SIZE);
+            maxBatchCount = configuration.getInteger(Configs.SINK_BATCH_COUNT);
+            hosMessage = new HashMap<>();
+            byteList = new ArrayList<>();
+            objectsMeta = "";
+            objectsOffset = "";
+            chunkSize = 0;
+            executorService = Executors.newScheduledThreadPool(1);
+            long period = configuration.getInteger(Configs.SINK_BATCH_TIME);
+            executorService.scheduleWithFixedDelay(() -> {
+                if (System.currentTimeMillis() - timestamp > (period * 1000)) {
+                    if (!byteList.isEmpty()) {
+                        synchronized (this) {
+                            sendBatchData();
+                        }
+                    }
+                }
+            }, period, period, TimeUnit.SECONDS);
+        }
+        if (rateLimitThreshold > 0) {
+            rateLimitThreshold = configuration.getLong(Configs.SINK_RATE_LIMIT_THRESHOLD);
+            rateLimitExpression = configuration.getString(Configs.SINK_RATE_LIMIT_EXCLUSION_EXPRESSION);
+            count = 0;
+            JexlEngine jexlEngine = new JexlBuilder().create();
+            jexlExpression = jexlEngine.createExpression(rateLimitExpression);
+            jexlContext = new MapContext();
+        }
     }
 
     @Override
     public void invoke(FileChunk fileChunk, Context context) {
-        chunksInCounter.inc();
-        bytesInCounter.inc(fileChunk.getLength());
-        if (rateLimitThreshold > 0) {
-            count++;
-            if (System.currentTimeMillis() - timestamp < 1000 && count > rateLimitThreshold) {
-                if (checkFileChunk(fileChunk)) {
-                    sendFileChunk(fileChunk);
-                } else {
-                    rateLimitDropChunksCounter.inc();
-                }
-            } else if (System.currentTimeMillis() - timestamp >= 1000) {
-                if (checkFileChunk(fileChunk)) {
-                    sendFileChunk(fileChunk);
-                } else {
-                    rateLimitDropChunksCounter.inc();
-                }
-                timestamp = System.currentTimeMillis();
-                count = 0;
-            } else {
-                sendFileChunk(fileChunk);
-            }
-        } else {
-            sendFileChunk(fileChunk);
+        synchronized (this) {
+            long currentTimeMillis = System.currentTimeMillis();
+            chunksInCounter.inc();
+            bytesInCounter.inc(fileChunk.getLength());
+            if (rateLimitThreshold > 0) {
+                count++;
+                if (currentTimeMillis - timestamp < 1000 && count > rateLimitThreshold) {
+                    if (checkFileChunk(fileChunk)) {
+                        sendFileChunk(fileChunk);
+                    } else {
+                        rateLimitDropChunksCounter.inc();
+                    }
+                } else if (currentTimeMillis - timestamp >= 1000) {
+                    if (checkFileChunk(fileChunk)) {
+                        sendFileChunk(fileChunk);
+                    } else {
+                        rateLimitDropChunksCounter.inc();
+                    }
+                    timestamp = currentTimeMillis;
+                    count = 0;
+                } else {
+                    sendFileChunk(fileChunk);
+                }
+            } else {
+                timestamp = currentTimeMillis;
+                sendFileChunk(fileChunk);
+            }
         }
     }
@@ -215,6 +238,9 @@ public class HosSink extends RichSinkFunction<FileChunk> {
     public void close() {
         IoUtil.close(syncHttpClient);
         IoUtil.close(asyncHttpClient);
+        if (executorService != null) {
+            executorService.shutdown();
+        }
     }
 
     private void sendFileChunk(FileChunk fileChunk) {
@@ -236,7 +262,7 @@ public class HosSink extends RichSinkFunction<FileChunk> {
         }
         hosMessage.put(HOS_PART_CHUNK_COUNT, fileChunk.getChunkCount() + "");
         Map<String, Object> metaMap = fileChunk.getMeta();
-        if (metaMap != null && metaMap.size() > 0) {
+        if (metaMap != null && !metaMap.isEmpty()) {
             for (String meta : metaMap.keySet()) {
                 hosMessage.put(HOS_META_PREFIX + StrUtil.toSymbolCase(meta, CharUtil.DASHED), metaMap.get(meta) + "");
             }
@@ -245,28 +271,12 @@ public class HosSink extends RichSinkFunction<FileChunk> {
             hosMessage.clear();
             objectsOffset += chunkLength + ";";
             byteList.add(data);
-            chunkCount++;
             chunkSize += chunkLength;
             chunksOutCounter.inc();
             bytesOutCounter.inc(chunkLength);
             calculateFileChunkMetrics(fileChunk);
-            if (chunkSize >= maxBatchSize || chunkCount >= maxBatchCount) {
-                HttpPut httpPut = new HttpPut(bathPutUrl);
-                httpPut.setHeader(TOKEN, token);
-                httpPut.setHeader(HOS_UPLOAD_TYPE, UPLOAD_TYPE_APPENDV2);
-                httpPut.setHeader(HOS_COMBINE_MODE, fileChunk.getCombineMode());
-                httpPut.setHeader(HOS_OBJECTS_META, objectsMeta);
-                httpPut.setHeader(HOS_OBJECTS_OFFSET, objectsOffset);
-                byte[][] bytes = new byte[byteList.size()][];
-                byteList.toArray(bytes);
-                byte[] newData = ArrayUtil.addAll(bytes);
-                httpPut.setEntity(new ByteArrayEntity(newData));
-                byteList.clear();
-                executeRequest(httpPut);
-                objectsMeta = "";
-                objectsOffset = "";
-                chunkSize = 0;
-                chunkCount = 0;
+            if (chunkSize >= maxBatchSize || byteList.size() >= maxBatchCount) {
+                sendBatchData();
             }
         } else {
             String url = URLUtil.normalize(endpoint + "/hos/" + configuration.get(Configs.SINK_HOS_BUCKET) + "/" + fileChunk.getUuid());
@@ -292,7 +302,7 @@ public class HosSink extends RichSinkFunction<FileChunk> {
         }
         httpPut.setHeader(HOS_PART_CHUNK_COUNT, fileChunk.getChunkCount() + "");
         Map<String, Object> metaMap = fileChunk.getMeta();
-        if (metaMap != null && metaMap.size() > 0) {
+        if (metaMap != null && !metaMap.isEmpty()) {
             for (String meta : metaMap.keySet()) {
                 httpPut.setHeader(HOS_META_PREFIX + StrUtil.toSymbolCase(meta, CharUtil.DASHED), metaMap.get(meta) + "");
             }
@@ -309,6 +319,24 @@ public class HosSink extends RichSinkFunction<FileChunk> {
         }
     }
 
+    private void sendBatchData() {
+        HttpPut httpPut = new HttpPut(bathPutUrl);
+        httpPut.setHeader(TOKEN, token);
+        httpPut.setHeader(HOS_UPLOAD_TYPE, UPLOAD_TYPE_APPENDV2);
+        httpPut.setHeader(HOS_COMBINE_MODE, COMBINE_MODE_SEEK);
+        httpPut.setHeader(HOS_OBJECTS_META, objectsMeta);
+        httpPut.setHeader(HOS_OBJECTS_OFFSET, objectsOffset);
+        byte[][] bytes = new byte[byteList.size()][];
+        byteList.toArray(bytes);
+        byte[] newData = ArrayUtil.addAll(bytes);
+        httpPut.setEntity(new ByteArrayEntity(newData));
+        executeRequest(httpPut);
+        objectsMeta = "";
+        objectsOffset = "";
+        byteList.clear();
+        chunkSize = 0;
+    }
+
     private void executeRequest(HttpPut httpPut) {
         if (isAsync) {
             asyncHttpClient.execute(httpPut, new FutureCallback<HttpResponse>() {
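The extracted `HosSink.sendBatchData()` assembles one multi-object append PUT: the buffered chunk bodies are concatenated into a single entity while the per-object metadata and lengths travel in the `HOS_OBJECTS_META` and `HOS_OBJECTS_OFFSET` headers, so the server can split the payload again. Note that the batch path now always sends `COMBINE_MODE_SEEK`, where the old inline code forwarded `fileChunk.getCombineMode()`. A minimal sketch of the same assembly with plain strings (the URL and header name here are illustrative, not the project's constants; Apache HttpClient 4.x API as imported by this class):

```java
import org.apache.http.client.methods.HttpPut;
import org.apache.http.entity.ByteArrayEntity;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;

// Sketch: concatenate chunk bodies and record each length in a ";"-separated
// header, mirroring what sendBatchData() does with byteList/objectsOffset.
public class BatchPutDemo {
    public static HttpPut build(String url, List<byte[]> chunks) throws IOException {
        ByteArrayOutputStream body = new ByteArrayOutputStream();
        StringBuilder offsets = new StringBuilder();
        for (byte[] chunk : chunks) {
            body.write(chunk);
            offsets.append(chunk.length).append(';');   // e.g. "3;5;"
        }
        HttpPut put = new HttpPut(url);
        put.setHeader("x-hos-objects-offset", offsets.toString()); // illustrative header name
        put.setEntity(new ByteArrayEntity(body.toByteArray()));
        return put;
    }

    public static void main(String[] args) throws IOException {
        HttpPut put = build("http://hos.example/hos/bucket/uuid?multiFile",
                List.of("abc".getBytes(StandardCharsets.UTF_8),
                        "hello".getBytes(StandardCharsets.UTF_8)));
        System.out.println(put.getFirstHeader("x-hos-objects-offset").getValue()); // 3;5;
    }
}
```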
@@ -73,7 +73,6 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
     private CloseableHttpClient syncHttpClient;
     private CloseableHttpAsyncClient asyncHttpClient;
     private List<String> endpointList;
-    private CaffeineCacheUtil caffeineCacheUtil;
     private Cache<String, FileChunk> cache;
 
     public OssSinkByCaffeineCache(Configuration configuration) {
@@ -92,8 +91,7 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
         } else {
             syncHttpClient = HttpClientUtil.getInstance(configuration).getSyncHttpClient();
         }
-        caffeineCacheUtil = CaffeineCacheUtil.getInstance(configuration);
-        cache = caffeineCacheUtil.getCaffeineCache();
+        cache = CaffeineCacheUtil.getInstance(configuration).getCaffeineCache();
         metricGroup.gauge("cacheLength", (Gauge<Long>) () -> cache.estimatedSize());
         lessThan1KBChunksCounter = metricGroup.counter("lessThan1KBChunksCount");
         between1KBAnd5KBChunksCounter = metricGroup.counter("between1KBAnd5KBChunksCount");
@@ -183,8 +181,8 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
             }
             FileChunk data = cache.getIfPresent(uuid + "_data");
             if (data != null) {
-                sendFile(data, meta);
                 cache.invalidate(uuid + "_data");
+                sendFile(data, meta);
             } else {
                 cache.put(fileChunk.getUuid() + "_meta", fileChunk);
             }
@@ -193,8 +191,8 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
             bytesInCounter.inc(fileChunk.getLength());
             FileChunk meta = cache.getIfPresent(uuid + "_meta");
             if (meta != null) {
-                sendFile(fileChunk, meta.getMeta());
                 cache.invalidate(uuid + "_meta");
+                sendFile(fileChunk, meta.getMeta());
             } else {
                 cache.put(fileChunk.getUuid() + "_data", fileChunk);
             }
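The two hunks above reverse the order of `sendFile(...)` and `cache.invalidate(...)`: the matched entry is now removed from the Caffeine cache before the (possibly asynchronous) send starts. A plausible reading is that this shrinks the window in which a concurrent `invoke()` could observe the entry and emit the same meta/data pair twice. Schematic of the pairing logic as ordered after this commit (fragment of the same method, for orientation only):

```java
// Claim the counterpart first, then send; the "_data"/"_meta" suffixes pair
// the two halves of a file under one uuid in the Cache<String, FileChunk>.
FileChunk data = cache.getIfPresent(uuid + "_data");
if (data != null) {
    cache.invalidate(uuid + "_data");   // remove before sending, not after
    sendFile(data, meta);
} else {
    cache.put(uuid + "_meta", fileChunk);   // park the meta half and wait
}
```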
@@ -205,7 +203,6 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
     public void close() {
         IoUtil.close(syncHttpClient);
         IoUtil.close(asyncHttpClient);
-        caffeineCacheUtil.close();
     }
 
     private void sendFile(FileChunk fileChunk, Map<String, Object> metaMap) {
@@ -322,6 +319,15 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
         } else if (fileId.contains("_2")) {
             responseFilesCounter.inc();
         }
+        if (fileChunk.getChunk() == null) {
+            nullChunksCounter.inc();
+            if ("eml".equals(fileType)) {
+                nullEmlChunksCounter.inc();
+            } else if ("txt".equals(fileType)) {
+                nullTxtChunksCounter.inc();
+            }
+            LOG.info("send file data is null. " + fileChunk.toString());
+        }
         if (fileChunk.getOffset() == 0 && fileChunk.getLastChunkFlag() == 1) {
             completeFilesCounter.inc();
             if ("eml".equals(fileType)) {
@@ -329,15 +335,6 @@ public class OssSinkByCaffeineCache extends RichSinkFunction<FileChunk> {
             } else if ("txt".equals(fileType)) {
                 completeTxtFilesCounter.inc();
             }
-            if (fileChunk.getChunk() == null) {
-                nullChunksCounter.inc();
-                if ("eml".equals(fileType)) {
-                    nullEmlChunksCounter.inc();
-                } else if ("txt".equals(fileType)) {
-                    nullTxtChunksCounter.inc();
-                }
-                LOG.info("send file data is null. " + fileChunk.toString());
-            }
             if (fileId.contains("_1")) {
                 completeRequestFilesCounter.inc();
             } else if (fileId.contains("_2")) {
@@ -1,396 +0,0 @@
|
|||||||
package com.zdjizhi.sink;
|
|
||||||
|
|
||||||
import cn.hutool.core.io.IoUtil;
|
|
||||||
import cn.hutool.core.util.RandomUtil;
|
|
||||||
import cn.hutool.core.util.URLUtil;
|
|
||||||
import cn.hutool.log.Log;
|
|
||||||
import cn.hutool.log.LogFactory;
|
|
||||||
import com.zdjizhi.config.Configs;
|
|
||||||
import com.zdjizhi.pojo.FileChunk;
|
|
||||||
import com.zdjizhi.utils.EhcacheUtil;
|
|
||||||
import com.zdjizhi.utils.FormatUtils;
|
|
||||||
import com.zdjizhi.utils.HttpClientUtil;
|
|
||||||
import org.apache.flink.configuration.Configuration;
|
|
||||||
import org.apache.flink.metrics.Counter;
|
|
||||||
import org.apache.flink.metrics.MeterView;
|
|
||||||
import org.apache.flink.metrics.MetricGroup;
|
|
||||||
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
|
|
||||||
import org.apache.http.HttpResponse;
|
|
||||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
|
||||||
import org.apache.http.client.methods.HttpPost;
|
|
||||||
import org.apache.http.concurrent.FutureCallback;
|
|
||||||
import org.apache.http.entity.ByteArrayEntity;
|
|
||||||
import org.apache.http.impl.client.CloseableHttpClient;
|
|
||||||
import org.apache.http.impl.nio.client.CloseableHttpAsyncClient;
|
|
||||||
import org.apache.http.util.EntityUtils;
|
|
||||||
import org.ehcache.Cache;
|
|
||||||
import org.ehcache.CacheManager;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
public class OssSinkByEhcache extends RichSinkFunction<FileChunk> {
|
|
||||||
private static final Log LOG = LogFactory.get();
|
|
||||||
|
|
||||||
private final Configuration configuration;
|
|
||||||
public transient Counter chunksInCounter;
|
|
||||||
public transient Counter chunksOutCounter;
|
|
||||||
public transient Counter bytesInCounter;
|
|
||||||
public transient Counter bytesOutCounter;
|
|
||||||
public transient Counter errorChunksCounter;
|
|
||||||
public transient Counter fileMetasCounter;
|
|
||||||
public transient Counter requestFileMetasCounter;
|
|
||||||
public transient Counter responseFileMetasCounter;
|
|
||||||
public transient Counter requestFilesCounter;
|
|
||||||
public transient Counter responseFilesCounter;
|
|
||||||
public transient Counter emlChunksCounter;
|
|
||||||
public transient Counter txtChunksCounter;
|
|
||||||
public transient Counter completeFilesCounter;
|
|
||||||
public transient Counter completeEmlFilesCounter;
|
|
||||||
public transient Counter completeTxtFilesCounter;
|
|
||||||
public transient Counter completeRequestFilesCounter;
|
|
||||||
public transient Counter completeResponseFilesCounter;
|
|
||||||
public transient Counter nullChunksCounter;
|
|
||||||
public transient Counter nullTxtChunksCounter;
|
|
||||||
public transient Counter nullEmlChunksCounter;
|
|
||||||
public transient Counter lessThan1KBChunksCounter;
|
|
||||||
public transient Counter between1KBAnd5KBChunksCounter;
|
|
||||||
public transient Counter between5KBAnd10KBChunksCounter;
|
|
||||||
public transient Counter between10KBAnd100KBChunksCounter;
|
|
||||||
public transient Counter between100KBAnd1MBChunksCounter;
|
|
||||||
public transient Counter greaterThan1MBChunksCounter;
|
|
||||||
public transient Counter lessThan10KBEmlChunksCounter;
|
|
||||||
public transient Counter between1MBAnd10MBEmlChunksCounter;
|
|
||||||
public transient Counter between10KBAnd100KBEmlChunksCounter;
|
|
||||||
public transient Counter between100KBAnd1MBEmlChunksCounter;
|
|
||||||
public transient Counter greaterThan10MBEmlChunksCounter;
|
|
||||||
public transient Counter lessThan10KBTxtChunksCounter;
|
|
||||||
public transient Counter between1MBAnd10MBTxtChunksCounter;
|
|
||||||
public transient Counter between10KBAnd100KBTxtChunksCounter;
|
|
||||||
public transient Counter between100KBAnd1MBTxtChunksCounter;
|
|
||||||
public transient Counter greaterThan10MBTxtChunksCounter;
|
|
||||||
private boolean isAsync;
|
|
||||||
private CloseableHttpClient syncHttpClient;
|
|
||||||
private CloseableHttpAsyncClient asyncHttpClient;
|
|
||||||
private List<String> endpointList;
|
|
||||||
private EhcacheUtil ehcacheUtil;
|
|
||||||
private Cache<String, FileChunk> dataCache;
|
|
||||||
private Cache<String, FileChunk> metaCache;
|
|
||||||
|
|
||||||
public OssSinkByEhcache(Configuration configuration) {
|
|
||||||
this.configuration = configuration;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void open(Configuration parameters) throws Exception {
|
|
||||||
super.open(parameters);
|
|
||||||
MetricGroup metricGroup = getRuntimeContext().getMetricGroup().addGroup("file_chunk_combiner", "sink_oss");
|
|
||||||
endpointList = Arrays.asList(configuration.get(Configs.SINK_OSS_ENDPOINT).split(","));
|
|
||||||
isAsync = configuration.getBoolean(Configs.SINK_OSS_ASYNC);
|
|
||||||
if (isAsync) {
|
|
||||||
asyncHttpClient = HttpClientUtil.getInstance(configuration).getAsyncHttpClient();
|
|
||||||
asyncHttpClient.start();
|
|
||||||
} else {
|
|
||||||
syncHttpClient = HttpClientUtil.getInstance(configuration).getSyncHttpClient();
|
|
||||||
}
|
|
||||||
ehcacheUtil = EhcacheUtil.getInstance();
|
|
||||||
CacheManager ehcacheManager = EhcacheUtil.getInstance().getEhcacheManager();
|
|
||||||
dataCache = ehcacheManager.getCache("data", String.class, FileChunk.class);
|
|
||||||
metaCache = ehcacheManager.getCache("meta", String.class, FileChunk.class);
|
|
||||||
lessThan1KBChunksCounter = metricGroup.counter("lessThan1KBChunksCount");
|
|
||||||
between1KBAnd5KBChunksCounter = metricGroup.counter("between1KBAnd5KBChunksCount");
|
|
||||||
between5KBAnd10KBChunksCounter = metricGroup.counter("between5KBAnd10KBChunksCount");
|
|
||||||
between10KBAnd100KBChunksCounter = metricGroup.counter("between10KBAnd100KBChunksCount");
|
|
||||||
between100KBAnd1MBChunksCounter = metricGroup.counter("between100KBAnd1MBChunksCount");
|
|
||||||
greaterThan1MBChunksCounter = metricGroup.counter("greaterThan1MBChunksCount");
|
|
||||||
metricGroup.meter("numLessThan1KBFilesOutPerSecond", new MeterView(lessThan1KBChunksCounter));
|
|
||||||
metricGroup.meter("numBetween1KBAnd5KBFilesOutPerSecond", new MeterView(between1KBAnd5KBChunksCounter));
|
|
||||||
metricGroup.meter("numBetween5KBAnd10KBFilesOutPerSecond", new MeterView(between5KBAnd10KBChunksCounter));
|
|
||||||
metricGroup.meter("numBetween10KBAnd100KBFilesOutPerSecond", new MeterView(between10KBAnd100KBChunksCounter));
|
|
||||||
metricGroup.meter("numBetween100KBAnd1MBFilesOutPerSecond", new MeterView(between100KBAnd1MBChunksCounter));
|
|
||||||
metricGroup.meter("numGreaterThan1MBFilesOutPerSecond", new MeterView(greaterThan1MBChunksCounter));
|
|
||||||
lessThan10KBEmlChunksCounter = metricGroup.counter("lessThan10KBEmlChunksCount");
|
|
||||||
between10KBAnd100KBEmlChunksCounter = metricGroup.counter("between10KBAnd100KBEmlChunksCount");
|
|
||||||
between100KBAnd1MBEmlChunksCounter = metricGroup.counter("between100KBAnd1MBEmlChunksCount");
between1MBAnd10MBEmlChunksCounter = metricGroup.counter("between1MBAnd10MBEmlChunksCount");
greaterThan10MBEmlChunksCounter = metricGroup.counter("greaterThan10MBEmlChunksCount");
metricGroup.meter("numLessThan10KBEmlFilesOutPerSecond", new MeterView(lessThan10KBEmlChunksCounter));
metricGroup.meter("numBetween10KBAnd100KBEmlFilesOutPerSecond", new MeterView(between10KBAnd100KBEmlChunksCounter));
metricGroup.meter("numBetween100KBAnd1MBEmlFilesOutPerSecond", new MeterView(between100KBAnd1MBEmlChunksCounter));
metricGroup.meter("numBetween1MBAnd10MBEmlFilesOutPerSecond", new MeterView(between1MBAnd10MBEmlChunksCounter));
metricGroup.meter("numGreaterThan10MBEmlFilesOutPerSecond", new MeterView(greaterThan10MBEmlChunksCounter));
lessThan10KBTxtChunksCounter = metricGroup.counter("lessThan10KBTxtChunksCount");
between10KBAnd100KBTxtChunksCounter = metricGroup.counter("between10KBAnd100KBTxtChunksCount");
between100KBAnd1MBTxtChunksCounter = metricGroup.counter("between100KBAnd1MBTxtChunksCount");
between1MBAnd10MBTxtChunksCounter = metricGroup.counter("between1MBAnd10MBTxtChunksCount");
greaterThan10MBTxtChunksCounter = metricGroup.counter("greaterThan10MBTxtChunksCount");
metricGroup.meter("numLessThan10KBTxtChunksOutPerSecond", new MeterView(lessThan10KBTxtChunksCounter));
metricGroup.meter("numBetween10KBAnd100KBTxtChunksOutPerSecond", new MeterView(between10KBAnd100KBTxtChunksCounter));
metricGroup.meter("numBetween100KBAnd1MBTxtChunksOutPerSecond", new MeterView(between100KBAnd1MBTxtChunksCounter));
metricGroup.meter("numBetween1MBAnd10MBTxtChunksOutPerSecond", new MeterView(between1MBAnd10MBTxtChunksCounter));
metricGroup.meter("numGreaterThan10MBTxtChunksOutPerSecond", new MeterView(greaterThan10MBTxtChunksCounter));
emlChunksCounter = metricGroup.counter("emlChunksCount");
txtChunksCounter = metricGroup.counter("txtChunksCount");
metricGroup.meter("numEmlChunksOutPerSecond", new MeterView(emlChunksCounter));
metricGroup.meter("numTxtChunksOutPerSecond", new MeterView(txtChunksCounter));
fileMetasCounter = metricGroup.counter("fileMetasCount");
metricGroup.meter("numFileMetasInPerSecond", new MeterView(fileMetasCounter));
requestFileMetasCounter = metricGroup.counter("requestFileMetasCount");
responseFileMetasCounter = metricGroup.counter("responseFileMetasCount");
requestFilesCounter = metricGroup.counter("requestFilesCount");
responseFilesCounter = metricGroup.counter("responseFilesCount");
metricGroup.meter("numRequestFileMetasInPerSecond", new MeterView(requestFileMetasCounter));
metricGroup.meter("numResponseFileMetasInPerSecond", new MeterView(responseFileMetasCounter));
metricGroup.meter("numRequestFilesOutPerSecond", new MeterView(requestFilesCounter));
metricGroup.meter("numResponseFilesOutPerSecond", new MeterView(responseFilesCounter));
errorChunksCounter = metricGroup.counter("errorChunksCount");
chunksInCounter = metricGroup.counter("chunksInCount");
chunksOutCounter = metricGroup.counter("chunksOutCount");
bytesInCounter = metricGroup.counter("bytesInCount");
bytesOutCounter = metricGroup.counter("bytesOutCount");
metricGroup.meter("numChunksInPerSecond", new MeterView(chunksInCounter));
metricGroup.meter("numChunksOutPerSecond", new MeterView(chunksOutCounter));
metricGroup.meter("numBytesInPerSecond", new MeterView(bytesInCounter));
metricGroup.meter("numBytesOutPerSecond", new MeterView(bytesOutCounter));
metricGroup.meter("numErrorChunksPerSecond", new MeterView(errorChunksCounter));
completeFilesCounter = metricGroup.counter("completeFilesCount");
completeEmlFilesCounter = metricGroup.counter("completeEmlFilesCount");
completeTxtFilesCounter = metricGroup.counter("completeTxtFilesCount");
completeRequestFilesCounter = metricGroup.counter("completeRequestFilesCount");
completeResponseFilesCounter = metricGroup.counter("completeResponseFilesCount");
metricGroup.meter("numCompleteFilesOutPerSecond", new MeterView(completeFilesCounter));
metricGroup.meter("numCompleteEmlFilesOutPerSecond", new MeterView(completeEmlFilesCounter));
metricGroup.meter("numCompleteTxtFilesOutPerSecond", new MeterView(completeTxtFilesCounter));
metricGroup.meter("numCompleteRequestFilesOutPerSecond", new MeterView(completeRequestFilesCounter));
metricGroup.meter("numCompleteResponseFilesOutPerSecond", new MeterView(completeResponseFilesCounter));
nullChunksCounter = metricGroup.counter("nullChunksCount");
nullEmlChunksCounter = metricGroup.counter("nullEmlChunksCount");
nullTxtChunksCounter = metricGroup.counter("nullTxtChunksCount");
metricGroup.meter("numNullFilesOutPerSecond", new MeterView(nullChunksCounter));
metricGroup.meter("numNullEmlFilesOutPerSecond", new MeterView(nullEmlChunksCounter));
metricGroup.meter("numNullTxtFilesOutPerSecond", new MeterView(nullTxtChunksCounter));
}
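
// Note: each MeterView registered above reports its counter's per-second rate;
// Flink's MeterView averages over a 60-second window by default.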

@Override
public void invoke(FileChunk fileChunk, Context context) {
    String uuid = fileChunk.getUuid();
    if (fileChunk.getMeta() != null) { // meta (log) record
        fileMetasCounter.inc();
        Map<String, Object> meta = fileChunk.getMeta();
        String fileId = meta.get("fileId").toString();
        if (fileId.contains("_1")) {
            requestFileMetasCounter.inc();
        } else if (fileId.contains("_2")) {
            responseFileMetasCounter.inc();
        }
        FileChunk data = dataCache.get(uuid);
        if (data != null) {
            sendFile(data, meta);
            dataCache.remove(uuid);
        } else {
            metaCache.put(fileChunk.getUuid(), fileChunk);
        }
    } else { // file content record
        chunksInCounter.inc();
        bytesInCounter.inc(fileChunk.getLength());
        FileChunk meta = metaCache.get(uuid);
        if (meta != null) {
            sendFile(fileChunk, meta.getMeta());
            metaCache.remove(uuid);
        } else {
            dataCache.put(fileChunk.getUuid(), fileChunk);
        }
    }
}
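
// invoke() joins the two streams by uuid: whichever half (meta or data)
// arrives first is parked in its cache until the other half shows up, then
// the pair is posted via sendFile(). Entries that never pair up are left to
// the caches' expiry policy.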

@Override
public void close() {
    IoUtil.close(syncHttpClient);
    IoUtil.close(asyncHttpClient);
    ehcacheUtil.close();
}

private void sendFile(FileChunk fileChunk, Map<String, Object> metaMap) {
    String url = "";
    try {
        byte[] data;
        String fileType = fileChunk.getFileType();
        if (fileChunk.getChunk() != null) {
            data = fileChunk.getChunk();
        } else {
            data = "".getBytes();
        }
        String fileId = metaMap != null && metaMap.containsKey("fileId") ? metaMap.get("fileId").toString() : "";
        String policyId = metaMap != null && metaMap.containsKey("policyId") ? metaMap.get("policyId").toString() : "0";
        String serverIP = metaMap != null && metaMap.containsKey("serverIP") ? metaMap.get("serverIP").toString() : "";
        String serverPort = metaMap != null && metaMap.containsKey("serverPort") ? metaMap.get("serverPort").toString() : "";
        String clientIP = metaMap != null && metaMap.containsKey("clientIP") ? metaMap.get("clientIP").toString() : "";
        String clientPort = metaMap != null && metaMap.containsKey("clientPort") ? metaMap.get("clientPort").toString() : "";
        String domain = metaMap != null && metaMap.containsKey("httpHost") ? FormatUtils.getTopPrivateDomain(metaMap.get("httpHost").toString()) : "";
        String subscriberId = metaMap != null && metaMap.containsKey("subscriberId") ? metaMap.get("subscriberId").toString() : "";
        String foundTime = metaMap != null && metaMap.containsKey("foundTime") ? metaMap.get("foundTime").toString() : "0";
        url = URLUtil.normalize(endpointList.get(RandomUtil.randomInt(endpointList.size())) + "/v3/upload?" +
                "cfg_id=" + policyId +
                "&file_id=" + fileId +
                "&file_type=" + fileType +
                "&found_time=" + foundTime +
                "&s_ip=" + serverIP +
                "&s_port=" + serverPort +
                "&d_ip=" + clientIP +
                "&d_port=" + clientPort +
                "&domain=" + domain +
                "&account=" + subscriberId);
        HttpPost httpPost = new HttpPost(url);
        httpPost.setEntity(new ByteArrayEntity(data));
        executeRequest(httpPost, url);
        chunksOutCounter.inc();
        bytesOutCounter.inc(data.length);
        calculateFileChunkMetrics(fileChunk, fileId);
    } catch (Exception e) {
        LOG.error("post file error. current url: " + url, e);
        errorChunksCounter.inc();
    }
}
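
// Illustrative shape of the URL built above (host and all values are made up):
//   http://hos-host:8080/v3/upload?cfg_id=0&file_id=abc123_1&file_type=eml
//   &found_time=1690000000&s_ip=10.0.0.1&s_port=443&d_ip=10.0.0.2&d_port=52311
//   &domain=example.com&account=sub-01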

private void executeRequest(HttpPost httpPost, String url) {
    if (isAsync) {
        asyncHttpClient.execute(httpPost, new FutureCallback<HttpResponse>() {
            @Override
            public void completed(HttpResponse httpResponse) {
                try {
                    String responseEntity = EntityUtils.toString(httpResponse.getEntity(), "UTF-8");
                    if (httpResponse.getStatusLine().getStatusCode() == 200) {
                        if (!responseEntity.contains("\"code\":200")) {
                            LOG.error("post file error. current url: {}, msg: {}", url, responseEntity);
                            errorChunksCounter.inc();
                        }
                    } else {
                        LOG.error("post file error. current url: {}, code: {}, msg: {}", url, httpResponse.getStatusLine().getStatusCode(), responseEntity);
                        errorChunksCounter.inc();
                    }
                } catch (IOException e) {
                    LOG.error("post file error. current url: " + url, e);
                    errorChunksCounter.inc();
                }
            }

            @Override
            public void failed(Exception ex) {
                LOG.error("post file error. current url: " + url, ex);
                errorChunksCounter.inc();
            }

            @Override
            public void cancelled() {
                // no-op: nothing to release when the request is cancelled
            }
        });
    } else {
        CloseableHttpResponse response = null;
        try {
            response = syncHttpClient.execute(httpPost);
            String responseEntity = EntityUtils.toString(response.getEntity(), "UTF-8");
            if (response.getStatusLine().getStatusCode() == 200) {
                if (!responseEntity.contains("\"code\":200")) {
                    LOG.error("post file error. current url: {}, msg: {}", url, responseEntity);
                    errorChunksCounter.inc();
                }
            } else {
                LOG.error("post file error. current url: {}, code: {}, msg: {}", url, response.getStatusLine().getStatusCode(), responseEntity);
                errorChunksCounter.inc();
            }
        } catch (IOException e) {
            LOG.error("post file error. current url: " + url, e);
            errorChunksCounter.inc();
        } finally {
            IoUtil.close(response);
        }
    }
}

private void calculateFileChunkMetrics(FileChunk fileChunk, String fileId) {
    String fileType = fileChunk.getFileType();
    long length = fileChunk.getLength();
    calculateChunkSize(length);
    if ("eml".equals(fileType)) {
        emlChunksCounter.inc();
        calculateEmlChunkSize(length);
    } else if ("txt".equals(fileType)) {
        txtChunksCounter.inc();
        calculateTxtChunkSize(length);
    }
    if (fileId.contains("_1")) {
        requestFilesCounter.inc();
    } else if (fileId.contains("_2")) {
        responseFilesCounter.inc();
    }
    // offset == 0 plus the last-chunk flag presumably means the whole file
    // arrived as a single, final chunk, hence "complete".
    if (fileChunk.getOffset() == 0 && fileChunk.getLastChunkFlag() == 1) {
        completeFilesCounter.inc();
        if ("eml".equals(fileType)) {
            completeEmlFilesCounter.inc();
        } else if ("txt".equals(fileType)) {
            completeTxtFilesCounter.inc();
        }
        if (fileChunk.getChunk() == null) {
            nullChunksCounter.inc();
            if ("eml".equals(fileType)) {
                nullEmlChunksCounter.inc();
            } else if ("txt".equals(fileType)) {
                nullTxtChunksCounter.inc();
            }
            LOG.info("send file data is null. " + fileChunk.toString());
        }
        if (fileId.contains("_1")) {
            completeRequestFilesCounter.inc();
        } else if (fileId.contains("_2")) {
            completeResponseFilesCounter.inc();
        }
    }
}

private void calculateChunkSize(long length) {
    if (length <= 1024) {
        lessThan1KBChunksCounter.inc();
    } else if (length <= 5 * 1024) {
        between1KBAnd5KBChunksCounter.inc();
    } else if (length <= 10 * 1024) {
        between5KBAnd10KBChunksCounter.inc();
    } else if (length <= 100 * 1024) {
        between10KBAnd100KBChunksCounter.inc();
    } else if (length <= 1024 * 1024) {
        between100KBAnd1MBChunksCounter.inc();
    } else {
        greaterThan1MBChunksCounter.inc();
    }
}

private void calculateEmlChunkSize(long length) {
    if (length <= 10 * 1024) {
        lessThan10KBEmlChunksCounter.inc();
    } else if (length <= 100 * 1024) {
        between10KBAnd100KBEmlChunksCounter.inc();
    } else if (length <= 1024 * 1024) {
        between100KBAnd1MBEmlChunksCounter.inc();
    } else if (length <= 10 * 1024 * 1024) {
        between1MBAnd10MBEmlChunksCounter.inc();
    } else {
        greaterThan10MBEmlChunksCounter.inc();
    }
}

private void calculateTxtChunkSize(long length) {
    if (length <= 10 * 1024) {
        lessThan10KBTxtChunksCounter.inc();
    } else if (length <= 100 * 1024) {
        between10KBAnd100KBTxtChunksCounter.inc();
    } else if (length <= 1024 * 1024) {
        between100KBAnd1MBTxtChunksCounter.inc();
    } else if (length <= 10 * 1024 * 1024) {
        between1MBAnd10MBTxtChunksCounter.inc();
    } else {
        greaterThan10MBTxtChunksCounter.inc();
    }
}
}
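
The three size-bucket helpers above differ only in their thresholds and counters; a table-driven variant (a sketch with illustrative names, not code from this commit) would collapse them:

    // Sketch: one bucketing routine shared by the chunk/eml/txt size metrics.
    // counters must have THRESHOLDS.length + 1 entries; counters[i] guards
    // lengths <= THRESHOLDS[i], the last counter catches everything larger.
    private static final long[] THRESHOLDS = {10 * 1024, 100 * 1024, 1024 * 1024, 10 * 1024 * 1024};

    private void incrementSizeBucket(long length, org.apache.flink.metrics.Counter[] counters) {
        for (int i = 0; i < THRESHOLDS.length; i++) {
            if (length <= THRESHOLDS[i]) {
                counters[i].inc();
                return;
            }
        }
        counters[THRESHOLDS.length].inc();
    }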

src/main/java/com/zdjizhi/trigger/IdleTimeTrigger.java (new file, 66 lines)
@@ -0,0 +1,66 @@
package com.zdjizhi.trigger;

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.state.ReducingState;
import org.apache.flink.api.common.state.ReducingStateDescriptor;
import org.apache.flink.api.common.typeutils.base.LongSerializer;
import org.apache.flink.streaming.api.windowing.triggers.Trigger;
import org.apache.flink.streaming.api.windowing.triggers.TriggerResult;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;

public class IdleTimeTrigger<W extends TimeWindow> extends Trigger<Object, TimeWindow> {
    private static final long serialVersionUID = 1L;

    private final long maxIdleTime;

    private IdleTimeTrigger(long maxIdleTime) {
        this.maxIdleTime = maxIdleTime;
    }

    public static <W extends TimeWindow> IdleTimeTrigger<TimeWindow> of(long maxIdleTime) {
        return new IdleTimeTrigger<>(maxIdleTime);
    }

    private final ReducingStateDescriptor<Long> processingTimeStateDesc =
            new ReducingStateDescriptor<>("processTimer", new ReduceMax(), LongSerializer.INSTANCE);

    @Override
    public TriggerResult onElement(Object element, long timestamp, TimeWindow window, TriggerContext ctx) throws Exception {
        // Re-arm the idle timer: each new element pushes the fire time out by maxIdleTime.
        ReducingState<Long> fireTimestamp = ctx.getPartitionedState(processingTimeStateDesc);
        fireTimestamp.clear();
        long nextFireTimestamp = ctx.getCurrentProcessingTime() + maxIdleTime;
        ctx.registerProcessingTimeTimer(nextFireTimestamp);
        fireTimestamp.add(nextFireTimestamp);
        return TriggerResult.CONTINUE;
    }

    @Override
    public TriggerResult onProcessingTime(long time, TimeWindow window, TriggerContext ctx) throws Exception {
        // Only the most recently armed timer fires; stale timers fall through to CONTINUE.
        ReducingState<Long> fireTimestamp = ctx.getPartitionedState(processingTimeStateDesc);
        if (fireTimestamp.get() != null && fireTimestamp.get() == time) {
            fireTimestamp.clear();
            return TriggerResult.FIRE;
        }
        return TriggerResult.CONTINUE;
    }

    @Override
    public TriggerResult onEventTime(long time, TimeWindow window, TriggerContext ctx) {
        return TriggerResult.CONTINUE;
    }

    @Override
    public void clear(TimeWindow window, TriggerContext ctx) {
        ReducingState<Long> fireTimestamp = ctx.getPartitionedState(processingTimeStateDesc);
        fireTimestamp.clear();
    }

    private static class ReduceMax implements ReduceFunction<Long> {
        private static final long serialVersionUID = 1L;

        @Override
        public Long reduce(Long value1, Long value2) {
            return Math.max(value1, value2);
        }
    }
}
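
IdleTimeTrigger fires a window once no new element has arrived for maxIdleTime. A minimal usage sketch (FileChunkKeySelector exists in this project; the window size, idle gap, and process function are illustrative assumptions):

    import org.apache.flink.streaming.api.datastream.DataStream;
    import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
    import org.apache.flink.streaming.api.windowing.time.Time;

    // Sketch: emit a key's buffered chunks after 30 s without new arrivals.
    public static DataStream<FileChunk> combineByIdle(DataStream<FileChunk> chunks) {
        return chunks
                .keyBy(new FileChunkKeySelector())
                .window(TumblingProcessingTimeWindows.of(Time.minutes(10)))
                .trigger(IdleTimeTrigger.of(30_000L))               // 30 s idle gap (illustrative)
                .process(new CombineChunksProcessFunction());       // hypothetical combiner
    }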
@@ -38,9 +38,10 @@ sink.parallelism=1
 # options: hos, oss, hbase
 sink.type=hos
 sink.async=false
-sink.batch=false
+sink.batch=true
 sink.batch.count=1000
 sink.batch.size=1048576
+sink.batch.time=10
 #sink.filter.expression=
 #sink.rate.limit.threshold=0
 #sink.rate.limit.exclusion.expression=FileChunk.fileType == "eml"
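
For context on the three batch knobs: a batching sink typically flushes when any threshold trips, so the new time bound caps how long a partially filled batch can sit. A minimal sketch under that assumption (field names are illustrative, and sink.batch.time is assumed to be in seconds):

    // Sketch: flush when count, byte size, or age crosses its configured threshold.
    boolean shouldFlush(int pendingCount, long pendingBytes, long lastFlushMillis) {
        long ageMillis = System.currentTimeMillis() - lastFlushMillis;
        return pendingCount >= 1000           // sink.batch.count
                || pendingBytes >= 1048576    // sink.batch.size
                || ageMillis >= 10 * 1000L;   // sink.batch.time (assumed seconds)
    }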
@@ -1,36 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<config xmlns="http://www.ehcache.org/v3"
-        xmlns:jsr107="http://www.ehcache.org/v3/jsr107"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-        xsi:schemaLocation="http://www.ehcache.org/v3
-                            http://www.ehcache.org/schema/ehcache-core-3.10.xsd
-                            http://www.ehcache.org/v3/jsr107
-                            http://www.ehcache.org/schema/ehcache-107-ext-3.10.xsd">
-
-    <!-- <persistence directory="D:\myCache"/>-->
-    <cache alias="data">
-        <key-type>java.lang.String</key-type>
-        <value-type>com.zdjizhi.pojo.FileChunk</value-type>
-        <expiry>
-            <ttl unit="seconds">600</ttl>
-        </expiry>
-        <resources>
-            <heap unit="entries">100000</heap>
-            <!-- <offheap unit="GB">15</offheap>-->
-            <!-- <disk persistent="true" unit="MB">500</disk>-->
-        </resources>
-    </cache>
-
-    <cache alias="meta">
-        <key-type>java.lang.String</key-type>
-        <value-type>com.zdjizhi.pojo.FileChunk</value-type>
-        <expiry>
-            <ttl unit="seconds">1200</ttl>
-        </expiry>
-        <resources>
-            <heap unit="entries">100000</heap>
-            <!-- <offheap unit="GB">5</offheap>-->
-            <!-- <disk persistent="true" unit="MB">500</disk>-->
-        </resources>
-    </cache>
-</config>
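
The deleted XML above declared two on-heap caches ("data" and "meta", 100000 entries each, with 600 s and 1200 s TTLs). The same "data" cache can be declared with Ehcache 3's builder API; whether this commit actually switched to programmatic configuration is not shown here, so treat this purely as an equivalent sketch:

    import java.time.Duration;
    import org.ehcache.Cache;
    import org.ehcache.CacheManager;
    import org.ehcache.config.builders.CacheConfigurationBuilder;
    import org.ehcache.config.builders.CacheManagerBuilder;
    import org.ehcache.config.builders.ExpiryPolicyBuilder;
    import org.ehcache.config.builders.ResourcePoolsBuilder;

    // Sketch: programmatic equivalent of the removed "data" cache definition.
    static Cache<String, FileChunk> buildDataCache() {
        CacheManager cacheManager = CacheManagerBuilder.newCacheManagerBuilder()
                .withCache("data", CacheConfigurationBuilder
                        .newCacheConfigurationBuilder(String.class, FileChunk.class,
                                ResourcePoolsBuilder.heap(100000))
                        .withExpiry(ExpiryPolicyBuilder.timeToLiveExpiration(Duration.ofSeconds(600))))
                .build(true);  // true = initialize the manager immediately
        return cacheManager.getCache("data", String.class, FileChunk.class);
    }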