druid hlld升级到26.0.0

This commit is contained in:
lifengchao
2023-09-25 10:42:16 +08:00
parent 69cd9e3223
commit 26bb13fd74
10 changed files with 298 additions and 185 deletions

View File

@@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>org.apache.druid.extensions</groupId> <groupId>org.apache.druid.extensions</groupId>
<artifactId>druid-hlld_0.18.1</artifactId> <artifactId>druid-hlld_26.0.0</artifactId>
<name>druid-hlld</name> <name>druid-hlld</name>
<version>1.0-SNAPSHOT</version> <version>1.0-SNAPSHOT</version>
@@ -14,7 +14,7 @@
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<maven.compiler.source>1.8</maven.compiler.source> <maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target> <maven.compiler.target>1.8</maven.compiler.target>
<druid.version>0.18.1</druid.version> <druid.version>26.0.0</druid.version>
</properties> </properties>
<dependencies> <dependencies>
@@ -33,6 +33,14 @@
</dependency> </dependency>
<!-- Tests --> <!-- Tests -->
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
<version>4.3</version>
<scope>test</scope>
</dependency>
<dependency> <dependency>
<groupId>org.apache.druid</groupId> <groupId>org.apache.druid</groupId>
<artifactId>druid-processing</artifactId> <artifactId>druid-processing</artifactId>
@@ -42,9 +50,17 @@
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.druid</groupId> <groupId>org.apache.druid</groupId>
<artifactId>druid-benchmarks</artifactId> <artifactId>druid-server</artifactId>
<version>${druid.version}</version> <version>${druid.version}</version>
<scope>test</scope> <scope>test</scope>
<type>test-jar</type>
</dependency>
<dependency>
<groupId>org.apache.druid</groupId>
<artifactId>druid-sql</artifactId>
<version>${druid.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>junit</groupId> <groupId>junit</groupId>

View File

@@ -9,6 +9,7 @@ import org.apache.druid.query.aggregation.*;
import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.column.ColumnType;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.util.Collections; import java.util.Collections;
@@ -145,9 +146,9 @@ public class HllAggregatorFactory extends AggregatorFactory {
Math.max(precision, castedOther.precision), Math.max(precision, castedOther.precision),
round || castedOther.round round || castedOther.round
); );
} else {
throw new AggregatorFactoryNotMergeableException(this, other);
} }
throw new AggregatorFactoryNotMergeableException(this, other);
} }
@Override @Override
@@ -157,25 +158,38 @@ public class HllAggregatorFactory extends AggregatorFactory {
); );
} }
@Override
public AggregatorFactory withName(String newName) {
return new HllAggregatorFactory(newName, fieldName, precision, round);
}
@Override @Override
public Object deserialize(Object object) { public Object deserialize(Object object) {
return HllUtils.deserializeHll(object); return HllUtils.deserializeHll(object);
} }
@Override
public ColumnType getResultType() {
return round ? ColumnType.LONG : ColumnType.DOUBLE;
}
@Nullable @Nullable
@Override @Override
public Object finalizeComputation(@Nullable Object object) { public Object finalizeComputation(@Nullable Object object) {
if (object == null) { if (object == null) {
return null; return null;
} }
final Hll hll = (Hll) object;
return object;
/*final Hll hll = (Hll) object;
final double estimate = hll.size(); final double estimate = hll.size();
if (round) { if (round) {
return Math.round(estimate); return Math.round(estimate);
} else { } else {
return estimate; return estimate;
} }*/
} }
@Override @Override
@@ -199,9 +213,16 @@ public class HllAggregatorFactory extends AggregatorFactory {
return round; return round;
} }
/*
没这个方法了, 新版本需要实现getIntermediateType方法
@Override @Override
public String getTypeName() { public String getTypeName() {
return HllModule.HLLD_BUILD_TYPE_NAME; return HllModule.HLLD_BUILD_TYPE_NAME;
}*/
@Override
public ColumnType getIntermediateType() {
return HllModule.BUILD_TYPE;
} }
@Override @Override

View File

@@ -4,10 +4,12 @@ import com.fasterxml.jackson.annotation.JsonProperty;
import com.zdjz.galaxy.sketch.hlld.Hll; import com.zdjz.galaxy.sketch.hlld.Hll;
import com.zdjz.galaxy.sketch.hlld.HllUnion; import com.zdjz.galaxy.sketch.hlld.HllUnion;
import org.apache.druid.query.aggregation.Aggregator; import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.BufferAggregator; import org.apache.druid.query.aggregation.BufferAggregator;
import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.column.ColumnType;
import javax.annotation.Nullable; import javax.annotation.Nullable;
@@ -21,9 +23,16 @@ public class HllMergeAggregatorFactory extends HllAggregatorFactory{
super(name, fieldName, precision, round); super(name, fieldName, precision, round);
} }
/*
没这个方法了, 新版本需要实现getIntermediateType方法
@Override @Override
public String getTypeName(){ public String getTypeName(){
return HllModule.HLLD_TYPE_NAME; return HllModule.HLLD_TYPE_NAME;
}*/
@Override
public ColumnType getIntermediateType() {
return HllModule.TYPE;
} }
@Override @Override
@@ -44,6 +53,11 @@ public class HllMergeAggregatorFactory extends HllAggregatorFactory{
); );
} }
@Override
public AggregatorFactory withName(String newName) {
return new HllMergeAggregatorFactory(newName, fieldName, precision, round);
}
@Override @Override
public byte[] getCacheKey() { public byte[] getCacheKey() {
return new CacheKeyBuilder(HllModule.CACHE_TYPE_ID_OFFSET).appendByte(HllModule.HLLD_MERGE_CACHE_TYPE_ID) return new CacheKeyBuilder(HllModule.CACHE_TYPE_ID_OFFSET).appendByte(HllModule.HLLD_MERGE_CACHE_TYPE_ID)

View File

@@ -10,6 +10,7 @@ import org.apache.druid.initialization.DruidModule;
import org.apache.druid.query.aggregation.sketch.hlld.sql.HllApproxCountDistinctSqlAggregator; import org.apache.druid.query.aggregation.sketch.hlld.sql.HllApproxCountDistinctSqlAggregator;
import org.apache.druid.query.aggregation.sketch.hlld.sql.HllEstimateOperatorConversion; import org.apache.druid.query.aggregation.sketch.hlld.sql.HllEstimateOperatorConversion;
import org.apache.druid.query.aggregation.sketch.hlld.sql.HllObjectSqlAggregator; import org.apache.druid.query.aggregation.sketch.hlld.sql.HllObjectSqlAggregator;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.serde.ComplexMetrics; import org.apache.druid.segment.serde.ComplexMetrics;
import org.apache.druid.sql.guice.SqlBindings; import org.apache.druid.sql.guice.SqlBindings;
@@ -24,6 +25,9 @@ public class HllModule implements DruidModule {
public static final String HLLD_TYPE_NAME = "HLLDSketch"; public static final String HLLD_TYPE_NAME = "HLLDSketch";
public static final String HLLD_BUILD_TYPE_NAME = "HLLDSketchBuild"; public static final String HLLD_BUILD_TYPE_NAME = "HLLDSketchBuild";
public static final ColumnType TYPE = ColumnType.ofComplex(HLLD_TYPE_NAME);
public static final ColumnType BUILD_TYPE = ColumnType.ofComplex(HLLD_BUILD_TYPE_NAME);
@Override @Override
public void configure(Binder binder) { public void configure(Binder binder) {

View File

@@ -7,6 +7,8 @@ import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.PostAggregator; import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.aggregation.post.ArithmeticPostAggregator; import org.apache.druid.query.aggregation.post.ArithmeticPostAggregator;
import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.segment.ColumnInspector;
import org.apache.druid.segment.column.ColumnType;
import java.util.Comparator; import java.util.Comparator;
import java.util.Map; import java.util.Map;
@@ -29,6 +31,12 @@ public class HllToEstimatePostAggregator implements PostAggregator {
this.round = round; this.round = round;
} }
// 新版本需要实现的方法
@Override
public ColumnType getType(ColumnInspector signature) {
return round ? ColumnType.LONG : ColumnType.DOUBLE;
}
@Override @Override
@JsonProperty @JsonProperty
public String getName() { public String getName() {

View File

@@ -5,36 +5,44 @@ import org.apache.calcite.sql.SqlFunctionCategory;
import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.type.*; import org.apache.calcite.sql.type.*;
import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;
import org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator; import org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator;
import org.apache.druid.segment.VirtualColumn; import org.apache.druid.query.aggregation.sketch.hlld.HllAggregatorFactory;
import org.apache.druid.query.aggregation.sketch.hlld.HllToEstimatePostAggregator;
import org.apache.druid.sql.calcite.aggregation.Aggregation; import org.apache.druid.sql.calcite.aggregation.Aggregation;
import java.util.Collections; import java.util.Collections;
import java.util.List;
public class HllApproxCountDistinctSqlAggregator extends HllBaseSqlAggregator { public class HllApproxCountDistinctSqlAggregator extends HllBaseSqlAggregator {
private static final SqlAggFunction FUNCTION_INSTANCE = new CPCSketchApproxCountDistinctSqlAggFunction(); private static final SqlAggFunction FUNCTION_INSTANCE = new CPCSketchApproxCountDistinctSqlAggFunction();
private static final String NAME = "APPROX_COUNT_DISTINCT_HLLD"; private static final String NAME = "APPROX_COUNT_DISTINCT_HLLD";
public HllApproxCountDistinctSqlAggregator(){
super(true);
}
@Override @Override
public SqlAggFunction calciteFunction() { public SqlAggFunction calciteFunction() {
return FUNCTION_INSTANCE; return FUNCTION_INSTANCE;
} }
// 新版本参数少了virtualColumns
@Override @Override
protected Aggregation toAggregation( protected Aggregation toAggregation(
String name, String name,
boolean finalizeAggregations, boolean finalizeAggregations,
List<VirtualColumn> virtualColumns,
AggregatorFactory aggregatorFactory AggregatorFactory aggregatorFactory
) { ) {
return Aggregation.create( return Aggregation.create(
virtualColumns,
Collections.singletonList(aggregatorFactory), Collections.singletonList(aggregatorFactory),
//感觉是否是最外层的函数吧 //感觉是否是最外层的函数吧
finalizeAggregations ? new FinalizingFieldAccessPostAggregator( finalizeAggregations ? new HllToEstimatePostAggregator(
name, name,
aggregatorFactory.getName() new FieldAccessPostAggregator(
aggregatorFactory.getName(),
aggregatorFactory.getName()
),
((HllAggregatorFactory)aggregatorFactory).isRound()
) : null ) : null
); );
} }

View File

@@ -2,6 +2,7 @@ package org.apache.druid.query.aggregation.sketch.hlld.sql;
import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.AggregateCall;
import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexNode;
@@ -14,6 +15,7 @@ import org.apache.druid.query.aggregation.sketch.hlld.HllMergeAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.segment.VirtualColumn; import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.column.ValueType;
import org.apache.druid.sql.calcite.aggregation.Aggregation; import org.apache.druid.sql.calcite.aggregation.Aggregation;
@@ -29,6 +31,13 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
public abstract class HllBaseSqlAggregator implements SqlAggregator { public abstract class HllBaseSqlAggregator implements SqlAggregator {
private final boolean finalizeSketch;
protected HllBaseSqlAggregator(boolean finalizeSketch){
this.finalizeSketch = finalizeSketch;
}
@Nullable @Nullable
@Override @Override
public Aggregation toDruidAggregation( public Aggregation toDruidAggregation(
@@ -93,13 +102,14 @@ public abstract class HllBaseSqlAggregator implements SqlAggregator {
round = HllAggregatorFactory.DEFAULT_ROUND; round = HllAggregatorFactory.DEFAULT_ROUND;
} }
final List<VirtualColumn> virtualColumns = new ArrayList<>(); // 新版本删除了final List<VirtualColumn> virtualColumns = new ArrayList<>();
final AggregatorFactory aggregatorFactory; final AggregatorFactory aggregatorFactory;
final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name; //final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name;
final String aggregatorName = finalizeSketch ? Calcites.makePrefixedName(name, "a") : name;
// 输入是Cpc返回HllMergeAggregatorFactory // 输入是Hll返回HllSketchMergeAggregatorFactory
if (columnArg.isDirectColumnAccess() if (columnArg.isDirectColumnAccess()
&& rowSignature.getColumnType(columnArg.getDirectColumn()).orElse(null) == ValueType.COMPLEX) { && rowSignature.getColumnType(columnArg.getDirectColumn()).map(type -> type.is(ValueType.COMPLEX)).orElse(false)) {
// 这就是具体的聚合函数吧 // 这就是具体的聚合函数吧
aggregatorFactory = new HllMergeAggregatorFactory( aggregatorFactory = new HllMergeAggregatorFactory(
aggregatorName, aggregatorName,
@@ -109,10 +119,10 @@ public abstract class HllBaseSqlAggregator implements SqlAggregator {
); );
} else { } else {
// 输入是regular columnHllBuildAggregatorFactory // 输入是regular columnHllBuildAggregatorFactory
final SqlTypeName sqlTypeName = columnRexNode.getType().getSqlTypeName(); final RelDataType dataType = columnRexNode.getType();
final ValueType inputType = Calcites.getValueTypeForSqlTypeName(sqlTypeName); final ColumnType inputType = Calcites.getColumnTypeForRelDataType(dataType);
if (inputType == null) { if (inputType == null) {
throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", sqlTypeName, aggregatorName); throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", dataType.getSqlTypeName(), aggregatorName);
} }
final DimensionSpec dimensionSpec; final DimensionSpec dimensionSpec;
@@ -120,27 +130,34 @@ public abstract class HllBaseSqlAggregator implements SqlAggregator {
if (columnArg.isDirectColumnAccess()) { if (columnArg.isDirectColumnAccess()) {
dimensionSpec = columnArg.getSimpleExtraction().toDimensionSpec(null, inputType); dimensionSpec = columnArg.getSimpleExtraction().toDimensionSpec(null, inputType);
} else { } else {
VirtualColumn virtualColumn = virtualColumnRegistry.getOrCreateVirtualColumnForExpression( String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(
plannerContext,
columnArg, columnArg,
sqlTypeName dataType
); );
dimensionSpec = new DefaultDimensionSpec(virtualColumn.getOutputName(), null, inputType); dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType);
virtualColumns.add(virtualColumn);
} }
aggregatorFactory = new HllAggregatorFactory( // 新版本的判断输入是Hll
aggregatorName, if (inputType.is(ValueType.COMPLEX)) {
dimensionSpec.getDimension(), aggregatorFactory = new HllMergeAggregatorFactory(
precision, aggregatorName,
round dimensionSpec.getOutputName(),
); precision,
round
);
} else {
aggregatorFactory = new HllAggregatorFactory(
aggregatorName,
dimensionSpec.getDimension(),
precision,
round
);
}
} }
return toAggregation( return toAggregation(
name, name,
finalizeAggregations, finalizeSketch,
virtualColumns,
aggregatorFactory aggregatorFactory
); );
} }
@@ -148,7 +165,6 @@ public abstract class HllBaseSqlAggregator implements SqlAggregator {
protected abstract Aggregation toAggregation( protected abstract Aggregation toAggregation(
String name, String name,
boolean finalizeAggregations, boolean finalizeAggregations,
List<VirtualColumn> virtualColumns,
AggregatorFactory aggregatorFactory AggregatorFactory aggregatorFactory
); );
} }

View File

@@ -13,16 +13,15 @@ import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.aggregation.sketch.hlld.HllAggregatorFactory; import org.apache.druid.query.aggregation.sketch.hlld.HllAggregatorFactory;
import org.apache.druid.query.aggregation.sketch.hlld.HllToEstimatePostAggregator; import org.apache.druid.query.aggregation.sketch.hlld.HllToEstimatePostAggregator;
import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.sql.calcite.expression.DirectOperatorConversion; import org.apache.druid.sql.calcite.expression.*;
import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.expression.OperatorConversions;
import org.apache.druid.sql.calcite.expression.PostAggregatorVisitor;
import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.planner.PlannerContext;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.util.List; import java.util.List;
public class HllEstimateOperatorConversion extends DirectOperatorConversion { // postAggregator, toDruidExpression返回null。相当于post udf和普通udf是不一样的。
// 新版本直接修改了父类
public class HllEstimateOperatorConversion implements SqlOperatorConversion {
private static final String FUNCTION_NAME = "HLLD_ESTIMATE"; private static final String FUNCTION_NAME = "HLLD_ESTIMATE";
private static final SqlFunction SQL_FUNCTION = OperatorConversions private static final SqlFunction SQL_FUNCTION = OperatorConversions
.operatorBuilder(StringUtils.toUpperCase(FUNCTION_NAME)) .operatorBuilder(StringUtils.toUpperCase(FUNCTION_NAME))
@@ -32,9 +31,7 @@ public class HllEstimateOperatorConversion extends DirectOperatorConversion {
.returnTypeInference(ReturnTypes.DOUBLE) .returnTypeInference(ReturnTypes.DOUBLE)
.build(); .build();
public HllEstimateOperatorConversion() { // 新版本少了构造函数
super(SQL_FUNCTION, FUNCTION_NAME);
}
@Override @Override
public SqlOperator calciteOperator() { public SqlOperator calciteOperator() {
@@ -63,7 +60,8 @@ public class HllEstimateOperatorConversion extends DirectOperatorConversion {
plannerContext, plannerContext,
rowSignature, rowSignature,
operands.get(0), operands.get(0),
postAggregatorVisitor postAggregatorVisitor,
true // 新版本多了个参数
); );
if (firstOperand == null) { if (firstOperand == null) {

View File

@@ -5,16 +5,18 @@ import org.apache.calcite.sql.SqlFunctionCategory;
import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.type.*; import org.apache.calcite.sql.type.*;
import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.sql.calcite.aggregation.Aggregation; import org.apache.druid.sql.calcite.aggregation.Aggregation;
import java.util.Collections; import java.util.Collections;
import java.util.List;
public class HllObjectSqlAggregator extends HllBaseSqlAggregator { public class HllObjectSqlAggregator extends HllBaseSqlAggregator {
private static final SqlAggFunction FUNCTION_INSTANCE = new CpcSketchSqlAggFunction(); private static final SqlAggFunction FUNCTION_INSTANCE = new CpcSketchSqlAggFunction();
private static final String NAME = "HLLD"; private static final String NAME = "HLLD";
public HllObjectSqlAggregator(){
super(false);
}
@Override @Override
public SqlAggFunction calciteFunction() { public SqlAggFunction calciteFunction() {
return FUNCTION_INSTANCE; return FUNCTION_INSTANCE;
@@ -24,11 +26,9 @@ public class HllObjectSqlAggregator extends HllBaseSqlAggregator {
protected Aggregation toAggregation( protected Aggregation toAggregation(
String name, String name,
boolean finalizeAggregations, boolean finalizeAggregations,
List<VirtualColumn> virtualColumns,
AggregatorFactory aggregatorFactory AggregatorFactory aggregatorFactory
) { ) {
return Aggregation.create( return Aggregation.create(
virtualColumns,
Collections.singletonList(aggregatorFactory), Collections.singletonList(aggregatorFactory),
null null
); );

View File

@@ -1,83 +1,64 @@
package org.apache.druid.query.aggregation.sketch.hlld.sql; package org.apache.druid.query.aggregation.sketch.hlld.sql;
import com.alibaba.fastjson2.JSON;
import com.fasterxml.jackson.databind.Module; import com.fasterxml.jackson.databind.Module;
import com.google.common.collect.ImmutableMap; import com.google.inject.Injector;
import com.google.common.collect.ImmutableSet; import org.apache.druid.guice.DruidInjectorBuilder;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.query.QueryRunnerFactoryConglomerate;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
import org.apache.druid.query.aggregation.sketch.hlld.HllAggregatorFactory;
import org.apache.druid.query.aggregation.sketch.hlld.HllModule; import org.apache.druid.query.aggregation.sketch.hlld.HllModule;
import org.apache.druid.segment.IndexBuilder;
import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.sql.calcite.BaseCalciteQueryTest;
import org.apache.druid.server.QueryStackTests; import org.apache.druid.sql.calcite.QueryTestBuilder;
import org.apache.druid.server.security.AuthTestUtils; import org.apache.druid.sql.calcite.QueryTestRunner;
import org.apache.druid.server.security.AuthenticationResult;
import org.apache.druid.sql.SqlLifecycle;
import org.apache.druid.sql.SqlLifecycleFactory;
import org.apache.druid.sql.calcite.planner.DruidOperatorTable;
import org.apache.druid.sql.calcite.planner.PlannerConfig;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import org.apache.druid.sql.calcite.planner.PlannerFactory;
import org.apache.druid.sql.calcite.util.CalciteTestBase;
import org.apache.druid.sql.calcite.util.CalciteTests; import org.apache.druid.sql.calcite.util.CalciteTests;
import org.apache.druid.sql.calcite.util.QueryLogHook;
import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker; import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec; import org.apache.druid.timeline.partition.LinearShardSpec;
import org.junit.*; import org.junit.*;
import org.junit.rules.TemporaryFolder;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.*;
import java.util.List;
import java.util.Map;
public class HllApproxCountDistinctSqlAggregatorTest extends CalciteTestBase { // 新版本父类直接变了,实现更简单了
private static final String DATA_SOURCE = "foo"; public class HllApproxCountDistinctSqlAggregatorTest extends BaseCalciteQueryTest {
private static final boolean ROUND = true; private static final boolean ROUND = true;
private static final Map<String, Object> QUERY_CONTEXT_DEFAULT = ImmutableMap.of(
PlannerContext.CTX_SQL_QUERY_ID, "dummy"
);
private static QueryRunnerFactoryConglomerate conglomerate;
private static Closer resourceCloser;
private static AuthenticationResult authenticationResult = CalciteTests.REGULAR_USER_AUTH_RESULT;
@Rule @Override
public TemporaryFolder temporaryFolder = new TemporaryFolder(); public void gatherProperties(Properties properties)
{
@Rule super.gatherProperties(properties);
public QueryLogHook queryLogHook = QueryLogHook.create(TestHelper.JSON_MAPPER);
private SpecificSegmentsQuerySegmentWalker walker;
private SqlLifecycleFactory sqlLifecycleFactory;
@BeforeClass
public static void setUpClass() {
resourceCloser = Closer.create();
conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate(resourceCloser);
} }
@AfterClass @Override
public static void tearDownClass() throws IOException { public void configureGuice(DruidInjectorBuilder builder)
resourceCloser.close(); {
super.configureGuice(builder);
builder.addModule(new HllModule());
} }
@Before
public void setUp() throws Exception {
@SuppressWarnings("resource")
@Override
public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker(
final QueryRunnerFactoryConglomerate conglomerate,
final JoinableFactoryWrapper joinableFactory,
final Injector injector
) throws IOException
{
HllModule.registerSerde(); HllModule.registerSerde();
for (Module mod : new HllModule().getJacksonModules()) { for (Module mod : new HllModule().getJacksonModules()) {
CalciteTests.getJsonMapper().registerModule(mod); CalciteTests.getJsonMapper().registerModule(mod);
TestHelper.JSON_MAPPER.registerModule(mod); TestHelper.JSON_MAPPER.registerModule(mod);
} }
final QueryableIndex index = IndexBuilder.create() final QueryableIndex index = TestHelper.getTestIndexIO().loadIndex(new File("D:/doc/datas/testIndex-1369101812"));
//final QueryableIndex index = TestHelper.getTestIndexIO().loadIndex(new File("D:/doc/datas/9_index"));
/*final QueryableIndex index = IndexBuilder.create()
.tmpDir(temporaryFolder.newFolder()) .tmpDir(temporaryFolder.newFolder())
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
.schema( .schema(
@@ -95,12 +76,12 @@ public class HllApproxCountDistinctSqlAggregatorTest extends CalciteTestBase {
.withRollup(false) .withRollup(false)
.build() .build()
) )
.rows(CalciteTests.ROWS1) .rows(TestDataBuilder.ROWS1)
.buildMMappedIndex(); .buildMMappedIndex();*/
walker = new SpecificSegmentsQuerySegmentWalker(conglomerate).add( return new SpecificSegmentsQuerySegmentWalker(conglomerate).add(
DataSegment.builder() DataSegment.builder()
.dataSource(DATA_SOURCE) .dataSource(CalciteTests.DATASOURCE1)
.interval(index.getDataInterval()) .interval(index.getDataInterval())
.version("1") .version("1")
.shardSpec(new LinearShardSpec(0)) .shardSpec(new LinearShardSpec(0))
@@ -108,45 +89,47 @@ public class HllApproxCountDistinctSqlAggregatorTest extends CalciteTestBase {
.build(), .build(),
index index
); );
final PlannerConfig plannerConfig = new PlannerConfig();
final DruidOperatorTable operatorTable = new DruidOperatorTable(
ImmutableSet.of(
new HllApproxCountDistinctSqlAggregator(),
new HllObjectSqlAggregator()
),
ImmutableSet.of(
new HllEstimateOperatorConversion()
)
);
SchemaPlus rootSchema = CalciteTests.createMockRootSchema(conglomerate, walker, plannerConfig, AuthTestUtils.TEST_AUTHORIZER_MAPPER);
sqlLifecycleFactory = CalciteTests.createSqlLifecycleFactory(
new PlannerFactory(
rootSchema,
CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate),
operatorTable,
CalciteTests.createExprMacroTable(),
plannerConfig,
AuthTestUtils.TEST_AUTHORIZER_MAPPER,
CalciteTests.getJsonMapper(),
CalciteTests.DRUID_SCHEMA_NAME
)
);
}
@After
public void tearDown() throws Exception {
walker.close();
walker = null;
} }
@Test @Test
public void testSqlQuery() throws Exception { public void testSqlQuery() throws Exception {
SqlLifecycle sqlLifecycle = sqlLifecycleFactory.factorize(); // Can't vectorize due to SUBSTRING expression.
String sql = "select * from druid.foo"; cannotVectorize();
final List<Object[]> results = String[] columns = new String[]{"__time", "dim1", "dim2", "dim3", "cnt", "hll_dim1", "m1"};
sqlLifecycle.runSimple(sql, QUERY_CONTEXT_DEFAULT, DEFAULT_PARAMETERS, authenticationResult).toList();
String sql = "select " + String.join(",", columns) + " from druid.foo";
QueryTestBuilder builder = testBuilder().sql(sql);
builder.run();
QueryTestRunner.QueryResults queryResults = builder.results();
List<Object[]> results = queryResults.results;
for (Object[] result : results) {
Map row = new LinkedHashMap();
for (int i = 0; i < result.length; i++) {
row.put(columns[i], result[i]);
}
System.out.println(JSON.toJSONString(row));
// System.out.println(Arrays.toString(result));
}
for (int i = 0; i < columns.length; i++) {
Object[] values = new Object[results.size()];
for (int j = 0; j < results.size(); j++) {
values[j] = results.get(j)[i];
}
System.out.println(columns[i] + ":" + Arrays.toString(values));
}
}
@Test
public void testSqlQuery1() throws Exception {
// Can't vectorize due to SUBSTRING expression.
cannotVectorize();
String sql = "select dim1 from druid.foo";
QueryTestBuilder builder = testBuilder().sql(sql);
builder.run();
QueryTestRunner.QueryResults queryResults = builder.results();
List<Object[]> results = queryResults.results;
for (Object[] result : results) { for (Object[] result : results) {
System.out.println(Arrays.toString(result)); System.out.println(Arrays.toString(result));
} }
@@ -154,37 +137,67 @@ public class HllApproxCountDistinctSqlAggregatorTest extends CalciteTestBase {
@Test @Test
public void testSqlQuery2() throws Exception { public void testSqlQuery2() throws Exception {
SqlLifecycle sqlLifecycle = sqlLifecycleFactory.factorize(); //cannotVectorize();
String sql = "select HLLD_ESTIMATE(HLLD(hll_dim1)) from druid.foo where dim1 = ''"; //String sql = "select HLLD_ESTIMATE(HLLD(hll_dim1)) from druid.foo where dim1 = '1'";
final List<Object[]> results = // Caused by: org.apache.calcite.sql.validate.SqlValidatorException: Aggregate expressions cannot be nested
sqlLifecycle.runSimple(sql, QUERY_CONTEXT_DEFAULT, DEFAULT_PARAMETERS, authenticationResult).toList(); //String sql = "select HLLD_ESTIMATE(HLLD(hll_dim1)), APPROX_COUNT_DISTINCT_HLLD(HLLD(hll_dim1)), HLLD(hll_dim1) from druid.foo";
String sql = "select HLLD_ESTIMATE(HLLD(hll_dim1)), APPROX_COUNT_DISTINCT_HLLD(hll_dim1), HLLD(hll_dim1) from (select HLLD(hll_dim1) hll_dim1 from druid.foo) t";
QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
builder.run();
QueryTestRunner.QueryResults queryResults = builder.results();
List<Object[]> results = queryResults.results;
for (Object[] result : results) { for (Object[] result : results) {
System.out.println(Arrays.toString(result)); System.out.println(Arrays.toString(result));
} }
} }
@Test @Test
public void testAgg() throws Exception { public void testSqlQuery3() throws Exception {
SqlLifecycle sqlLifecycle = sqlLifecycleFactory.factorize(); //cannotVectorize();
//String sql = "select HLLD_ESTIMATE(HLLD(hll_dim1)) from druid.foo where dim1 = ''";
String sql = "select APPROX_COUNT_DISTINCT_HLLD(hll, 12) from (select HLLD(hll_dim1) hll from druid.foo where dim1 = '1') t ";
QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
builder.run();
QueryTestRunner.QueryResults queryResults = builder.results();
List<Object[]> results = queryResults.results;
for (Object[] result : results) {
System.out.println(Arrays.toString(result));
}
}
@Test
public void testSqlQuery4() throws Exception {
//cannotVectorize();
//String sql = "select HLLD_ESTIMATE(HLLD(hll_dim1)) from druid.foo where dim1 = ''";
String sql = "select APPROX_COUNT_DISTINCT_HLLD(hll, 12) from (select HLLD(hll_dim1) hll from druid.foo where dim1 = '1') t ";
QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
builder.run();
QueryTestRunner.QueryResults queryResults = builder.results();
List<Object[]> results = queryResults.results;
for (Object[] result : results) {
System.out.println(Arrays.toString(result));
}
}
@Test
public void testAgg() throws Exception {
final String sql = "SELECT\n" final String sql = "SELECT\n"
+ " SUM(cnt),\n" + " SUM(cnt),\n"
+ " APPROX_COUNT_DISTINCT_HLLD(hll_dim1)\n" + " APPROX_COUNT_DISTINCT_HLLD(hll_dim1)\n"
+ "FROM druid.foo"; + "FROM druid.foo";
final List<Object[]> results = QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
sqlLifecycle.runSimple(sql, QUERY_CONTEXT_DEFAULT, DEFAULT_PARAMETERS, authenticationResult).toList(); builder.run();
for (Object[] result : results) { QueryTestRunner.QueryResults queryResults = builder.results();
System.out.println(Arrays.toString(result)); List<Object[]> results = queryResults.results;
} for (Object[] result : results) {
System.out.println(Arrays.toString(result));
}
} }
@Test
@Test
public void testDistinct() throws Exception { public void testDistinct() throws Exception {
SqlLifecycle sqlLifecycle = sqlLifecycleFactory.factorize();
final String sql = "SELECT\n" final String sql = "SELECT\n"
+ " SUM(cnt),\n" + " SUM(cnt),\n"
+ " APPROX_COUNT_DISTINCT_HLLD(dim2),\n" // uppercase + " APPROX_COUNT_DISTINCT_HLLD(dim2),\n" // uppercase
@@ -195,18 +208,17 @@ public class HllApproxCountDistinctSqlAggregatorTest extends CalciteTestBase {
+ " APPROX_COUNT_DISTINCT_HLLD(hll_dim1)\n" // on native HllSketch column + " APPROX_COUNT_DISTINCT_HLLD(hll_dim1)\n" // on native HllSketch column
+ "FROM druid.foo"; + "FROM druid.foo";
final List<Object[]> results = QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
sqlLifecycle.runSimple(sql, QUERY_CONTEXT_DEFAULT, DEFAULT_PARAMETERS, authenticationResult).toList(); builder.run();
for (Object[] result : results) { QueryTestRunner.QueryResults queryResults = builder.results();
System.out.println(Arrays.toString(result)); List<Object[]> results = queryResults.results;
} for (Object[] result : results) {
System.out.println(Arrays.toString(result));
}
} }
@Test @Test
public void testDistinct2() throws Exception { public void testDistinct2() throws Exception {
SqlLifecycle sqlLifecycle = sqlLifecycleFactory.factorize();
final String sql = "SELECT\n" final String sql = "SELECT\n"
+ " SUM(cnt),\n" + " SUM(cnt),\n"
+ " APPROX_COUNT_DISTINCT_HLLD(dim2),\n" + " APPROX_COUNT_DISTINCT_HLLD(dim2),\n"
@@ -219,8 +231,26 @@ public class HllApproxCountDistinctSqlAggregatorTest extends CalciteTestBase {
+ " APPROX_COUNT_DISTINCT_HLLD(hll_dim1)\n" // on native HllSketch column + " APPROX_COUNT_DISTINCT_HLLD(hll_dim1)\n" // on native HllSketch column
+ "FROM druid.foo"; + "FROM druid.foo";
final List<Object[]> results = QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
sqlLifecycle.runSimple(sql, QUERY_CONTEXT_DEFAULT, DEFAULT_PARAMETERS, authenticationResult).toList(); builder.run();
QueryTestRunner.QueryResults queryResults = builder.results();
List<Object[]> results = queryResults.results;
for (Object[] result : results) {
System.out.println(Arrays.toString(result));
}
}
@Test
public void testDistinctDebug2() throws Exception {
final String sql = "SELECT\n"
+ " dim1, dim2\n"
+ "FROM druid.foo";
QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
builder.run();
QueryTestRunner.QueryResults queryResults = builder.results();
List<Object[]> results = queryResults.results;
for (Object[] result : results) { for (Object[] result : results) {
System.out.println(Arrays.toString(result)); System.out.println(Arrays.toString(result));
} }
@@ -229,15 +259,15 @@ public class HllApproxCountDistinctSqlAggregatorTest extends CalciteTestBase {
@Test @Test
public void testDistinctDebug() throws Exception { public void testDistinctDebug() throws Exception {
SqlLifecycle sqlLifecycle = sqlLifecycleFactory.factorize();
final String sql = "SELECT\n" final String sql = "SELECT\n"
+ " SUM(cnt),\n" + " SUM(cnt),\n"
+ " APPROX_COUNT_DISTINCT_HLLD(dim2)\n" + " APPROX_COUNT_DISTINCT_HLLD(dim2)\n"
+ "FROM druid.foo"; + "FROM druid.foo";
final List<Object[]> results = QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
sqlLifecycle.runSimple(sql, QUERY_CONTEXT_DEFAULT, DEFAULT_PARAMETERS, authenticationResult).toList(); builder.run();
QueryTestRunner.QueryResults queryResults = builder.results();
List<Object[]> results = queryResults.results;
for (Object[] result : results) { for (Object[] result : results) {
System.out.println(Arrays.toString(result)); System.out.println(Arrays.toString(result));
} }
@@ -246,14 +276,14 @@ public class HllApproxCountDistinctSqlAggregatorTest extends CalciteTestBase {
@Test @Test
public void testDeser() throws Exception { public void testDeser() throws Exception {
SqlLifecycle sqlLifecycle = sqlLifecycleFactory.factorize();
final String sql = "SELECT\n" final String sql = "SELECT\n"
+ " APPROX_COUNT_DISTINCT_HLLD(hll_dim1) cnt\n" + " APPROX_COUNT_DISTINCT_HLLD(hll_dim1) cnt\n"
+ "FROM druid.foo"; + "FROM druid.foo";
final List<Object[]> results = QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
sqlLifecycle.runSimple(sql, QUERY_CONTEXT_DEFAULT, DEFAULT_PARAMETERS, authenticationResult).toList(); builder.run();
QueryTestRunner.QueryResults queryResults = builder.results();
List<Object[]> results = queryResults.results;
for (Object[] result : results) { for (Object[] result : results) {
System.out.println(Arrays.toString(result)); System.out.println(Arrays.toString(result));
} }
@@ -263,30 +293,29 @@ public class HllApproxCountDistinctSqlAggregatorTest extends CalciteTestBase {
@Test @Test
public void testGroupBy() throws Exception { public void testGroupBy() throws Exception {
SqlLifecycle sqlLifecycle = sqlLifecycleFactory.factorize();
final String sql = "SELECT cnt,\n" final String sql = "SELECT cnt,\n"
+ " APPROX_COUNT_DISTINCT_HLLD(hll_dim1, 14) cnt2\n" + " APPROX_COUNT_DISTINCT_HLLD(hll_dim1, 14) cnt2\n"
+ "FROM druid.foo group by cnt"; + "FROM druid.foo group by cnt";
final List<Object[]> results = QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
sqlLifecycle.runSimple(sql, QUERY_CONTEXT_DEFAULT, DEFAULT_PARAMETERS, authenticationResult).toList(); builder.run();
QueryTestRunner.QueryResults queryResults = builder.results();
List<Object[]> results = queryResults.results;
for (Object[] result : results) { for (Object[] result : results) {
System.out.println(Arrays.toString(result)); System.out.println(Arrays.toString(result));
} }
} }
@Test @Test
public void testGroupBy1() throws Exception { public void testGroupBy1() throws Exception {
SqlLifecycle sqlLifecycle = sqlLifecycleFactory.factorize();
final String sql = "SELECT __time,\n" final String sql = "SELECT __time,\n"
+ " APPROX_COUNT_DISTINCT_HLLD(hll_dim1, 14) cnt\n" + " APPROX_COUNT_DISTINCT_HLLD(hll_dim1, 14) cnt\n"
+ "FROM druid.foo group by __time"; + "FROM druid.foo group by __time";
final List<Object[]> results = QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
sqlLifecycle.runSimple(sql, QUERY_CONTEXT_DEFAULT, DEFAULT_PARAMETERS, authenticationResult).toList(); builder.run();
QueryTestRunner.QueryResults queryResults = builder.results();
List<Object[]> results = queryResults.results;
for (Object[] result : results) { for (Object[] result : results) {
System.out.println(Arrays.toString(result)); System.out.println(Arrays.toString(result));
} }
@@ -295,14 +324,13 @@ public class HllApproxCountDistinctSqlAggregatorTest extends CalciteTestBase {
@Test @Test
public void testGroupBy2() throws Exception { public void testGroupBy2() throws Exception {
SqlLifecycle sqlLifecycle = sqlLifecycleFactory.factorize();
final String sql = "SELECT __time,\n" final String sql = "SELECT __time,\n"
+ " APPROX_COUNT_DISTINCT_HLLD(hll_dim1, 14) cnt\n" + " APPROX_COUNT_DISTINCT_HLLD(hll_dim1, 14) cnt\n"
+ "FROM druid.foo group by __time order by cnt desc"; + "FROM druid.foo group by __time order by cnt desc";
QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
final List<Object[]> results = builder.run();
sqlLifecycle.runSimple(sql, QUERY_CONTEXT_DEFAULT, DEFAULT_PARAMETERS, authenticationResult).toList(); QueryTestRunner.QueryResults queryResults = builder.results();
List<Object[]> results = queryResults.results;
for (Object[] result : results) { for (Object[] result : results) {
System.out.println(Arrays.toString(result)); System.out.println(Arrays.toString(result));
} }