Optimization: compute the getMaxIntermediateSize return value once at initialization and cache it; getMaxIntermediateSize is called once for every row of data

lifengchao
2024-01-31 17:34:24 +08:00
parent eb64880203
commit 00db131a55
8 changed files with 1625 additions and 1567 deletions
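
Every factory change below applies the same compute-once idea from the commit message: the maximum intermediate size depends only on constructor arguments, so it is computed a single time in the constructor and cached, and getMaxIntermediateSize() then just returns the cached field on each per-row call. A minimal, self-contained sketch of that pattern (the class and method names here are illustrative stand-ins, not code from this commit):

// Illustrative sketch only: ExpensiveSizeFactory and computeSize are made-up names,
// standing in for the aggregator factories changed in this commit.
public class ExpensiveSizeFactory {
    private final int maxIntermediateSize; // computed once, at construction time

    public ExpensiveSizeFactory(int precision) {
        // Eagerly compute the constant size so per-row callers only read a field.
        this.maxIntermediateSize = computeSize(precision);
    }

    // Called for every row; now a plain field read instead of a recomputation.
    public int getMaxIntermediateSize() {
        return maxIntermediateSize;
    }

    // Stand-in for the sketch library's serialization-size math.
    private static int computeSize(int precision) {
        return (1 << precision) + 16;
    }

    public static void main(String[] args) {
        ExpensiveSizeFactory factory = new ExpensiveSizeFactory(12);
        System.out.println(factory.getMaxIntermediateSize()); // same cached value on every call
    }
}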

HdrHistogramAggregatorFactory.java

@@ -1,342 +1,348 @@
 public class HdrHistogramAggregatorFactory extends AggregatorFactory {
     ...
     protected final long lowestDiscernibleValue;
     protected final long highestTrackableValue;
     protected final int numberOfSignificantValueDigits;
     protected final boolean autoResize; // default is false
+    protected final int updatableSerializationBytes;

     public HdrHistogramAggregatorFactory(
         @JsonProperty("name") String name,
         @JsonProperty("fieldName") String fieldName,
         @JsonProperty("lowestDiscernibleValue") @Nullable Long lowestDiscernibleValue,
         @JsonProperty("highestTrackableValue") @Nullable Long highestTrackableValue,
         @JsonProperty("numberOfSignificantValueDigits") @Nullable Integer numberOfSignificantValueDigits,
         @JsonProperty("autoResize") @Nullable Boolean autoResize
     ) {
         ...
         this.name = name;
         this.fieldName = fieldName;
         this.lowestDiscernibleValue = lowestDiscernibleValue;
         this.highestTrackableValue = highestTrackableValue;
         this.numberOfSignificantValueDigits = numberOfSignificantValueDigits;
         this.autoResize = autoResize;
+        this.updatableSerializationBytes = getUpdatableSerializationBytes();
     }

     ...

     @Override
     public int getMaxIntermediateSize() {
-        if(!autoResize){
-            /*Histogram histogram = new Histogram(lowestDiscernibleValue, highestTrackableValue, numberOfSignificantValueDigits);
-            histogram.setAutoResize(autoResize);
-            return histogram.getNeededByteBufferCapacity();*/
-            return HistogramSketch.getUpdatableSerializationBytes(lowestDiscernibleValue, highestTrackableValue, numberOfSignificantValueDigits);
-        }else{
-            //return (1 << 10) * 512;
-            return HistogramSketch.getUpdatableSerializationBytes(lowestDiscernibleValue, BUFFER_AUTO_RESIZE_HIGHEST, numberOfSignificantValueDigits);
-        }
+        return updatableSerializationBytes == 0? getUpdatableSerializationBytes():updatableSerializationBytes;
+    }
+
+    private int getUpdatableSerializationBytes(){
+        if(!autoResize){
+            /*Histogram histogram = new Histogram(lowestDiscernibleValue, highestTrackableValue, numberOfSignificantValueDigits);
+            histogram.setAutoResize(autoResize);
+            return histogram.getNeededByteBufferCapacity();*/
+            return HistogramSketch.getUpdatableSerializationBytes(lowestDiscernibleValue, highestTrackableValue, numberOfSignificantValueDigits);
+        }else{
+            //return (1 << 10) * 512;
+            return HistogramSketch.getUpdatableSerializationBytes(lowestDiscernibleValue, BUFFER_AUTO_RESIZE_HIGHEST, numberOfSignificantValueDigits);
+        }
     }

     ...
 }

HdrHistogramToPercentilesPostAggregator.java

@@ -1,118 +1,121 @@
 public class HdrHistogramToPercentilesPostAggregator implements PostAggregator {
     ...
     @Nullable
     @Override
     public Object compute(Map<String, Object> values) {
         HistogramSketch histogram = (HistogramSketch) values.get(fieldName);
+        if(histogram == null){
+            return "[]"; //"[]"
+        }
         List<Percentile> percentiles = histogram.percentileList(percentileTicksPerHalfDistance);
         return HdrHistogramModule.toJson(percentiles);
     }

     ...
 }

HdrHistogramToQuantilePostAggregator.java

@@ -1,125 +1,128 @@
 public class HdrHistogramToQuantilePostAggregator implements PostAggregator {
     ...
     @Nullable
     @Override
     public Object compute(Map<String, Object> values) {
         HistogramSketch histogram = (HistogramSketch) values.get(fieldName);
+        if(histogram == null){
+            return null;
+        }
         return histogram.getValueAtPercentile(probability * 100);
     }

     ...
 }

HdrHistogramToQuantilesPostAggregator.java

@@ -1,121 +1,125 @@
 public class HdrHistogramToQuantilesPostAggregator implements PostAggregator {
     ...
     @Nullable
     @Override
     public Object compute(Map<String, Object> values) {
         HistogramSketch histogram = (HistogramSketch) values.get(fieldName);
-        final long[] counts = new long[probabilitys.length];
+        if(histogram == null){
+            //return null;
+            return new Long[probabilitys.length];
+        }
+        final Long[] counts = new Long[probabilitys.length];
         for (int i = 0; i < probabilitys.length; i++) {
             counts[i] = histogram.getValueAtPercentile(probabilitys[i] * 100);
         }
         return counts;
     }

     ...
 }

HllAggregatorFactory.java

@@ -1,281 +1,287 @@
 public class HllAggregatorFactory extends AggregatorFactory {
     ...
     protected final String name;
     protected final String fieldName;
     protected final int precision;
     protected final boolean round;
+    protected final int updatableSerializationBytes;

     public HllAggregatorFactory(
         @JsonProperty("name") final String name,
         @JsonProperty("fieldName") final String fieldName,
         @JsonProperty("precision") @Nullable final Integer precision,
         @JsonProperty("round") @Nullable final Boolean round
     ) {
         ...
         this.name = name;
         this.fieldName = fieldName;
         this.precision = precision == null ? DEFAULT_PRECISION : precision;
         this.round = round == null ? DEFAULT_ROUND : round;
+        this.updatableSerializationBytes = getUpdatableSerializationBytes();
     }

     ...

     @Override
     public int getMaxIntermediateSize() {
-        return Hll.getUpdatableSerializationBytes(precision);
+        return updatableSerializationBytes == 0? getUpdatableSerializationBytes():updatableSerializationBytes;
+    }
+
+    protected int getUpdatableSerializationBytes(){
+        return Hll.getUpdatableSerializationBytes(precision);
     }

     ...
 }

HllMergeAggregatorFactory.java

@@ -1,73 +1,73 @@
 public class HllMergeAggregatorFactory extends HllAggregatorFactory{
     ...
     @Override
-    public int getMaxIntermediateSize() {
+    protected int getUpdatableSerializationBytes() {
         return HllUnion.getUpdatableSerializationBytes(precision);
     }
 }
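
Because the cached size is now computed in the HllAggregatorFactory constructor, HllMergeAggregatorFactory no longer overrides getMaxIntermediateSize() itself; it only overrides the protected getUpdatableSerializationBytes() hook. A compact sketch of that arrangement, with simplified stand-in names and size formulas (the real code defers to Hll and HllUnion for the actual byte counts):

// Illustrative stand-ins for HllAggregatorFactory / HllMergeAggregatorFactory.
class BaseFactory {
    protected final int precision;
    protected final int cachedSize;

    BaseFactory(int precision) {
        this.precision = precision;
        // The overridable hook runs during base-class construction; that is safe here only
        // because everything it reads (the precision field) has already been assigned above.
        this.cachedSize = getUpdatableSerializationBytes();
    }

    // Per-row callers read a field instead of recomputing the size.
    public int getMaxIntermediateSize() {
        return cachedSize;
    }

    // Stand-in for Hll.getUpdatableSerializationBytes(precision).
    protected int getUpdatableSerializationBytes() {
        return 1 << precision;
    }
}

class MergeFactory extends BaseFactory {
    MergeFactory(int precision) {
        super(precision);
    }

    // Stand-in for HllUnion.getUpdatableSerializationBytes(precision); the merge buffer
    // is assumed to need a little extra headroom in this sketch.
    @Override
    protected int getUpdatableSerializationBytes() {
        return (1 << precision) + 64;
    }

    public static void main(String[] args) {
        System.out.println(new BaseFactory(12).getMaxIntermediateSize());  // 4096
        System.out.println(new MergeFactory(12).getMaxIntermediateSize()); // 4160
    }
}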

HllToEstimatePostAggregator.java

@@ -1,111 +1,114 @@
package org.apache.druid.query.aggregation.sketch.hlld;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.zdjz.galaxy.sketch.hlld.Hll;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.aggregation.post.ArithmeticPostAggregator;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.segment.ColumnInspector;
import org.apache.druid.segment.column.ColumnType;

import java.util.Comparator;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

public class HllToEstimatePostAggregator implements PostAggregator {
  private final String name;
  private final PostAggregator field;
  private final boolean round;

  @JsonCreator
  public HllToEstimatePostAggregator(
      @JsonProperty("name") final String name,
      @JsonProperty("field") final PostAggregator field,
      @JsonProperty("round") boolean round
  ) {
    this.name = name;
    this.field = field;
    this.round = round;
  }

  // Method that must be implemented in the new version
  @Override
  public ColumnType getType(ColumnInspector signature) {
    return round ? ColumnType.LONG : ColumnType.DOUBLE;
  }

  @Override
  @JsonProperty
  public String getName() {
    return name;
  }

  @JsonProperty
  public PostAggregator getField() {
    return field;
  }

  @JsonProperty
  public boolean isRound() {
    return round;
  }

  @Override
  public Set<String> getDependentFields() {
    return field.getDependentFields();
  }

  @Override
  public Comparator<Double> getComparator() {
    return ArithmeticPostAggregator.DEFAULT_COMPARATOR;
  }

  @Override
  public Object compute(final Map<String, Object> combinedAggregators) {
    final Hll sketch = (Hll) field.compute(combinedAggregators);
    if (sketch == null) { // null guard added; previously the sketch was dereferenced unconditionally
      return round ? 0L : 0D;
    }
    return round ? Math.round(sketch.size()) : sketch.size();
  }

  @Override
  public PostAggregator decorate(final Map<String, AggregatorFactory> aggregators) {
    return this;
  }

  @Override
  public String toString() {
    return "HllToEstimatePostAggregator{" +
           "name='" + name + '\'' +
           ", field=" + field +
           ", round=" + round +
           '}';
  }

  @Override
  public boolean equals(final Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof HllToEstimatePostAggregator)) {
      return false;
    }

    final HllToEstimatePostAggregator that = (HllToEstimatePostAggregator) o;
    return name.equals(that.name) && field.equals(that.field) && round == that.round;
  }

  @Override
  public int hashCode() {
    return Objects.hash(name, field, round);
  }

  @Override
  public byte[] getCacheKey() {
    CacheKeyBuilder builder = new CacheKeyBuilder(HllModule.CACHE_TYPE_ID_OFFSET).appendByte(HllModule.HLLD_TO_ESTIMATE_CACHE_TYPE_ID)
        .appendCacheable(field).appendBoolean(round);
    return builder.build();
  }
}
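With the added null guard, a row whose field yields no sketch now produces a zero estimate instead of a NullPointerException. Below is a small, hypothetical usage sketch of that path; it is not from the repository, FieldAccessPostAggregator is the stock Druid field-access post-aggregator, and the column name is made up.

// Hypothetical usage sketch: evaluating the post-aggregator over the combined
// aggregator values of one result row whose HLLD sketch is missing.
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;
import org.apache.druid.query.aggregation.sketch.hlld.HllToEstimatePostAggregator;

import java.util.HashMap;
import java.util.Map;

class HllToEstimateNullGuardExample {
  public static void main(String[] args) {
    Map<String, Object> combined = new HashMap<>();
    combined.put("hll_dim1", null); // no sketch was produced for this row

    PostAggregator estimate = new HllToEstimatePostAggregator(
        "estimate",
        new FieldAccessPostAggregator("hll_dim1", "hll_dim1"),
        true // round
    );

    // Previously the cast result was dereferenced unconditionally and threw a
    // NullPointerException; with the null guard the estimate comes back as zero.
    System.out.println(estimate.compute(combined));
  }
}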


@@ -1,396 +1,429 @@
package org.apache.druid.query.aggregation.sketch.hlld.sql;

import com.alibaba.fastjson2.JSON;
import com.fasterxml.jackson.databind.Module;
import com.google.inject.Injector;
import org.apache.druid.guice.DruidInjectorBuilder;
import org.apache.druid.query.QueryRunnerFactoryConglomerate;
import org.apache.druid.query.aggregation.sketch.hlld.HllModule;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.apache.druid.sql.calcite.BaseCalciteQueryTest;
import org.apache.druid.sql.calcite.QueryTestBuilder;
import org.apache.druid.sql.calcite.QueryTestRunner;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.junit.*;

import java.io.File;
import java.io.IOException;
import java.util.*;

// The parent class changed outright in the new version, so the implementation is simpler now.
public class HllApproxCountDistinctSqlAggregatorTest extends BaseCalciteQueryTest {
  private static final boolean ROUND = true;

  @Override
  public void gatherProperties(Properties properties)
  {
    super.gatherProperties(properties);
  }

  @Override
  public void configureGuice(DruidInjectorBuilder builder)
  {
    super.configureGuice(builder);
    builder.addModule(new HllModule());
  }

  @SuppressWarnings("resource")
  @Override
  public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker(
      final QueryRunnerFactoryConglomerate conglomerate,
      final JoinableFactoryWrapper joinableFactory,
      final Injector injector
  ) throws IOException
  {
    HllModule.registerSerde();
    for (Module mod : new HllModule().getJacksonModules()) {
      CalciteTests.getJsonMapper().registerModule(mod);
      TestHelper.JSON_MAPPER.registerModule(mod);
    }

    final QueryableIndex index = TestHelper.getTestIndexIO().loadIndex(new File("D:/doc/datas/testIndex-1369101812"));
    //final QueryableIndex index = TestHelper.getTestIndexIO().loadIndex(new File("D:/doc/datas/9_index"));
    /*final QueryableIndex index = IndexBuilder.create()
        .tmpDir(temporaryFolder.newFolder())
        .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
        .schema(
            new IncrementalIndexSchema.Builder()
                .withMetrics(
                    new CountAggregatorFactory("cnt"),
                    new DoubleSumAggregatorFactory("m1", "m1"),
                    new HllAggregatorFactory(
                        "hll_dim1",
                        "dim1",
                        null,
                        ROUND
                    )
                )
                .withRollup(false)
                .build()
        )
        .rows(TestDataBuilder.ROWS1)
        .buildMMappedIndex();*/

    return new SpecificSegmentsQuerySegmentWalker(conglomerate).add(
        DataSegment.builder()
                   .dataSource(CalciteTests.DATASOURCE1)
                   .interval(index.getDataInterval())
                   .version("1")
                   .shardSpec(new LinearShardSpec(0))
                   .size(0)
                   .build(),
        index
    );
  }

  @Test
  public void testSqlQuery() throws Exception {
    // Can't vectorize due to SUBSTRING expression.
    cannotVectorize();

    String[] columns = new String[]{"__time", "dim1", "dim2", "dim3", "cnt", "hll_dim1", "m1"};

    String sql = "select " + String.join(",", columns) + " from druid.foo";
    QueryTestBuilder builder = testBuilder().sql(sql);
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      Map row = new LinkedHashMap();
      for (int i = 0; i < result.length; i++) {
        row.put(columns[i], result[i]);
      }
      System.out.println(JSON.toJSONString(row));
      // System.out.println(Arrays.toString(result));
    }

    for (int i = 0; i < columns.length; i++) {
      Object[] values = new Object[results.size()];
      for (int j = 0; j < results.size(); j++) {
        values[j] = results.get(j)[i];
      }
      System.out.println(columns[i] + ":" + Arrays.toString(values));
    }
  }
  @Test
  public void testSqlQuery11() throws Exception { // test added in this commit
    // Can't vectorize due to SUBSTRING expression.
    //cannotVectorize();

    String sql = "select HLLD(hll_dim1) hll_dim1 from (select hll_dim1 from druid.foo limit 5) t ";
    //sql = "select HLLD(hll_dim1) hll_dim1 from druid.foo t ";
    QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testSqlQuery12() throws Exception { // test added in this commit
    // Can't vectorize due to SUBSTRING expression.
    cannotVectorize();

    String sql = "select * from (select * from druid.foo limit 6) t where __time >= '1970-12-15 07:00:28' and __time < '2023-12-15 08:10:28' ";
    QueryTestBuilder builder = testBuilder().sql(sql);
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testSqlQuery1() throws Exception {
    // Can't vectorize due to SUBSTRING expression.
    cannotVectorize();

    String sql = "select dim1 from druid.foo";
    QueryTestBuilder builder = testBuilder().sql(sql);
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testSqlQuery2() throws Exception {
    //cannotVectorize();
    //String sql = "select HLLD_ESTIMATE(HLLD(hll_dim1)) from druid.foo where dim1 = '1'";
    // Caused by: org.apache.calcite.sql.validate.SqlValidatorException: Aggregate expressions cannot be nested
    //String sql = "select HLLD_ESTIMATE(HLLD(hll_dim1)), APPROX_COUNT_DISTINCT_HLLD(HLLD(hll_dim1)), HLLD(hll_dim1) from druid.foo";
    String sql = "select HLLD_ESTIMATE(HLLD(hll_dim1)), APPROX_COUNT_DISTINCT_HLLD(hll_dim1), HLLD(hll_dim1) from (select HLLD(hll_dim1) hll_dim1 from druid.foo) t";
    QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testSqlQuery3() throws Exception {
    //cannotVectorize();
    //String sql = "select HLLD_ESTIMATE(HLLD(hll_dim1)) from druid.foo where dim1 = ''";
    String sql = "select APPROX_COUNT_DISTINCT_HLLD(hll, 12) from (select HLLD(hll_dim1) hll from druid.foo where dim1 = '1') t ";
    QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testSqlQuery4() throws Exception {
    //cannotVectorize();
    //String sql = "select HLLD_ESTIMATE(HLLD(hll_dim1)) from druid.foo where dim1 = ''";
    String sql = "select APPROX_COUNT_DISTINCT_HLLD(hll, 12) from (select HLLD(hll_dim1) hll from druid.foo where dim1 = '1') t ";
    QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testSqlQuery5() throws Exception {
    //cannotVectorize();
    //String sql = "select HLLD_ESTIMATE(HLLD(hll_dim1)) from druid.foo where dim1 = ''";
    String sql = "select dim1,APPROX_COUNT_DISTINCT_HLLD(hll, 12) from (select dim1,HLLD(hll_dim1) hll from druid.foo where dim1 = '1' group by dim1) t group by dim1";
    QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testSqlQuery6() throws Exception {
    //cannotVectorize();
    //String sql = "select HLLD_ESTIMATE(HLLD(hll_dim1)) from druid.foo where dim1 = ''";
    String sql = "select dim1,APPROX_COUNT_DISTINCT_HLLD(hll, 12) from (select dim1,HLLD(dim1) hll from druid.foo where dim1 = '1' group by dim1 limit 10) t group by dim1";
    //String sql = "select dim1,HLLD_ESTIMATE(HLLD(hll), false) from (select dim1,HLLD(dim1) hll from druid.foo where dim1 = '1' group by dim1 limit 10) t group by dim1";
    QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testSqlQuery62() throws Exception {
    //cannotVectorize();
    //String sql = "select HLLD_ESTIMATE(HLLD(hll_dim1)) from druid.foo where dim1 = ''";
    String sql = "select dim1,APPROX_COUNT_DISTINCT_HLLD(hll) from (select dim1,HLLD(dim1) hll from druid.foo where dim1 = '1' group by dim1 limit 10) t group by dim1";
    QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testSqlQuery7() throws Exception {
    //cannotVectorize();
    //String sql = "select HLLD_ESTIMATE(HLLD(hll_dim1)) from druid.foo where dim1 = ''";
    String sql = "select dim1,APPROX_COUNT_DISTINCT_HLLD(hll, 12) from (select dim1,HLLD(dim1) hll from druid.foo where dim1 = '1' group by dim1) t group by dim1 limit 10";
    QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testAgg() throws Exception {
    final String sql = "SELECT\n"
        + " SUM(cnt),\n"
        + " APPROX_COUNT_DISTINCT_HLLD(hll_dim1)\n"
        + "FROM druid.foo";

    QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testDistinct() throws Exception {
    final String sql = "SELECT\n"
        + " SUM(cnt),\n"
        + " APPROX_COUNT_DISTINCT_HLLD(dim2),\n" // uppercase
        + " APPROX_COUNT_DISTINCT_HLLD(dim2) FILTER(WHERE dim2 <> ''),\n" // lowercase; also, filtered
        + " APPROX_COUNT_DISTINCT_HLLD(SUBSTRING(dim2, 1, 1)),\n" // on extractionFn
        + " APPROX_COUNT_DISTINCT_HLLD(SUBSTRING(dim2, 1, 1) || 'x'),\n" // on expression
        + " APPROX_COUNT_DISTINCT_HLLD(hll_dim1, 16),\n" // on native HllSketch column
        + " APPROX_COUNT_DISTINCT_HLLD(hll_dim1)\n" // on native HllSketch column
        + "FROM druid.foo";

    QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testDistinct2() throws Exception {
    final String sql = "SELECT\n"
        + " SUM(cnt),\n"
        + " APPROX_COUNT_DISTINCT_HLLD(dim2),\n"
        + " HLLD(dim2),\n"
        + " HLLD(hll_dim1),\n"
        + " HLLD_ESTIMATE(HLLD(dim2)),\n"
        + " HLLD_ESTIMATE(HLLD(dim2), true),\n"
        + " HLLD_ESTIMATE(HLLD(dim1), true),\n"
        + " HLLD_ESTIMATE(HLLD(hll_dim1)),\n" // on native HllSketch column
        + " APPROX_COUNT_DISTINCT_HLLD(hll_dim1)\n" // on native HllSketch column
        + "FROM druid.foo";

    QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testDistinctDebug2() throws Exception {
    final String sql = "SELECT\n"
        + " dim1, dim2\n"
        + "FROM druid.foo";

    QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testDistinctDebug() throws Exception {
    final String sql = "SELECT\n"
        + " SUM(cnt),\n"
        + " APPROX_COUNT_DISTINCT_HLLD(dim2)\n"
        + "FROM druid.foo";

    QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testDeser() throws Exception {
    final String sql = "SELECT\n"
        + " APPROX_COUNT_DISTINCT_HLLD(hll_dim1) cnt\n"
        + "FROM druid.foo";

    QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testGroupBy() throws Exception {
    final String sql = "SELECT cnt,\n"
        + " APPROX_COUNT_DISTINCT_HLLD(hll_dim1, 14) cnt2\n"
        + "FROM druid.foo group by cnt";

    QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testGroupBy1() throws Exception {
    final String sql = "SELECT __time,\n"
        + " APPROX_COUNT_DISTINCT_HLLD(hll_dim1, 14) cnt\n"
        + "FROM druid.foo group by __time";

    QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }

  @Test
  public void testGroupBy2() throws Exception {
    final String sql = "SELECT __time,\n"
        + " APPROX_COUNT_DISTINCT_HLLD(hll_dim1, 14) cnt\n"
        + "FROM druid.foo group by __time order by cnt desc";

    QueryTestBuilder builder = testBuilder().sql(sql).skipVectorize();
    builder.run();
    QueryTestRunner.QueryResults queryResults = builder.results();
    List<Object[]> results = queryResults.results;
    for (Object[] result : results) {
      System.out.println(Arrays.toString(result));
    }
  }
}