TSG-22013 添加dimension_bucket函数,计算维度bucket

This commit is contained in:
lifengchao
2024-08-09 11:30:47 +08:00
parent 9a6c44112e
commit 8c546e20d7
8 changed files with 537 additions and 0 deletions

143
druid-udf/pom.xml Normal file
View File

@@ -0,0 +1,143 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build for the druid-udf module. The artifactId suffix matches the druid.version property below. -->
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>druid-udf_26.0.0</artifactId>
<name>druid-udf</name>
<version>1.0-SNAPSHOT</version>
<properties>
<!-- Compile for Java 11; druid.version is shared by every Druid dependency declared below. -->
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<druid.version>26.0.0</druid.version>
</properties>
<dependencies>
<!-- Druid artifacts needed at compile time; declared with "provided" scope. -->
<dependency>
<groupId>org.apache.druid</groupId>
<artifactId>druid-server</artifactId>
<version>${druid.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.druid</groupId>
<artifactId>druid-sql</artifactId>
<version>${druid.version}</version>
<scope>provided</scope>
</dependency>
<!-- Test-only dependencies: mocking, Druid test-jars, JUnit 4 and a JSON library. -->
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
<version>4.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.druid</groupId>
<artifactId>druid-processing</artifactId>
<version>${druid.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.druid</groupId>
<artifactId>druid-server</artifactId>
<version>${druid.version}</version>
<scope>test</scope>
<type>test-jar</type>
</dependency>
<dependency>
<groupId>org.apache.druid</groupId>
<artifactId>druid-sql</artifactId>
<version>${druid.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2</artifactId>
<version>2.0.34</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compiler: Java 11 source/target with unchecked-operation warnings enabled. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<compilerArgument>-Xlint:unchecked</compilerArgument>
<source>11</source>
<target>11</target>
</configuration>
</plugin>
<!-- Surefire: run tests with the JVM time zone set to UTC and send test output to files. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.19.1</version>
<configuration>
<argLine>-Duser.timezone=UTC</argLine>
<redirectTestOutputToFile>true</redirectTestOutputToFile>
</configuration>
</plugin>
<!-- Assembly: during the package phase, build the distribution described by src/assembly/assembly.xml. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.5.5</version>
<executions>
<execution>
<id>distro-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<finalName>${project.artifactId}-${project.version}</finalName>
<tarLongFileMode>posix</tarLongFileMode>
<descriptors>
<descriptor>src/assembly/assembly.xml</descriptor>
</descriptors>
</configuration>
</execution>
</executions>
</plugin>
<!-- Release plugin, with the git command-line SCM provider added as a plugin dependency. -->
<plugin>
<artifactId>maven-release-plugin</artifactId>
<version>2.5.3</version>
<dependencies>
<dependency>
<groupId>org.apache.maven.scm</groupId>
<artifactId>maven-scm-provider-gitexe</artifactId>
<version>1.9.4</version>
</dependency>
</dependencies>
</plugin>
<!-- Jar: addMavenDescriptor=false, i.e. do not embed the Maven descriptor files in the jar. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
<configuration>
<archive>
<addMavenDescriptor>false</addMavenDescriptor>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,54 @@
<?xml version="1.0"?>
<!--
~ Copyright 2016 Imply Data, Inc.
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd">
<!-- Distribution layout: a single tar.gz whose top-level directory is named after the project. -->
<id>bin</id>
<formats>
<format>tar.gz</format>
</formats>
<baseDirectory>${project.name}</baseDirectory>
<dependencySets>
<!-- Dependency jars (including transitive ones) are copied as-is; the project's own artifact is not added here. -->
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<useTransitiveDependencies>true</useTransitiveDependencies>
<outputDirectory>.</outputDirectory>
<unpack>false</unpack>
</dependencySet>
</dependencySets>
<fileSets>
<!-- README.md and LICENSE taken from the project root directory. -->
<fileSet>
<directory>.</directory>
<outputDirectory/>
<includes>
<include>README.md</include>
<include>LICENSE</include>
</includes>
</fileSet>
<!-- Every jar found directly in the build output directory. -->
<fileSet>
<directory>${project.build.directory}</directory>
<outputDirectory>.</outputDirectory>
<includes>
<include>*.jar</include>
</includes>
</fileSet>
</fileSets>
</assembly>

View File

@@ -0,0 +1,23 @@
package org.apache.druid.query.udf;
import com.google.inject.Binder;
import org.apache.druid.guice.ExpressionModule;
import org.apache.druid.initialization.DruidModule;
import org.apache.druid.query.udf.expressions.DimensionBucketExprMacro;
import org.apache.druid.query.udf.sql.DimensionBucketOperatorConversion;
import org.apache.druid.sql.guice.SqlBindings;
/**
 * Druid extension module that makes the {@code dimension_bucket} function available
 * both to Druid SQL and to native expressions.
 *
 * <p>{@code getJacksonModules()} is deliberately not overridden here; this module
 * contributes Guice bindings only.
 */
public class UdfModule implements DruidModule
{
  /**
   * Registers the SQL operator conversion and the expression macro for
   * {@code dimension_bucket} with the supplied binder.
   *
   * @param binder the Guice binder the registrations are added to
   */
  @Override
  public void configure(final Binder binder)
  {
    // SQL layer: DIMENSION_BUCKET(...) -> native expression call.
    SqlBindings.addOperatorConversion(binder, DimensionBucketOperatorConversion.class);

    // Native expression layer: the macro that actually evaluates the function.
    ExpressionModule.addExprMacro(binder, DimensionBucketExprMacro.class);
  }
}

View File

@@ -0,0 +1,82 @@
package org.apache.druid.query.udf.expressions;
import org.apache.druid.math.expr.*;
import org.apache.druid.math.expr.ExprMacroTable.ExprMacro;
import javax.annotation.Nullable;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Expression macro {@code dimension_bucket(bucketCount, dim1[, dim2 ...])}.
 *
 * <p>Combines the hash codes of all dimension arguments into a single 31-based hash,
 * reduces it modulo {@code bucketCount} and returns the bucket number as a 4-digit,
 * zero-padded, lower-case hexadecimal string.
 */
public class DimensionBucketExprMacro implements ExprMacro {
  private static final String NAME = "dimension_bucket";

  @Override
  public String name() {
    return NAME;
  }

  /**
   * Validates the arguments and builds the bucket expression.
   *
   * @param args the bucket count literal followed by at least one dimension expression
   * @return the expression computing the bucket id
   * @throws RuntimeException (via {@code validationFailed}) when fewer than two arguments are
   *         given, or the first argument is not a literal evaluating to an int greater than zero
   */
  @Override
  public Expr apply(List<Expr> args) {
    validationHelperCheckMinArgumentCount(args, 2);
    Expr bucketCnt = args.get(0);
    if (!bucketCnt.isLiteral() || bucketCnt.eval(InputBindings.nilBindings()).asInt() <= 0) {
      throw validationFailed("first bucketCount argument must is int literal and > 0");
    }
    return new DimensionBucketExpr(args);
  }

  /**
   * Maps a combined hash onto {@code [0, bucketCount)}.
   *
   * <p>{@code Math.abs(hash) % bucketCount} is negative for {@code Integer.MIN_VALUE} because
   * {@code Math.abs(Integer.MIN_VALUE)} is still negative. Taking the remainder first and the
   * absolute value afterwards yields the same bucket for every other hash value and a valid
   * bucket for {@code Integer.MIN_VALUE} as well.
   */
  private static int bucketOf(int hash, int bucketCount) {
    return Math.abs(hash % bucketCount);
  }

  static class DimensionBucketExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr {
    /** Number of buckets, taken once from the literal first argument. */
    private final int bucketCount;

    public DimensionBucketExpr(List<Expr> args) {
      super(NAME, args);
      bucketCount = args.get(0).eval(InputBindings.nilBindings()).asInt();
    }

    /**
     * Hashes every dimension argument (index 1 onwards) and returns the bucket id.
     *
     * <p>Array values contribute each element separately; {@code null} values and
     * {@code null} elements contribute 0.
     */
    @Override
    public ExprEval eval(ObjectBinding bindings) {
      int hash = 1;
      for (int i = 1; i < args.size(); i++) {
        Object element = args.get(i).eval(bindings).value();
        if (element instanceof Object[]) {
          for (Object item : (Object[]) element) {
            hash = 31 * hash + (item == null ? 0 : item.hashCode());
          }
        } else {
          hash = 31 * hash + (element == null ? 0 : element.hashCode());
        }
      }
      // The formatter emits exactly four hex digits, i.e. only the low 16 bits of the bucket.
      return ExprEval.of(IntToHexUtil.uInt16ToHexStringFast(bucketOf(hash, bucketCount)));
    }

    @Override
    public Expr visit(Shuttle shuttle) {
      List<Expr> newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList());
      return shuttle.visit(new DimensionBucketExpr(newArgs));
    }

    /** The bucket id is always reported as a string. */
    @Nullable
    @Override
    public ExpressionType getOutputType(InputBindingInspector inspector) {
      return ExpressionType.STRING;
    }

    /** No vectorized implementation is provided. */
    @Override
    public boolean canVectorize(InputBindingInspector inspector) {
      return false;
    }
  }
}

View File

@@ -0,0 +1,45 @@
package org.apache.druid.query.udf.expressions;
import java.nio.charset.StandardCharsets;
/**
 * Formats the low 16 bits of an int as a fixed-width, 4-digit, lower-case hexadecimal string.
 *
 * <p>The strings for the values {@code 0 .. uInt16HexsCacheSize - 1} are created once in the
 * static initializer and reused afterwards.
 */
public class IntToHexUtil {
  /** Digit characters; only the first 16 entries are used for hexadecimal output. */
  static final byte[] digits = {
      '0', '1', '2', '3', '4', '5',
      '6', '7', '8', '9', 'a', 'b',
      'c', 'd', 'e', 'f', 'g', 'h',
      'i', 'j', 'k', 'l', 'm', 'n',
      'o', 'p', 'q', 'r', 's', 't',
      'u', 'v', 'w', 'x', 'y', 'z'
  };

  /** Number of pre-built strings held in {@link #uInt16HexsCache}. */
  static final int uInt16HexsCacheSize = 8192;

  /** Pre-built strings, indexed by value. */
  static final String[] uInt16HexsCache;

  static {
    uInt16HexsCache = new String[uInt16HexsCacheSize];
    for (int i = 0; i < uInt16HexsCacheSize; i++) {
      uInt16HexsCache[i] = uInt16ToHexString(i);
    }
  }

  /**
   * Returns the 4-digit hex representation of the low 16 bits of {@code i}.
   *
   * <p>Values inside the cached range are served from the cache. Everything else, including
   * negative values (which previously caused an {@code ArrayIndexOutOfBoundsException}), is
   * formatted on demand. Bits above the low 16 are ignored, so e.g. 65536 yields "0000".
   *
   * @param i the value to format
   * @return a 4-character lower-case hexadecimal string
   */
  public static String uInt16ToHexStringFast(int i) {
    if (i >= 0 && i < uInt16HexsCacheSize) {
      return uInt16HexsCache[i];
    }
    return uInt16ToHexString(i);
  }

  /** Builds the 4-digit string by emitting one nibble per position, least significant last. */
  private static String uInt16ToHexString(int i) {
    byte[] bytes = new byte[4];
    int value = i;
    for (int pos = bytes.length - 1; pos >= 0; pos--) {
      bytes[pos] = digits[value & 0xF];
      value >>>= 4;
    }
    return new String(bytes, StandardCharsets.US_ASCII);
  }
}

View File

@@ -0,0 +1,43 @@
package org.apache.druid.query.udf.sql;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.SqlFunctionCategory;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.type.OperandTypes;
import org.apache.calcite.sql.type.ReturnTypes;
import org.apache.calcite.sql.type.SqlOperandCountRanges;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.expression.OperatorConversions;
import org.apache.druid.sql.calcite.expression.SqlOperatorConversion;
import org.apache.druid.sql.calcite.planner.Calcites;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import javax.annotation.Nullable;
/**
 * Exposes the native {@code dimension_bucket} expression to Druid SQL as
 * {@code DIMENSION_BUCKET(bucketCount, dim1[, dim2 ...])}.
 */
public class DimensionBucketOperatorConversion implements SqlOperatorConversion
{
  /** Name of the native expression function the SQL call is translated to. */
  private static final String NATIVE_FUNCTION_NAME = "dimension_bucket";

  /**
   * Calcite operator definition: a user-defined function taking two or more operands
   * of any type and returning a nullable VARCHAR.
   */
  private static final SqlFunction SQL_FUNCTION =
      new SqlFunction(
          "DIMENSION_BUCKET",
          SqlKind.OTHER_FUNCTION,
          ReturnTypes.explicit(
              typeFactory -> Calcites.createSqlTypeWithNullability(typeFactory, SqlTypeName.VARCHAR, true)
          ),
          null,
          OperandTypes.variadic(SqlOperandCountRanges.from(2)),
          SqlFunctionCategory.USER_DEFINED_FUNCTION
      );

  @Override
  public SqlOperator calciteOperator()
  {
    return SQL_FUNCTION;
  }

  /**
   * Translates the SQL call one-to-one into a call of the native expression function.
   */
  @Nullable
  @Override
  public DruidExpression toDruidExpression(
      PlannerContext plannerContext,
      RowSignature rowSignature,
      RexNode rexNode
  )
  {
    return OperatorConversions.convertDirectCall(
        plannerContext,
        rowSignature,
        rexNode,
        NATIVE_FUNCTION_NAME
    );
  }
}

View File

@@ -0,0 +1 @@
org.apache.druid.query.udf.UdfModule

View File

@@ -0,0 +1,146 @@
package org.apache.druid.query.udf.expressions;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.math.expr.*;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.Test;
import java.util.Collections;
/**
 * Tests for the {@code dimension_bucket} expression macro.
 *
 * <p>Each test checks that the result is a 4-digit hexadecimal bucket id inside the requested
 * bucket range and that evaluating the same expression twice gives the same result.
 */
public class DimensionBucketExprTest extends InitializedNullHandlingTest {
  /** Bucket count used as the literal first argument in every expression below. */
  private static final int BUCKET_COUNT = 1024;

  private final ExprMacroTable exprMacroTable = new ExprMacroTable(Collections.singletonList(new DimensionBucketExprMacro()));

  /** One binding per supported value kind, including nulls and both array representations. */
  Expr.ObjectBinding inputBindings = InputBindings.forInputSuppliers(
      new ImmutableMap.Builder<String, InputBindings.InputSupplier>()
          .put("string", InputBindings.inputSupplier(ExpressionType.STRING, () -> "abcdef"))
          .put("long", InputBindings.inputSupplier(ExpressionType.LONG, () -> 1234L))
          .put("double", InputBindings.inputSupplier(ExpressionType.DOUBLE, () -> 1.234))
          .put("array1", InputBindings.inputSupplier(ExpressionType.STRING_ARRAY, () -> new Object[]{"1", "2", "3"}))
          .put("array2", InputBindings.inputSupplier(ExpressionType.STRING_ARRAY, () -> new String[]{"1", "2", "3"}))
          .put("nullString", InputBindings.inputSupplier(ExpressionType.STRING, () -> null))
          .put("nullLong", InputBindings.inputSupplier(ExpressionType.LONG, () -> null))
          .put("nullDouble", InputBindings.inputSupplier(ExpressionType.DOUBLE, () -> null))
          .build()
  );

  /** Sample rows: first with the *_object columns typed as string arrays, then as plain strings. */
  Expr.ObjectBinding[] inputBindingArray = new Expr.ObjectBinding[]{
      row(ExpressionType.STRING_ARRAY, 81, 1, null, null),
      row(ExpressionType.STRING_ARRAY, 101, 1, new Object[]{"5", "7", "8"}, null),
      row(ExpressionType.STRING_ARRAY, 271L, 1L, null, "5.245.228.51"),
      // ...
      row(ExpressionType.STRING, 81, 1, null, null),
      row(ExpressionType.STRING, 101, 1, "5,7,8", null),
      row(ExpressionType.STRING, 271L, 1L, null, "5.245.228.51"),
  };

  /**
   * Builds one sample row.
   *
   * @param objectColumnType type of the client_ip_object / server_ip_object / fqdn_category columns
   * @param id               value used for rule_id, template_id and chart_id
   * @param version          value of the version column
   * @param clientIpObject   value of client_ip_object (may be null)
   * @param serverIp         value of server_ip (may be null)
   */
  private static Expr.ObjectBinding row(
      ExpressionType objectColumnType,
      Object id,
      Object version,
      Object clientIpObject,
      String serverIp
  ) {
    return InputBindings.forInputSuppliers(
        new ImmutableMap.Builder<String, InputBindings.InputSupplier>()
            .put("device_id", InputBindings.inputSupplier(ExpressionType.STRING, () -> "1"))
            .put("rule_id", InputBindings.inputSupplier(ExpressionType.LONG, () -> id))
            .put("template_id", InputBindings.inputSupplier(ExpressionType.LONG, () -> id))
            .put("chart_id", InputBindings.inputSupplier(ExpressionType.LONG, () -> id))
            .put("version", InputBindings.inputSupplier(ExpressionType.LONG, () -> version))
            .put("client_ip_object", InputBindings.inputSupplier(objectColumnType, () -> clientIpObject))
            .put("server_ip_object", InputBindings.inputSupplier(objectColumnType, () -> null))
            .put("fqdn_category", InputBindings.inputSupplier(objectColumnType, () -> null))
            .put("client_ip", InputBindings.inputSupplier(ExpressionType.STRING, () -> null))
            .put("server_ip", InputBindings.inputSupplier(ExpressionType.STRING, () -> serverIp))
            .put("server_fqdn", InputBindings.inputSupplier(ExpressionType.STRING, () -> null))
            .put("server_domain", InputBindings.inputSupplier(ExpressionType.STRING, () -> null))
            .put("application", InputBindings.inputSupplier(ExpressionType.STRING, () -> null))
            .build()
    );
  }

  /** Asserts that {@code value} is a 4-digit hex string denoting a bucket in [0, BUCKET_COUNT). */
  private static void assertValidBucketId(Object value) {
    org.junit.Assert.assertTrue("bucket id must be a String but was: " + value, value instanceof String);
    String hex = (String) value;
    org.junit.Assert.assertEquals("bucket id must have 4 hex digits: " + hex, 4, hex.length());
    int bucket = Integer.parseInt(hex, 16);
    org.junit.Assert.assertTrue("bucket out of range: " + bucket, bucket >= 0 && bucket < BUCKET_COUNT);
  }

  @Test
  public void test() {
    Expr expr = Parser.parse("dimension_bucket(1024, 100, 'aaa', string,long,double,array1, array2, nullString, nullLong)", exprMacroTable);
    Object value = expr.eval(inputBindings).value();
    System.out.println(value);
    assertValidBucketId(value);
    // Same inputs must always land in the same bucket.
    org.junit.Assert.assertEquals(value, expr.eval(inputBindings).value());
  }

  @Test
  public void test2() {
    // The expression does not depend on the row, so parse it once.
    Expr expr = Parser.parse("dimension_bucket(1024, device_id, rule_id, template_id, chart_id, version, client_ip_object, server_ip_object, fqdn_category, client_ip, server_ip, server_fqdn, server_domain, application)", exprMacroTable);
    for (Expr.ObjectBinding objectBinding : inputBindingArray) {
      Object value = expr.eval(objectBinding).value();
      System.out.println(objectBinding.get("rule_id") + ", bucket_id:" + value);
      assertValidBucketId(value);
      org.junit.Assert.assertEquals(value, expr.eval(objectBinding).value());
    }
  }
}