-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Add IPv4 druid expressions #8197
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| package org.apache.druid.query.expression; | ||
|
|
||
| import com.google.common.net.InetAddresses; | ||
|
|
||
| import javax.annotation.Nullable; | ||
| import java.net.Inet4Address; | ||
| import java.net.InetAddress; | ||
| import java.util.regex.Pattern; | ||
|
|
||
| class IPv4AddressExprUtils | ||
| { | ||
| private static final Pattern IPV4_PATTERN = Pattern.compile( | ||
| "^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$" | ||
| ); | ||
|
|
||
| /** | ||
| * @return True if argument cannot be represented by an unsigned integer (4 bytes), else false | ||
| */ | ||
| static boolean overflowsUnsignedInt(long value) | ||
| { | ||
| return value < 0L || 0xff_ff_ff_ffL < value; | ||
| } | ||
|
|
||
| /** | ||
| * @return True if argument is a valid IPv4 address dotted-decimal string | ||
| */ | ||
| static boolean isValidAddress(@Nullable String string) | ||
| { | ||
| return string != null && IPV4_PATTERN.matcher(string).matches(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This'll accept some invalid IP-like strings, such as octets written with leading zeros (e.g., "01.02.03.04"). I'd suggest either renaming the method to reflect that the check is approximate or, alternatively, making the check exact and keeping the name the same.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah! I'll change the regex to be tighter and add more test cases.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! |
||
| } | ||
|
|
||
| @Nullable | ||
| static Inet4Address parse(@Nullable String string) | ||
| { | ||
| // Explicitly check for valid address to avoid overhead of InetAddresses#forString() potentially | ||
| // throwing IllegalArgumentException | ||
| if (isValidAddress(string)) { | ||
| // Do not use java.lang.InetAddress#getByName() as it may do DNS lookups | ||
| InetAddress address = InetAddresses.forString(string); | ||
| if (address instanceof Inet4Address) { | ||
| return (Inet4Address) address; | ||
| } | ||
| } | ||
| return null; | ||
| } | ||
|
|
||
| static Inet4Address parse(int value) | ||
| { | ||
| return InetAddresses.fromInteger(value); | ||
| } | ||
|
|
||
| /** | ||
| * @return IPv4 address dotted-decimal notated string | ||
| */ | ||
| static String toString(Inet4Address address) | ||
| { | ||
| return address.getHostAddress(); | ||
| } | ||
|
|
||
| static long toLong(Inet4Address address) | ||
| { | ||
| int value = InetAddresses.coerceToInteger(address); | ||
| return Integer.toUnsignedLong(value); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,142 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| package org.apache.druid.query.expression; | ||
|
|
||
| import org.apache.commons.net.util.SubnetUtils; | ||
| import org.apache.druid.java.util.common.IAE; | ||
| import org.apache.druid.math.expr.Expr; | ||
| import org.apache.druid.math.expr.ExprEval; | ||
| import org.apache.druid.math.expr.ExprMacroTable; | ||
| import org.apache.druid.math.expr.ExprType; | ||
|
|
||
| import javax.annotation.Nonnull; | ||
| import java.util.List; | ||
|
|
||
| /** | ||
| * <pre> | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No reason to include these tags, since we generally write javadocs with reading in source form in mind. (Nobody is looking at the generated docs AFAIK)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The tags also preserve the formatting when invoking "Quick Documentation" in IntelliJ.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, well, good point, in that case feel free to leave them. |
||
| * Implements an expression that checks if an IPv4 address belongs to a particular subnet. | ||
| * | ||
| * Expression signatures: | ||
| * - long ipv4_match(string address, string subnet) | ||
| * - long ipv4_match(long address, string subnet) | ||
| * | ||
| * Valid "address" argument formats are: | ||
| * - unsigned int long (e.g., 3232235521) | ||
| * - IPv4 address dotted-decimal string (e.g., "198.168.0.1") | ||
| * | ||
| * The argument format for the "subnet" argument should be a literal in CIDR notation | ||
| * (e.g., "198.168.0.0/16"). | ||
| * | ||
| * If the "address" argument does not represent an IPv4 address then false is returned. | ||
| * </pre> | ||
| * | ||
| * @see IPv4AddressParseExprMacro | ||
| * @see IPv4AddressStringifyExprMacro | ||
| */ | ||
| public class IPv4AddressMatchExprMacro implements ExprMacroTable.ExprMacro | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any reason these functions defined via
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I made
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is There is probably a sort of janky way that stuff could be pre-computed in Side note, please don't mark comments as resolved, leave that for the reviewer who made the comment to do.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Building I've undone all the comment threads that I marked as resolved (was using that to keep track of the ones I've addressed in my branch).
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The purpose of
It's probably an accident that Function and ExprMacro are in different packages. I added ExprMacro in #4365 and I don't recall having a good reason for having them be in different places. Fwiw, I don't have a strong separation in my mind between the responsibility of processing and core, anyway, largely agreeing with this comment: #4312 (comment)
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Thanks for the explanation, 👍 I had stored away in my head for whatever reason that the main purpose of |
||
| { | ||
| public static final String NAME = "ipv4_match"; | ||
| private static final int ARG_SUBNET = 1; | ||
|
|
||
| @Override | ||
| public String name() | ||
| { | ||
| return NAME; | ||
| } | ||
|
|
||
| @Override | ||
| public Expr apply(final List<Expr> args) | ||
| { | ||
| if (args.size() != 2) { | ||
| throw new IAE(ExprUtils.createErrMsg(name(), "must have 2 arguments")); | ||
| } | ||
|
|
||
| SubnetUtils.SubnetInfo subnetInfo = getSubnetInfo(args); | ||
| Expr arg = args.get(0); | ||
|
|
||
| class IPv4AddressMatchExpr extends ExprMacroTable.BaseScalarUnivariateMacroFunctionExpr | ||
| { | ||
| private final SubnetUtils.SubnetInfo subnetInfo; | ||
|
|
||
| private IPv4AddressMatchExpr(Expr arg, SubnetUtils.SubnetInfo subnetInfo) | ||
| { | ||
| super(arg); | ||
| this.subnetInfo = subnetInfo; | ||
| } | ||
|
|
||
| @Nonnull | ||
| @Override | ||
| public ExprEval eval(final ObjectBinding bindings) | ||
| { | ||
| ExprEval eval = arg.eval(bindings); | ||
| boolean match; | ||
| switch (eval.type()) { | ||
| case STRING: | ||
| match = isStringMatch(eval.asString()); | ||
| break; | ||
| case LONG: | ||
| match = !eval.isNumericNull() && isLongMatch(eval.asLong()); | ||
| break; | ||
| default: | ||
| match = false; | ||
| } | ||
| return ExprEval.of(match, ExprType.LONG); | ||
| } | ||
|
|
||
| private boolean isStringMatch(String stringValue) | ||
| { | ||
| return IPv4AddressExprUtils.isValidAddress(stringValue) && subnetInfo.isInRange(stringValue); | ||
| } | ||
|
|
||
| private boolean isLongMatch(long longValue) | ||
| { | ||
| return !IPv4AddressExprUtils.overflowsUnsignedInt(longValue) && subnetInfo.isInRange((int) longValue); | ||
| } | ||
|
|
||
| @Override | ||
| public Expr visit(Shuttle shuttle) | ||
| { | ||
| Expr newArg = arg.visit(shuttle); | ||
| return shuttle.visit(new IPv4AddressMatchExpr(newArg, subnetInfo)); | ||
| } | ||
| } | ||
|
|
||
| return new IPv4AddressMatchExpr(arg, subnetInfo); | ||
| } | ||
|
|
||
| private SubnetUtils.SubnetInfo getSubnetInfo(List<Expr> args) | ||
| { | ||
| String subnetArgName = "subnet"; | ||
| Expr arg = args.get(ARG_SUBNET); | ||
| ExprUtils.checkLiteralArgument(name(), arg, subnetArgName); | ||
| String subnet = (String) arg.getLiteralValue(); | ||
|
|
||
| SubnetUtils subnetUtils; | ||
| try { | ||
| subnetUtils = new SubnetUtils(subnet); | ||
| } | ||
| catch (IllegalArgumentException e) { | ||
| throw new IAE(e, ExprUtils.createErrMsg(name(), subnetArgName + " arg has an invalid format: " + subnet)); | ||
| } | ||
| subnetUtils.setInclusiveHostCount(true); // make network and broadcast addresses match | ||
|
|
||
| return subnetUtils.getInfo(); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any reason not to use shorter names like `ipv4_match`, etc.?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since it's more convenient to type and it's reasonable that users will understand "ipv4" to mean an "IP address", I'm ok with using the shorter names you suggested.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I was thinking about balancing between clarity and keystrokes. I think the current names are great, thanks.