From 3e32dcd5a88667f790e0657df429aa72f98edb2e Mon Sep 17 00:00:00 2001
From: Matt Calder
Date: Mon, 12 Aug 2024 16:30:15 -0700
Subject: [PATCH] initial commit for ip_prefix_collapse function

docs

more tests

Update presto-docs/src/main/sphinx/functions/ip.rst

Co-authored-by: Steve Burnett

addressing reviewer comments

reviewer feedback 2

retrigger checks

renaming function to ip_prefix_collapse
---
 presto-docs/src/main/sphinx/functions/ip.rst  |  11 +
 .../operator/scalar/IpPrefixFunctions.java    | 243 ++++++++++++++++++
 .../scalar/TestIpPrefixFunctions.java         | 114 +++++++++
 3 files changed, 368 insertions(+)

diff --git a/presto-docs/src/main/sphinx/functions/ip.rst b/presto-docs/src/main/sphinx/functions/ip.rst
index dcf5d4923b7bf..b8792483b0c00 100644
--- a/presto-docs/src/main/sphinx/functions/ip.rst
+++ b/presto-docs/src/main/sphinx/functions/ip.rst
@@ -49,3 +49,14 @@ IP Functions
         SELECT is_subnet_of(IPPREFIX '192.168.3.131/26', IPPREFIX '192.168.3.144/30'); -- true
         SELECT is_subnet_of(IPPREFIX '64:ff9b::17/64', IPPREFIX '64:ffff::17/64'); -- false
         SELECT is_subnet_of(IPPREFIX '192.168.3.131/26', IPPREFIX '192.168.3.131/26'); -- true
+
+.. function:: ip_prefix_collapse(array(ip_prefix)) -> array(ip_prefix)
+
+    Returns the minimal CIDR representation of the input ``IPPREFIX`` array.
+    Every ``IPPREFIX`` in the input array must be the same IP version (that is, only IPv4 or only IPv6)
+    or the query will fail and raise an error. ::
+
+        SELECT IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.0/24', IPPREFIX '192.168.1.0/24']); -- [{192.168.0.0/23}]
+        SELECT IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '2620:10d:c090::/48', IPPREFIX '2620:10d:c091::/48']); -- [{2620:10d:c090::/47}]
+        SELECT IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.1.0/24', IPPREFIX '192.168.0.0/24', IPPREFIX '192.168.2.0/24', IPPREFIX '192.168.9.0/24']); -- [{192.168.0.0/23}, {192.168.2.0/24}, {192.168.9.0/24}]
+
diff --git a/presto-main/src/main/java/com/facebook/presto/operator/scalar/IpPrefixFunctions.java b/presto-main/src/main/java/com/facebook/presto/operator/scalar/IpPrefixFunctions.java
index e40bb3c6e0bd3..c0c808508f4a4 100644
--- a/presto-main/src/main/java/com/facebook/presto/operator/scalar/IpPrefixFunctions.java
+++ b/presto-main/src/main/java/com/facebook/presto/operator/scalar/IpPrefixFunctions.java
@@ -24,9 +24,14 @@
 import com.google.common.net.InetAddresses;
 import io.airlift.slice.Slice;
 
+import java.math.BigInteger;
 import java.net.InetAddress;
 import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
 
+import static com.facebook.presto.operator.scalar.ArraySortFunction.sort;
 import static com.facebook.presto.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;
 import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT;
 import static com.facebook.presto.type.IpAddressOperators.between;
@@ -44,6 +49,8 @@
 
 public final class IpPrefixFunctions
 {
+    private static final BigInteger TWO = BigInteger.valueOf(2);
+
     private IpPrefixFunctions() {}
 
     @Description("IP prefix for a given IP address and subnet size")
@@ -137,6 +144,242 @@ public static boolean isPrefixSubnetOf(@SqlType(StandardTypes.IPPREFIX) Slice fi
         return between(ipSubnetMin(second), ipSubnetMin(first), ipSubnetMax(first)) && between(ipSubnetMax(second), ipSubnetMin(first), ipSubnetMax(first));
     }
 
+    /**
+     * Collapses an array of IP prefixes into the fewest CIDR prefixes that cover exactly the same addresses.
+     *
+     * @param unsortedIpPrefixArray IPPREFIX elements in any order; NULL elements and mixed IP versions are rejected
+     * @return the collapsed prefixes in ascending address order, or the input block itself when it is empty
+     *         or holds a single non-null element
+     * @throws PrestoException with INVALID_FUNCTION_ARGUMENT if any element is NULL or the IP versions differ
+     */
+    @Description("Combines the input set of IP prefixes into the fewest contiguous CIDR ranges possible.")
+    @ScalarFunction("ip_prefix_collapse")
+    @SqlType("array(IPPREFIX)")
+    public static Block collapseIpPrefixes(@SqlType("array(IPPREFIX)") Block unsortedIpPrefixArray)
+    {
+        int inputPrefixCount = unsortedIpPrefixArray.getPositionCount();
+
+        // If we get an empty array or an array with a single non-null element, just return the original array.
+        if (inputPrefixCount == 0 || (inputPrefixCount == 1 && !unsortedIpPrefixArray.isNull(0))) {
+            return unsortedIpPrefixArray;
+        }
+
+        // Sort prefixes. lessThanFunction is never used. NULLs are placed at the end.
+        // Prefixes are ordered by first IP and then prefix length.
+        // Example:
+        //     Input:  10.0.0.0/8, 9.255.255.0/24, 10.0.0.0/7, 10.1.0.0/24, 10.10.0.0/16
+        //     Output: 9.255.255.0/24, 10.0.0.0/7, 10.0.0.0/8, 10.1.0.0/24, 10.10.0.0/16
+        Block ipPrefixArray = sort(null, IPPREFIX, unsortedIpPrefixArray);
+
+        // throw if anything is null
+        if (ipPrefixArray.isNull(0) || ipPrefixArray.isNull(inputPrefixCount - 1)) {
+            throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "ip_prefix_collapse does not support null elements");
+        }
+
+        // Every element must be the same IP version. Comparing only the first and last sorted elements is not enough:
+        // IPv4 is stored as IPv4-mapped IPv6 (::ffff:a.b.c.d), which orders after numerically small IPv6
+        // addresses such as ::1 and before ordinary IPv6 addresses, so an IPv4 prefix can hide in the middle.
+        boolean v4 = isIpv4(IPPREFIX.getSlice(ipPrefixArray, 0));
+        for (int i = 1; i < inputPrefixCount; i++) {
+            if (isIpv4(IPPREFIX.getSlice(ipPrefixArray, i)) != v4) {
+                throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "All IPPREFIX elements must be the same IP version.");
+            }
+        }
+
+        int ipMaxBitLength = v4 ? 32 : 128;
+        List<Slice> outputIpPrefixes = new ArrayList<>();
+        for (BigInteger[] ipRange : mergeIpRanges(ipPrefixArray)) {
+            outputIpPrefixes.addAll(generateMinIpPrefixes(ipRange[0], ipRange[1], ipMaxBitLength));
+        }
+
+        BlockBuilder blockBuilder = IPPREFIX.createBlockBuilder(null, outputIpPrefixes.size());
+        for (Slice ipPrefix : outputIpPrefixes) {
+            IPPREFIX.writeSlice(blockBuilder, ipPrefix);
+        }
+
+        return blockBuilder.build();
+    }
+
+    /**
+     * Splits the inclusive address range [firstIpAddress, lastIpAddress] into the fewest CIDR prefixes.
+     *
+     * @param firstIpAddress first address of the range, as the unsigned value of the 16-byte IPADDRESS
+     * @param lastIpAddress last address of the range (inclusive), in the same representation
+     * @param ipVersionMaxBits address width of the IP version: 32 for IPv4, 128 for IPv6
+     * @return the covering prefixes in ascending address order
+     */
+    private static List<Slice> generateMinIpPrefixes(BigInteger firstIpAddress, BigInteger lastIpAddress, int ipVersionMaxBits)
+    {
+        List<Slice> ipPrefixSlices = new ArrayList<>();
+
+        // i.e., while firstIpAddress <= lastIpAddress
+        while (firstIpAddress.compareTo(lastIpAddress) <= 0) {
+            long rangeBits = findRangeBits(firstIpAddress, lastIpAddress); // find the number of bits for the next prefix in the range
+            int prefixLength = (int) (ipVersionMaxBits - rangeBits);
+
+            try {
+                InetAddress asInetAddress = bigIntegerToIpAddress(firstIpAddress); // convert firstIpAddress from BigInteger to InetAddress
+                Slice ipPrefixAsSlice = castFromVarcharToIpPrefix(utf8Slice(InetAddresses.toAddrString(asInetAddress) + "/" + prefixLength));
+                ipPrefixSlices.add(ipPrefixAsSlice);
+            }
+            catch (UnknownHostException ex) {
+                throw new PrestoException(GENERIC_INTERNAL_ERROR, "Unable to convert " + firstIpAddress + " to IP prefix", ex);
+            }
+
+            BigInteger ipCount = TWO.pow(ipVersionMaxBits - prefixLength);
+            firstIpAddress = firstIpAddress.add(ipCount); // move to the next prefix in the range
+        }
+
+        return ipPrefixSlices;
+    }
+
+    /**
+     * Returns the number of host bits of the largest CIDR prefix that starts at firstIpAddress
+     * and does not extend past lastIpAddress.
+     */
+    private static long findRangeBits(BigInteger firstIpAddress, BigInteger lastIpAddress)
+    {
+        // The number of IP addresses in the range
+        BigInteger ipCount = lastIpAddress.subtract(firstIpAddress).add(BigInteger.ONE);
+
+        // Two limits determine the right prefix boundary; the smaller one wins.
+
+        // Limit 1. The alignment of firstIpAddress: a prefix with N host bits can only start at an address
+        //     whose low N bits are all zero.
+        //     Say we have an input range of 192.168.0.0 to 192.183.255.255.
+        //     The number of IP addresses in the range is 1048576 = 2^20, so we would need a /12 (32-20)
+        //     to cover that many IP addresses, but the largest valid prefix that can start from 192.168.0.0 is /13.
+        int firstAddressMaxBits = firstIpAddress.getLowestSetBit();
+
+        // Limit 2. The size of the range: the number of IP addresses within a prefix must be a power of 2,
+        //     so take the highest power of 2 that doesn't overrun ipCount, i.e. floor(log2(ipCount)).
+        int ipRangeMaxBits = ipCount.bitLength() - 1;
+
+        // getLowestSetBit() returns -1 for zero (the IPv6 address ::). Zero is aligned to every prefix size,
+        // so only the size of the range limits the prefix.
+        if (firstAddressMaxBits < 0) {
+            return ipRangeMaxBits;
+        }
+
+        return min(firstAddressMaxBits, ipRangeMaxBits);
+    }
+
+    /**
+     * Merges the sorted prefixes into maximal runs of contiguous addresses.
+     *
+     * @param ipPrefixArray non-empty, NULL-free IPPREFIX block sorted by first address and then prefix length
+     * @return inclusive {first, last} address pairs in ascending order
+     */
+    private static List<BigInteger[]> mergeIpRanges(Block ipPrefixArray)
+    {
+        List<BigInteger[]> mergedRanges = new ArrayList<>();
+
+        Slice startingIpPrefix = IPPREFIX.getSlice(ipPrefixArray, 0);
+        BigInteger firstIpAddress = toBigInteger(ipSubnetMin(startingIpPrefix));
+        BigInteger lastIpAddress = toBigInteger(ipSubnetMax(startingIpPrefix));
+
+        /*
+         There are four cases to cover for two IP ranges where range1.startIp <= range2.startIp
+
+         1. Could be equal/duplicates.
+             [-------]
+             [-------]
+             In this case, we just ignore the second one.
+
+         2. Second could be subnet/contained within first.
+             [-------]  OR  [-------]  OR  [-------]
+               [---]        [----]            [----]
+             In this case we ignore the second one.
+
+         3. Second could be adjacent/contiguous with the first.
+             [-------]
+                      [-------]
+             In this case we extend the range to include the last IP address of the second one.
+
+         4. Second can be disjoint from the first.
+             [-------]
+                        [-------]
+             In this case the first range is finalized, and the second range becomes the current one.
+        */
+
+        for (int i = 1; i < ipPrefixArray.getPositionCount(); i++) {
+            Slice ipPrefix = IPPREFIX.getSlice(ipPrefixArray, i);
+            BigInteger nextFirstIpAddress = toBigInteger(ipSubnetMin(ipPrefix));
+            BigInteger nextLastIpAddress = toBigInteger(ipSubnetMax(ipPrefix));
+
+            // If nextFirstIpAddress <= lastIpAddress then there is overlap.
+            // However, based on the properties of the input sorted array, this will
+            // always mean that the next* range is a subnet of [firstIpAddress, lastIpAddress].
+            // We just ignore these prefixes since they are already covered (case 1 and case 2).
+            if (lastIpAddress.compareTo(nextFirstIpAddress) < 0) { // i.e. nextFirstIpAddress > lastIpAddress -- the next range does not overlap the first
+                // If they are not contiguous (case 4), finalize the range.
+                // Otherwise, extend the current range (case 3).
+                if (lastIpAddress.add(BigInteger.ONE).compareTo(nextFirstIpAddress) != 0) {
+                    BigInteger[] finalizedRange = {firstIpAddress, lastIpAddress};
+                    mergedRanges.add(finalizedRange);
+                    firstIpAddress = nextFirstIpAddress;
+                }
+                lastIpAddress = nextLastIpAddress;
+            }
+        }
+
+        // Add the last range
+        BigInteger[] finalizedRange = {firstIpAddress, lastIpAddress};
+        mergedRanges.add(finalizedRange);
+
+        return mergedRanges;
+    }
+
+    /**
+     * Converts a non-negative address value into the 16-byte, big-endian form used by IPADDRESS.
+     * IPv4 values arrive here already IPv4-mapped (see isIpv4), so the result is always 16 bytes wide;
+     * choosing the width from the magnitude would turn numerically small IPv6 addresses such as ::1 into IPv4.
+     */
+    private static byte[] bigIntegerToIPAddressBytes(BigInteger ipAddress)
+    {
+        int addressLength = 2 * Long.BYTES;
+        byte[] magnitude = ipAddress.toByteArray();
+
+        // toByteArray() prepends a 0-value sign byte when the top bit of a 16-byte value is set; drop it.
+        if (magnitude.length > addressLength) {
+            magnitude = Arrays.copyOfRange(magnitude, magnitude.length - addressLength, magnitude.length);
+        }
+
+        // toByteArray() is minimal-length, so left-pad with zeros up to the full address width.
+        byte[] ipAddressBytes = new byte[addressLength];
+        System.arraycopy(magnitude, 0, ipAddressBytes, addressLength - magnitude.length, magnitude.length);
+        return ipAddressBytes;
+    }
+
+    private static InetAddress bigIntegerToIpAddress(BigInteger ipAddress) throws UnknownHostException
+    {
+        byte[] ipAddressBytes = bigIntegerToIPAddressBytes(ipAddress);
+        return InetAddress.getByAddress(ipAddressBytes);
+    }
+
+    private static BigInteger toBigInteger(Slice ipAddress)
+    {
+        // first param sets values to always be positive
+        return new BigInteger(1, ipAddress.getBytes());
+    }
+
+    private static boolean isIpv4(Slice ipPrefix)
+    {
+        // IPADDRESS types are 16 bytes for IPv4 and IPv6. IPv4 is stored as IPv4-mapped IPv6 addresses specified in RFC 4291.
+        // The IPv4 address is encoded into the low-order 32 bits of the IPv6 address, and the high-order 96 bits
+        // hold the fixed prefix 0:0:0:0:0:FFFF.
+        // To check if this is an IPv4 address, we check if the first 10 bytes are 0 and that bytes 11 and 12 are 0xFF.
+        byte[] ipPartBytes = ipPrefix.getBytes(0, 2 * Long.BYTES);
+
+        for (int i = 0; i <= 9; i++) {
+            if (ipPartBytes[i] != (byte) 0) {
+                return false;
+            }
+        }
+
+        return ipPartBytes[10] == (byte) 0xff && ipPartBytes[11] == (byte) 0xff;
+    }
+
     private static InetAddress toInetAddress(Slice ipAddress)
     {
         try {
diff --git a/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestIpPrefixFunctions.java b/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestIpPrefixFunctions.java
index f30361c859d46..d1ed2f711af53 100644
--- a/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestIpPrefixFunctions.java
+++ b/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestIpPrefixFunctions.java
@@ -118,4 +118,118 @@ public void testIsSubnetOf()
         assertFunction("IS_SUBNET_OF(IPPREFIX '170.0.52.0/22', IPPREFIX '170.0.52.0/24')", BOOLEAN, true);
         assertFunction("IS_SUBNET_OF(IPPREFIX '170.0.52.0/24', IPPREFIX '170.0.52.0/22')", BOOLEAN, false);
     }
+
+    @Test
+    public void testIpv4PrefixCollapse()
+    {
+        // simple
+        assertFunction("IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.0/24', IPPREFIX '192.168.1.0/24'])",
+                new ArrayType(IPPREFIX),
+                ImmutableList.of("192.168.0.0/23"));
+
+        // unsorted input, 1 adjacent prefix that cannot be aggregated, and one disjoint.
+        assertFunction("IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.1.0/24', IPPREFIX '192.168.0.0/24', IPPREFIX '192.168.2.0/24', IPPREFIX '192.168.9.0/24'])",
+                new ArrayType(IPPREFIX),
+                ImmutableList.of("192.168.0.0/23", "192.168.2.0/24", "192.168.9.0/24"));
+    }
+
+    @Test
+    public void testIpv6PrefixCollapse()
+    {
+        // simple
+        assertFunction("IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '2620:10d:c090::/48', IPPREFIX '2620:10d:c091::/48'])",
+                new ArrayType(IPPREFIX),
+                ImmutableList.of("2620:10d:c090::/47"));
+
+        // unsorted input, 1 adjacent prefix that cannot be aggregated, and one disjoint.
+        assertFunction("IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '2804:13c:4d6:e200::/56', IPPREFIX '2804:13c:4d6:dd00::/56', IPPREFIX '2804:13c:4d6:dc00::/56', IPPREFIX '2804:13c:4d6:de00::/56'])",
+                new ArrayType(IPPREFIX),
+                ImmutableList.of("2804:13c:4d6:dc00::/55", "2804:13c:4d6:de00::/56", "2804:13c:4d6:e200::/56"));
+    }
+
+    @Test
+    public void testIpPrefixCollapseIpv4SingleIPs()
+    {
+        assertFunction("IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.1/32', IPPREFIX '192.168.33.1/32'])",
+                new ArrayType(IPPREFIX),
+                ImmutableList.of("192.168.0.1/32", "192.168.33.1/32"));
+    }
+
+    @Test
+    public void testIpPrefixCollapseIpv6SingleIPs()
+    {
+        assertFunction("IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '2620:10d:c090:400::5:a869/128', IPPREFIX '2620:10d:c091:400::5:a869/128'])",
+                new ArrayType(IPPREFIX),
+                ImmutableList.of("2620:10d:c090:400::5:a869/128", "2620:10d:c091:400::5:a869/128"));
+    }
+
+    @Test
+    public void testIpPrefixCollapseSinglePrefixReturnsSamePrefix()
+    {
+        assertFunction("IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.0/22'])",
+                new ArrayType(IPPREFIX),
+                ImmutableList.of("192.168.0.0/22"));
+    }
+
+    @Test
+    public void testIpPrefixCollapseOverlappingPrefixes()
+    {
+        assertFunction("IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.0/22', IPPREFIX '192.168.0.0/24'])",
+                new ArrayType(IPPREFIX),
+                ImmutableList.of("192.168.0.0/22"));
+        assertFunction("IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.0/22', IPPREFIX '192.168.2.0/24'])",
+                new ArrayType(IPPREFIX),
+                ImmutableList.of("192.168.0.0/22"));
+        assertFunction("IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.0/22', IPPREFIX '192.168.3.0/24'])",
+                new ArrayType(IPPREFIX),
+                ImmutableList.of("192.168.0.0/22"));
+        assertFunction("IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '10.0.64.0/18', IPPREFIX '10.2.0.0/15', IPPREFIX '10.0.0.0/8', IPPREFIX '11.0.0.0/8', IPPREFIX '172.168.32.0/20', IPPREFIX '172.168.0.0/18'])",
+                new ArrayType(IPPREFIX),
+                ImmutableList.of("10.0.0.0/7", "172.168.0.0/18"));
+        assertFunction("IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '10.0.0.0/8', IPPREFIX '10.0.0.0/7'])",
+                new ArrayType(IPPREFIX),
+                ImmutableList.of("10.0.0.0/7"));
+    }
+
+    @Test
+    public void testIpPrefixCollapseLowIpv6Addresses()
+    {
+        // a range starting at :: (numeric value zero) is aligned to every prefix size
+        assertFunction("IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '::/8', IPPREFIX '100::/8'])",
+                new ArrayType(IPPREFIX),
+                ImmutableList.of("::/7"));
+
+        // numerically small IPv6 addresses must not be rendered as IPv4
+        assertFunction("IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '::2/128', IPPREFIX '::3/128', IPPREFIX '::8/128'])",
+                new ArrayType(IPPREFIX),
+                ImmutableList.of("::2/127", "::8/128"));
+    }
+
+    @Test
+    public void testIpPrefixCollapseEmptyArrayInput()
+    {
+        assertFunction("IP_PREFIX_COLLAPSE(CAST(ARRAY[] AS ARRAY(IPPREFIX)))", new ArrayType(IPPREFIX), ImmutableList.of());
+    }
+
+    @Test
+    public void testIpPrefixCollapseNullInput()
+    {
+        assertFunction("IP_PREFIX_COLLAPSE(CAST(NULL AS ARRAY(IPPREFIX)))", new ArrayType(IPPREFIX), null);
+    }
+
+    @Test
+    public void testIpPrefixCollapseNoNullPrefixesError()
+    {
+        assertInvalidFunction("IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.0/22', CAST(NULL AS IPPREFIX)])",
+                "ip_prefix_collapse does not support null elements");
+    }
+
+    @Test
+    public void testIpPrefixCollapseMixedIpVersionError()
+    {
+        assertInvalidFunction("IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.0/22', IPPREFIX '2409:4043:251a:d200::/56'])",
+                "All IPPREFIX elements must be the same IP version.");
+        assertInvalidFunction("IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '::1/128', IPPREFIX '192.168.0.0/24', IPPREFIX '2620:10d:c090::/48'])",
+                "All IPPREFIX elements must be the same IP version.");
+    }
 }