Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions presto-docs/src/main/sphinx/functions/ip.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,14 @@ IP Functions
SELECT is_subnet_of(IPPREFIX '192.168.3.131/26', IPPREFIX '192.168.3.144/30'); -- true
SELECT is_subnet_of(IPPREFIX '64:ff9b::17/64', IPPREFIX '64:ffff::17/64'); -- false
SELECT is_subnet_of(IPPREFIX '192.168.3.131/26', IPPREFIX '192.168.3.131/26'); -- true

.. function:: ip_prefix_collapse(array(ip_prefix)) -> array(ip_prefix)

Returns the minimal CIDR representation of the input ``IPPREFIX`` array.
Every ``IPPREFIX`` in the input array must be the same IP version (that is, only IPv4 or only IPv6)
or the query will fail and raise an error. ``NULL`` elements in the input array are not supported
and also cause the query to fail. ::

SELECT IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.0/24', IPPREFIX '192.168.1.0/24']); -- [{192.168.0.0/23}]
SELECT IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '2620:10d:c090::/48', IPPREFIX '2620:10d:c091::/48']); -- [{2620:10d:c090::/47}]
SELECT IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.1.0/24', IPPREFIX '192.168.0.0/24', IPPREFIX '192.168.2.0/24', IPPREFIX '192.168.9.0/24']); -- [{192.168.0.0/23}, {192.168.2.0/24}, {192.168.9.0/24}]

Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,14 @@
import com.google.common.net.InetAddresses;
import io.airlift.slice.Slice;

import java.math.BigInteger;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static com.facebook.presto.operator.scalar.ArraySortFunction.sort;
import static com.facebook.presto.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;
import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT;
import static com.facebook.presto.type.IpAddressOperators.between;
Expand All @@ -44,6 +49,8 @@

public final class IpPrefixFunctions
{
// The value 2 as a BigInteger; used as the base for power-of-two address-count computations.
private static final BigInteger TWO = BigInteger.valueOf(2);

// Private constructor: this class only exposes static members and is not meant to be instantiated.
private IpPrefixFunctions() {}

@Description("IP prefix for a given IP address and subnet size")
Expand Down Expand Up @@ -137,6 +144,220 @@ public static boolean isPrefixSubnetOf(@SqlType(StandardTypes.IPPREFIX) Slice fi
return between(ipSubnetMin(second), ipSubnetMin(first), ipSubnetMax(first)) && between(ipSubnetMax(second), ipSubnetMin(first), ipSubnetMax(first));
}

/**
 * Collapses an array of IP prefixes into the smallest list of CIDR prefixes that covers
 * exactly the same set of addresses.
 *
 * @param unsortedIpPrefixArray array(IPPREFIX) block; elements may be in any order
 * @return the input block itself when it is empty or holds one non-null element, otherwise
 *         a new block with the collapsed prefixes in ascending order
 * @throws PrestoException with INVALID_FUNCTION_ARGUMENT when any element is null or when
 *         the elements are not all of the same IP version
 */
@Description("Combines the input set of IP prefixes into the fewest contiguous CIDR ranges possible.")
@ScalarFunction("ip_prefix_collapse")
@SqlType("array(IPPREFIX)")
public static Block collapseIpPrefixes(@SqlType("array(IPPREFIX)") Block unsortedIpPrefixArray)
{
    int inputPrefixCount = unsortedIpPrefixArray.getPositionCount();

    // An empty array, or an array with a single non-null element, is already minimal.
    if (inputPrefixCount == 0 || (inputPrefixCount == 1 && !unsortedIpPrefixArray.isNull(0))) {
        return unsortedIpPrefixArray;
    }

    // Sort prefixes. lessThanFunction is never used. NULLs are placed at the end.
    // Prefixes are ordered by first IP and then prefix length.
    // Example:
    //   Input:  10.0.0.0/8, 9.255.255.0/24, 10.0.0.0/7, 10.1.0.0/24, 10.10.0.0/16
    //   Output: 9.255.255.0/24, 10.0.0.0/7, 10.0.0.0/8, 10.1.0.0/24, 10.10.0.0/16
    Block ipPrefixArray = sort(null, IPPREFIX, unsortedIpPrefixArray);

    // NULLs sort last, so checking both ends is enough to detect any null element.
    if (ipPrefixArray.isNull(0) || ipPrefixArray.isNull(inputPrefixCount - 1)) {
        throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "ip_prefix_collapse does not support null elements");
    }

    // Every element must have the same IP version. Looking only at the first and last
    // sorted elements is not sufficient: IPv4 prefixes are stored as IPv4-mapped IPv6
    // addresses (::ffff:a.b.c.d), so an IPv4 prefix can sit between an IPv6 prefix below
    // ::ffff:0:0 (e.g. ::1/128) and one above it (e.g. 2001::/16).
    boolean v4 = isIpv4(IPPREFIX.getSlice(ipPrefixArray, 0));
    for (int i = 1; i < inputPrefixCount; i++) {
        if (isIpv4(IPPREFIX.getSlice(ipPrefixArray, i)) != v4) {
            throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "All IPPREFIX elements must be the same IP version.");
        }
    }

    List<List<Slice>> outputIpPrefixes = new ArrayList<>();
    int outputPrefixCount = 0;
    int ipMaxBitLength = v4 ? 32 : 128;

    // First merge the sorted prefixes into disjoint [first, last] address ranges, then
    // split every range back into the fewest CIDR prefixes that cover it.
    List<BigInteger[]> mergedIpRanges = mergeIpRanges(ipPrefixArray);
    for (BigInteger[] ipRange : mergedIpRanges) {
        List<Slice> ipPrefixes = generateMinIpPrefixes(ipRange[0], ipRange[1], ipMaxBitLength);
        outputIpPrefixes.add(ipPrefixes);
        outputPrefixCount += ipPrefixes.size();
    }

    BlockBuilder blockBuilder = IPPREFIX.createBlockBuilder(null, outputPrefixCount);
    for (List<Slice> ipPrefixSlices : outputIpPrefixes) {
        for (Slice ipPrefix : ipPrefixSlices) {
            IPPREFIX.writeSlice(blockBuilder, ipPrefix);
        }
    }

    return blockBuilder.build();
}

private static List<Slice> generateMinIpPrefixes(BigInteger firstIpAddress, BigInteger lastIpAddress, int ipVersionMaxBits)
{
List<Slice> ipPrefixSlices = new ArrayList<>();

// i.e., while firstIpAddress <= lastIpAddress
while (firstIpAddress.compareTo(lastIpAddress) <= 0) {
long rangeBits = findRangeBits(firstIpAddress, lastIpAddress); // find the number of bits for the next prefix in the range
int prefixLength = (int) (ipVersionMaxBits - rangeBits);

try {
Comment thread
matt-calder marked this conversation as resolved.
InetAddress asInetAddress = bigIntegerToIpAddress(firstIpAddress); // convert firstIpAddress from BigInt to Slice
Slice ipPrefixAsSlice = castFromVarcharToIpPrefix(utf8Slice(InetAddresses.toAddrString(asInetAddress) + "/" + prefixLength));
ipPrefixSlices.add(ipPrefixAsSlice);
}
catch (UnknownHostException ex) {
throw new PrestoException(GENERIC_INTERNAL_ERROR, "Unable to convert " + firstIpAddress + " to IP prefix", ex);
}

BigInteger ipCount = TWO.pow(ipVersionMaxBits - prefixLength);
firstIpAddress = firstIpAddress.add(ipCount); // move to the next prefix in the range
}

return ipPrefixSlices;
}

/**
 * Returns the number of host bits of the largest CIDR prefix that starts exactly at
 * firstIpAddress and does not extend past lastIpAddress.
 *
 * @param firstIpAddress first address of the range (non-negative, not greater than lastIpAddress)
 * @param lastIpAddress last address of the range, inclusive
 * @return the smaller of the alignment limit of firstIpAddress and floor(log2(address count))
 */
private static long findRangeBits(BigInteger firstIpAddress, BigInteger lastIpAddress)
{
    // The number of IP addresses in the range
    BigInteger ipCount = lastIpAddress.subtract(firstIpAddress).add(BigInteger.ONE);

    // Two limits determine the right prefix boundary.

    // Limit 1. The size of the range.
    // The number of IP addresses within a valid prefix must be a power of 2, but the IP count
    // of the range may not be one. Use the highest power of 2 that doesn't overrun ipCount,
    // i.e. floor(log2(ipCount)), which is bitLength() - 1 for any positive value.
    int ipRangeMaxBits = ipCount.bitLength() - 1;

    // Limit 2. The alignment of firstIpAddress.
    // Say we have an input range of 192.168.0.0 to 192.183.255.255.
    // The number of IP addresses in the range is 1048576 = 2^20, so a /12 (32-20) would be
    // needed to cover that many addresses, but the largest valid prefix that can start
    // at 192.168.0.0 is a /13.
    int firstAddressMaxBits = firstIpAddress.getLowestSetBit();

    // getLowestSetBit() returns -1 for zero. Address zero (e.g. ::) is aligned on every
    // prefix boundary, so only the range size limits the prefix in that case.
    if (firstAddressMaxBits < 0) {
        return ipRangeMaxBits;
    }

    return Math.min(firstAddressMaxBits, ipRangeMaxBits);
}

/**
 * Walks the sorted, null-free prefix array and merges overlapping or adjacent prefixes into
 * disjoint address ranges.
 *
 * @param ipPrefixArray sorted array(IPPREFIX) block with at least one element
 * @return list of two-element arrays {firstAddress, lastAddress}, in ascending order
 */
private static List<BigInteger[]> mergeIpRanges(Block ipPrefixArray)
{
    List<BigInteger[]> result = new ArrayList<>();

    Slice head = IPPREFIX.getSlice(ipPrefixArray, 0);
    BigInteger rangeStart = toBigInteger(ipSubnetMin(head));
    BigInteger rangeEnd = toBigInteger(ipSubnetMax(head));

    /*
     For two IP ranges where range1.startIp <= range2.startIp there are four cases:

     1. Equal / duplicates.
         [-------]
         [-------]
        The second one is ignored.

     2. Second is a subnet of (contained within) the first.
         [-------]  OR  [-------]  OR  [-------]
         [---]            [----]          [----]
        The second one is ignored.

     3. Second is adjacent to (contiguous with) the first.
         [-------]
                  [-------]
        The current range is extended to the last IP address of the second one.

     4. Second is disjoint from the first.
         [-------]
                      [-------]
        The current range is finalized and the second range becomes the current one.
    */

    int total = ipPrefixArray.getPositionCount();
    for (int position = 1; position < total; position++) {
        Slice candidate = IPPREFIX.getSlice(ipPrefixArray, position);
        BigInteger candidateStart = toBigInteger(ipSubnetMin(candidate));
        BigInteger candidateEnd = toBigInteger(ipSubnetMax(candidate));

        // candidateStart <= rangeEnd means overlap. Given the sort order of the input this
        // always means the candidate lies inside the current range (cases 1 and 2): skip it.
        if (candidateStart.compareTo(rangeEnd) <= 0) {
            continue;
        }

        // No overlap. Unless the candidate begins right after the current range (case 3),
        // close the current range and start a new one at the candidate (case 4).
        BigInteger addressAfterRange = rangeEnd.add(BigInteger.ONE);
        if (!addressAfterRange.equals(candidateStart)) {
            result.add(new BigInteger[] {rangeStart, rangeEnd});
            rangeStart = candidateStart;
        }
        rangeEnd = candidateEnd;
    }

    // The range still being built is always the last one.
    result.add(new BigInteger[] {rangeStart, rangeEnd});

    return result;
}

/**
 * Converts an address held as an unsigned integer into a 16-byte, big-endian address array.
 *
 * The values handled here come from 16-byte address slices (see toBigInteger), where IPv4 is
 * carried as an IPv4-mapped IPv6 address. The result is therefore always normalised to
 * 16 bytes; guessing the IP version from the number of significant bytes would turn low
 * IPv6 addresses such as ::1 into IPv4 addresses.
 *
 * @param ipAddress non-negative address value below 2^128
 * @return the 16-byte representation; a value that does not fit in 16 bytes is returned as
 *         produced by BigInteger.toByteArray(), without any padding or trimming
 */
private static byte[] bigIntegerToIPAddressBytes(BigInteger ipAddress)
{
    final int ipAddressByteLength = 16;
    byte[] ipAddressBytes = ipAddress.toByteArray();

    // A full 128-bit value with the top bit set carries an extra leading 0-value sign byte
    if (ipAddressBytes.length == ipAddressByteLength + 1 && ipAddressBytes[0] == 0) {
        ipAddressBytes = Arrays.copyOfRange(ipAddressBytes, 1, ipAddressBytes.length); // remove leading 0
    }
    // BigInteger uses as few bytes as possible, e.g. ::1 is a single byte: left-pad with zeros
    else if (ipAddressBytes.length < ipAddressByteLength) {
        byte[] padded = new byte[ipAddressByteLength];
        System.arraycopy(ipAddressBytes, 0, padded, ipAddressByteLength - ipAddressBytes.length, ipAddressBytes.length);
        ipAddressBytes = padded;
    }
    // else the length is already 16, or the value is out of range and is left untouched
    return ipAddressBytes;
}

/**
 * Builds an InetAddress from an address held as an unsigned integer.
 *
 * @param ipAddress address value
 * @return the InetAddress created from the raw bytes of the value
 * @throws UnknownHostException when InetAddress.getByAddress rejects the byte array
 */
private static InetAddress bigIntegerToIpAddress(BigInteger ipAddress) throws UnknownHostException
{
    return InetAddress.getByAddress(bigIntegerToIPAddressBytes(ipAddress));
}

/**
 * Interprets the bytes of the slice as one unsigned, big-endian integer.
 *
 * @param ipAddress slice holding the address bytes
 * @return the non-negative integer value of those bytes
 */
private static BigInteger toBigInteger(Slice ipAddress)
{
    // signum 1 forces a positive result even when the top bit of the first byte is set
    final int positiveSignum = 1;
    byte[] magnitude = ipAddress.getBytes();
    return new BigInteger(positiveSignum, magnitude);
}

/**
 * Tells whether the address part of the prefix is an IPv4-mapped IPv6 address.
 *
 * The first 16 bytes of the slice are read as the address. IPv4 is stored in the
 * IPv4-mapped form described in RFC 4291: 80 zero bits, then 16 one bits (0:0:0:0:0:FFFF),
 * then the 32-bit IPv4 address in the low-order bits.
 *
 * @param ipPrefix IPPREFIX slice
 * @return true when bytes 0-9 are zero and bytes 10 and 11 are 0xFF
 */
private static boolean isIpv4(Slice ipPrefix)
{
    byte[] addressBytes = ipPrefix.getBytes(0, 2 * Long.BYTES);

    // the leading 10 bytes must all be zero
    int index = 0;
    while (index < 10) {
        if (addressBytes[index] != (byte) 0) {
            return false;
        }
        index++;
    }

    // followed by the two-byte 0xFFFF marker
    if (addressBytes[10] != (byte) 0xff) {
        return false;
    }
    return addressBytes[11] == (byte) 0xff;
}

private static InetAddress toInetAddress(Slice ipAddress)
{
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,102 @@ public void testIsSubnetOf()
assertFunction("IS_SUBNET_OF(IPPREFIX '170.0.52.0/22', IPPREFIX '170.0.52.0/24')", BOOLEAN, true);
assertFunction("IS_SUBNET_OF(IPPREFIX '170.0.52.0/24', IPPREFIX '170.0.52.0/22')", BOOLEAN, false);
}

@Test
public void testIpv4PrefixCollapse()
{
    ArrayType ipPrefixArrayType = new ArrayType(IPPREFIX);

    // two adjacent /24 prefixes collapse into a single /23
    String adjacentPair = "IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.0/24', IPPREFIX '192.168.1.0/24'])";
    assertFunction(adjacentPair, ipPrefixArrayType, ImmutableList.of("192.168.0.0/23"));

    // unsorted input with one adjacent prefix that cannot be aggregated and one disjoint prefix
    String unsortedMix = "IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.1.0/24', IPPREFIX '192.168.0.0/24', IPPREFIX '192.168.2.0/24', IPPREFIX '192.168.9.0/24'])";
    assertFunction(unsortedMix, ipPrefixArrayType, ImmutableList.of("192.168.0.0/23", "192.168.2.0/24", "192.168.9.0/24"));
}

@Test
public void testIpv6PrefixCollapse()
{
    ArrayType ipPrefixArrayType = new ArrayType(IPPREFIX);

    // two adjacent /48 prefixes collapse into a single /47
    String adjacentPair = "IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '2620:10d:c090::/48', IPPREFIX '2620:10d:c091::/48'])";
    assertFunction(adjacentPair, ipPrefixArrayType, ImmutableList.of("2620:10d:c090::/47"));

    // unsorted input with one adjacent prefix that cannot be aggregated and one disjoint prefix
    String unsortedMix = "IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '2804:13c:4d6:e200::/56', IPPREFIX '2804:13c:4d6:dd00::/56', IPPREFIX '2804:13c:4d6:dc00::/56', IPPREFIX '2804:13c:4d6:de00::/56'])";
    assertFunction(unsortedMix, ipPrefixArrayType, ImmutableList.of("2804:13c:4d6:dc00::/55", "2804:13c:4d6:de00::/56", "2804:13c:4d6:e200::/56"));
}

@Test
public void testIpPrefixCollapseIpv4SingleIPs()
{
    // two unrelated /32 host prefixes must come back unchanged
    String sql = "IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.1/32', IPPREFIX '192.168.33.1/32'])";
    assertFunction(sql, new ArrayType(IPPREFIX), ImmutableList.of("192.168.0.1/32", "192.168.33.1/32"));
}

@Test
public void testIpPrefixCollapseIpv6SingleIPs()
{
    // two unrelated /128 host prefixes must come back unchanged
    String sql = "IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '2620:10d:c090:400::5:a869/128', IPPREFIX '2620:10d:c091:400::5:a869/128'])";
    assertFunction(sql, new ArrayType(IPPREFIX), ImmutableList.of("2620:10d:c090:400::5:a869/128", "2620:10d:c091:400::5:a869/128"));
}

@Test
public void testIpPrefixCollapseSinglePrefixReturnsSamePrefix()
{
    // a one-element array is returned as is
    String sql = "IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.0/22'])";
    assertFunction(sql, new ArrayType(IPPREFIX), ImmutableList.of("192.168.0.0/22"));
}

@Test
public void testIpPrefixCollapseOverlappingPrefixes()
{
    ArrayType ipPrefixArrayType = new ArrayType(IPPREFIX);

    // a /24 at the start, in the middle, or at the end of a /22 is absorbed by the /22
    assertFunction(
            "IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.0/22', IPPREFIX '192.168.0.0/24'])",
            ipPrefixArrayType,
            ImmutableList.of("192.168.0.0/22"));
    assertFunction(
            "IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.0/22', IPPREFIX '192.168.2.0/24'])",
            ipPrefixArrayType,
            ImmutableList.of("192.168.0.0/22"));
    assertFunction(
            "IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.0/22', IPPREFIX '192.168.3.0/24'])",
            ipPrefixArrayType,
            ImmutableList.of("192.168.0.0/22"));

    // mix of contained, adjacent and disjoint prefixes
    assertFunction(
            "IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '10.0.64.0/18', IPPREFIX '10.2.0.0/15', IPPREFIX '10.0.0.0/8', IPPREFIX '11.0.0.0/8', IPPREFIX '172.168.32.0/20', IPPREFIX '172.168.0.0/18'])",
            ipPrefixArrayType,
            ImmutableList.of("10.0.0.0/7", "172.168.0.0/18"));

    // the wider prefix wins even when it is listed after the narrower one
    assertFunction(
            "IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '10.0.0.0/8', IPPREFIX '10.0.0.0/7'])",
            ipPrefixArrayType,
            ImmutableList.of("10.0.0.0/7"));
}

@Test
public void testIpPrefixCollapseEmptyArrayInput()
{
    // an empty array collapses to an empty array
    String sql = "IP_PREFIX_COLLAPSE(CAST(ARRAY[] AS ARRAY(IPPREFIX)))";
    assertFunction(sql, new ArrayType(IPPREFIX), ImmutableList.of());
}

@Test
public void testIpPrefixCollapseNullInput()
{
    // a NULL array argument yields NULL
    String sql = "IP_PREFIX_COLLAPSE(CAST(NULL AS ARRAY(IPPREFIX)))";
    assertFunction(sql, new ArrayType(IPPREFIX), null);
}

@Test
public void testIpPrefixCollapseNoNullPrefixesError()
{
    // a NULL element inside the array is rejected
    String sql = "IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.0/22', CAST(NULL AS IPPREFIX)])";
    String expectedMessage = "ip_prefix_collapse does not support null elements";
    assertInvalidFunction(sql, expectedMessage);
}

@Test
public void testIpPrefixCollapseMixedIpVersionError()
{
    // mixing an IPv4 prefix with an IPv6 prefix is rejected
    String sql = "IP_PREFIX_COLLAPSE(ARRAY[IPPREFIX '192.168.0.0/22', IPPREFIX '2409:4043:251a:d200::/56'])";
    String expectedMessage = "All IPPREFIX elements must be the same IP version.";
    assertInvalidFunction(sql, expectedMessage);
}
}