diff --git a/velox/docs/develop/types.rst b/velox/docs/develop/types.rst index 543360a518a..ac8cefd322d 100644 --- a/velox/docs/develop/types.rst +++ b/velox/docs/develop/types.rst @@ -136,6 +136,7 @@ HYPERLOGLOG VARBINARY JSON VARCHAR TIMESTAMP WITH TIME ZONE BIGINT UUID HUGEINT +IPADDRESS HUGEINT ======================== ===================== TIMESTAMP WITH TIME ZONE represents a time point in milliseconds precision @@ -146,6 +147,14 @@ Supported range of milliseconds is [0xFFF8000000000000L, 0x7FFFFFFFFFFFF] store timezone ID. Supported range of timezone ID is [1, 1680]. The definition of timezone IDs can be found in ``TimeZoneDatabase.cpp``. +IPADDRESS represents an IPV6 or IPV4 mapped IPV6 address. Its physical +type is HUGEINT. The format that the address is stored in is defined as part of `(RFC 4291#section-2.5.5.2) <https://datatracker.ietf.org/doc/html/rfc4291#section-2.5.5.2>`_. +As Velox is run on Little Endian systems and the standard is network byte (Big Endian) +order, we reverse the bytes to allow for masking and other bit operations +used in IPADDRESS/IPPREFIX related functions. This type can be used to +create IPPREFIX networks as well as to check IPADDRESS validity within +IPPREFIX networks. + Spark Types ~~~~~~~~~~~~ The `data types <https://spark.apache.org/docs/latest/sql-ref-datatypes.html>`_ in Spark have some semantic differences compared to those in diff --git a/velox/docs/functions/presto/conversion.rst b/velox/docs/functions/presto/conversion.rst index 2615b5f3ec4..b24117ae306 100644 --- a/velox/docs/functions/presto/conversion.rst +++ b/velox/docs/functions/presto/conversion.rst @@ -30,7 +30,7 @@ are supported if the conversion of their element types are supported. In additio supported conversions to/from JSON are listed in :doc:`json`. .. list-table:: - :widths: 25 25 25 25 25 25 25 25 25 25 25 25 25 25 + :widths: 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 :header-rows: 1 * - @@ -42,11 +42,13 @@ supported conversions to/from JSON are listed in :doc:`json`. 
- real - double - varchar + - varbinary - timestamp - timestamp with time zone - date - interval day to second - decimal + - ipaddress * - tinyint - Y - Y @@ -56,11 +58,13 @@ supported conversions to/from JSON are listed in :doc:`json`. - Y - Y - Y + - - - - - - Y + - * - smallint - Y - Y @@ -70,11 +74,13 @@ supported conversions to/from JSON are listed in :doc:`json`. - Y - Y - Y + - - - - - - Y + - * - integer - Y - Y @@ -84,11 +90,13 @@ supported conversions to/from JSON are listed in :doc:`json`. - Y - Y - Y + - - - - - - Y + - * - bigint - Y - Y @@ -98,11 +106,13 @@ supported conversions to/from JSON are listed in :doc:`json`. - Y - Y - Y + - - - - - - Y + - * - boolean - Y - Y @@ -112,11 +122,13 @@ supported conversions to/from JSON are listed in :doc:`json`. - Y - Y - Y + - - - - - - Y + - * - real - Y - Y @@ -126,11 +138,13 @@ supported conversions to/from JSON are listed in :doc:`json`. - Y - Y - Y + - - - - - - Y + - * - double - Y - Y @@ -140,11 +154,13 @@ supported conversions to/from JSON are listed in :doc:`json`. - Y - Y - Y + - - - - - - Y + - * - varchar - Y - Y @@ -154,11 +170,29 @@ supported conversions to/from JSON are listed in :doc:`json`. - Y - Y - Y + - - Y - Y - Y - - Y + - Y + * - varbinary + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - Y * - timestamp - - @@ -168,11 +202,13 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - Y + - - Y - Y - Y - - + - * - timestamp with time zone - - @@ -182,11 +218,13 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - Y + - - Y - - Y - - + - * - date - - @@ -196,11 +234,13 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - Y + - - Y - Y - - - + - * - interval day to second - - @@ -210,6 +250,8 @@ supported conversions to/from JSON are listed in :doc:`json`. - - - Y + - + - - - - @@ -224,11 +266,29 @@ supported conversions to/from JSON are listed in :doc:`json`. 
- Y - Y - Y + - - - - - - Y + - + * - ipaddress + - + - + - + - + - + - + - + - Y + - Y + - + - + - + - + - + - Cast to Integral Types ---------------------- @@ -604,6 +664,60 @@ is the number of whole days in the interval, HH is then number of hours between SELECT cast(now() - date('2024-03-01') as varchar); -- '35 09:15:54.092' SELECT cast(date('2024-03-01') - now() as varchar); -- '-35 09:16:20.598' +From IPADDRESS +^^^^^^^^^^^^^^ + +Casting from IPADDRESS to VARCHAR returns a string formatted as x.x.x.x for IPV4 mapped IPV6 addresses. +All other IPV6 addresses are formatted in the compressed alternate IPV6 form defined in `RFC 4291#section-2.2 <https://datatracker.ietf.org/doc/html/rfc4291#section-2.2>`_. + +IPV4: + +:: + + SELECT cast(ipaddress '1.2.3.4' as varchar); -- '1.2.3.4' + +IPV6: + +:: + + SELECT cast(ipaddress '2001:0db8:0000:0000:0000:ff00:0042:8329' as varchar); -- '2001:db8::ff00:42:8329' + SELECT cast(ipaddress '0:0:0:0:0:0:13.1.68.3' as varchar); -- '::13.1.68.3' + +IPV4 mapped IPV6: + +:: + + SELECT cast(ipaddress '::ffff:ffff:ffff' as varchar); -- '255.255.255.255' + +Cast to VARBINARY +----------------- + +From IPADDRESS +^^^^^^^^^^^^^^ + +Returns the IPV6 address as a 16 byte varbinary string in network byte order. + +Internally, the type is a pure IPv6 address. Support for IPv4 is handled using the IPv4-mapped IPv6 address range `(RFC 4291#section-2.5.5.2) <https://datatracker.ietf.org/doc/html/rfc4291#section-2.5.5.2>`_. +When creating an IPADDRESS, IPv4 addresses will be mapped into that range. 
+ +IPV6: + +:: + + SELECT cast(ipaddress '2001:0db8:0000:0000:0000:ff00:0042:8329' as varbinary); -- 0x20010db8000000000000ff0000428329 + +IPV4: + +:: + + SELECT cast(ipaddress '1.2.3.4' as varbinary); -- 0x00000000000000000000ffff01020304 + +IPV4 mapped IPV6: + +:: + + SELECT cast(ipaddress '::ffff:ffff:ffff' as varbinary); -- 0x00000000000000000000ffffffffffff + Cast to TIMESTAMP ----------------- @@ -919,6 +1033,101 @@ Invalid example SELECT cast('-3E+2.1' as decimal(12, 2)); -- Value is not a number SELECT cast('3E+' as decimal(12, 2)); -- Value is not a number +Cast to IPADDRESS +----------------- + +From VARCHAR +^^^^^^^^^^^^ + +To cast a varchar to IPADDRESS, the input string must be in the form of either +IPV4 or IPV6. + +For IPV4 it must be in the form of: +x.x.x.x where each x is an integer value between 0-255. + +For IPV6 it must follow any of the forms defined in `RFC 4291#section-2.2 <https://datatracker.ietf.org/doc/html/rfc4291#section-2.2>`_. + +Full form: + +:: + + 2001:0DB8:0000:0000:0008:0800:200C:417A + 2001:DB8:0:0:8:800:200C:417A + +Compressed form:: + + 2001:DB8::8:800:200C:417A + +Alternate form:: + + 0:0:0:0:0:0:13.1.68.3 + ::13.1.68.3 + +Internally, the type is a pure IPv6 address. Support for IPv4 is handled using the IPv4-mapped IPv6 address range `(RFC 4291#section-2.5.5.2) <https://datatracker.ietf.org/doc/html/rfc4291#section-2.5.5.2>`_. +When creating an IPADDRESS, IPv4 addresses will be mapped into that range. + +When formatting an IPADDRESS, any address within the mapped range will be formatted as an IPv4 address. +Other addresses will be formatted as IPv6 using the canonical format defined in `RFC 5952 <https://datatracker.ietf.org/doc/html/rfc5952>`_. 
+ +Valid examples: + +:: + + SELECT cast('2001:0db8:0000:0000:0000:ff00:0042:8329' as ipaddress); -- ipaddress '2001:db8::ff00:42:8329' + SELECT cast('1.2.3.4' as ipaddress); -- ipaddress '1.2.3.4' + SELECT cast('::ffff:ffff:ffff' as ipaddress); -- ipaddress '255.255.255.255' + +Invalid examples: + +:: + + SELECT cast('2001:db8::1::1' as ipaddress); -- Invalid IP address '2001:db8::1::1' + SELECT cast('789.1.1.1' as ipaddress); -- Invalid IP address '789.1.1.1' + +From VARBINARY +^^^^^^^^^^^^^^ + +To cast a varbinary to IPADDRESS, it must be either IPV4 (4 bytes) +or IPV6 (16 bytes) in network byte order. + +IPV4: + +:: + + [0x01, 0x02, 0x03, 0x04] -> 1.2.3.4 + +IPV6: + +:: + + [0x20, 0x01, 0x0d, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x42, 0x83, 0x29] -> 2001:db8::ff00:42:8329 + +Internally, the type is a pure IPv6 address. Support for IPv4 is handled using the IPv4-mapped IPv6 address range `(RFC 4291#section-2.5.5.2) <https://datatracker.ietf.org/doc/html/rfc4291#section-2.5.5.2>`_. +When creating an IPADDRESS, IPv4 addresses will be mapped into that range. + +When formatting an IPADDRESS, any address within the mapped range will be formatted as an IPv4 address. +Other addresses will be formatted as IPv6 using the canonical format defined in `RFC 5952 <https://datatracker.ietf.org/doc/html/rfc5952>`_. 
+ +IPV4 mapped IPV6 address: + +:: + + [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x01, 0x02, 0x03, 0x04] -> 1.2.3.4 + +Valid examples: + +:: + + SELECT cast(from_hex('20010db8000000000000ff0000428329') as ipaddress); -- ipaddress '2001:db8::ff00:42:8329' + SELECT cast(from_hex('01020304') as ipaddress); -- ipaddress '1.2.3.4' + SELECT cast(from_hex('00000000000000000000ffff01020304') as ipaddress); -- ipaddress '1.2.3.4' + +Invalid examples: + +:: + + SELECT cast(from_hex('f000001100') as ipaddress); -- Invalid IP address binary length: 5 + Miscellaneous ------------- diff --git a/velox/expression/tests/CustomTypeTest.cpp b/velox/expression/tests/CustomTypeTest.cpp index 01db687746d..c55015d8590 100644 --- a/velox/expression/tests/CustomTypeTest.cpp +++ b/velox/expression/tests/CustomTypeTest.cpp @@ -216,6 +216,7 @@ TEST_F(CustomTypeTest, getCustomTypeNames) { "HYPERLOGLOG", "TIMESTAMP WITH TIME ZONE", "UUID", + "IPADDRESS", }), names); @@ -229,6 +230,7 @@ TEST_F(CustomTypeTest, getCustomTypeNames) { "HYPERLOGLOG", "TIMESTAMP WITH TIME ZONE", "UUID", + "IPADDRESS", "FANCY_INT", }), names); diff --git a/velox/functions/prestosql/IPAddressFunctions.h b/velox/functions/prestosql/IPAddressFunctions.h new file mode 100644 index 00000000000..abd214a9aba --- /dev/null +++ b/velox/functions/prestosql/IPAddressFunctions.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/functions/prestosql/types/IPAddressType.h" + +namespace facebook::velox::functions { +// Registers the IPADDRESS type. Inline: defined in a header; prefix is unused. +inline void registerIPAddressFunctions(const std::string& prefix) { + registerIPAddressType(); +} + +} // namespace facebook::velox::functions diff --git a/velox/functions/prestosql/TypeOf.cpp b/velox/functions/prestosql/TypeOf.cpp index 048c8d031c6..77a4e653879 100644 --- a/velox/functions/prestosql/TypeOf.cpp +++ b/velox/functions/prestosql/TypeOf.cpp @@ -15,6 +15,7 @@ */ #include "velox/expression/VectorFunction.h" #include "velox/functions/prestosql/types/HyperLogLogType.h" +#include "velox/functions/prestosql/types/IPAddressType.h" #include "velox/functions/prestosql/types/JsonType.h" #include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h" #include "velox/functions/prestosql/types/UuidType.h" @@ -54,6 +55,8 @@ std::string typeName(const TypePtr& type) { case TypeKind::HUGEINT: { if (isUuidType(type)) { return "uuid"; + } else if (isIPAddressType(type)) { + return "ipaddress"; } VELOX_USER_CHECK( type->isDecimal(), diff --git a/velox/functions/prestosql/registration/RegistrationFunctions.cpp b/velox/functions/prestosql/registration/RegistrationFunctions.cpp index 3e1b8bb647e..6bb91a6d77a 100644 --- a/velox/functions/prestosql/registration/RegistrationFunctions.cpp +++ b/velox/functions/prestosql/registration/RegistrationFunctions.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ #include +#include "velox/functions/prestosql/IPAddressFunctions.h" #include "velox/functions/prestosql/UuidFunctions.h" namespace facebook::velox::functions { @@ -108,6 +109,7 @@ void registerAllScalarFunctions(const std::string& prefix) { registerBinaryFunctions(prefix); registerBitwiseFunctions(prefix); registerUuidFunctions(prefix); + registerIPAddressFunctions(prefix); } void registerMapAllowingDuplicates( diff --git a/velox/functions/prestosql/tests/CMakeLists.txt b/velox/functions/prestosql/tests/CMakeLists.txt index 8fff437472c..03497750b8f 100644 --- a/velox/functions/prestosql/tests/CMakeLists.txt +++ b/velox/functions/prestosql/tests/CMakeLists.txt @@ -64,6 +64,7 @@ add_executable( HyperLogLogCastTest.cpp HyperLogLogFunctionsTest.cpp InPredicateTest.cpp + IPAddressCastTest.cpp JsonCastTest.cpp JsonExtractScalarTest.cpp JsonFunctionsTest.cpp diff --git a/velox/functions/prestosql/tests/IPAddressCastTest.cpp b/velox/functions/prestosql/tests/IPAddressCastTest.cpp new file mode 100644 index 00000000000..0f62b6b7248 --- /dev/null +++ b/velox/functions/prestosql/tests/IPAddressCastTest.cpp @@ -0,0 +1,125 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/common/base/tests/GTestUtils.h" +#include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h" + +namespace facebook::velox::functions::prestosql { + +namespace { + +class IPAddressCastTest : public functions::test::FunctionBaseTest { + protected: + std::optional castToVarchar( + const std::optional input) { + auto result = evaluateOnce( + "cast(cast(c0 as ipaddress) as varchar)", input); + return result; + } + + std::optional castFromVarbinary( + const std::optional input) { + auto result = + evaluateOnce("cast(from_hex(c0) as ipaddress)", input); + return result; + } + + std::optional allCasts(const std::optional input) { + auto result = evaluateOnce( + "cast(cast(cast(cast(c0 as ipaddress) as varbinary) as ipaddress) as varchar)", + input); + return result; + } +}; + +int128_t stringToInt128(std::string value) { + int128_t res = 0; + for (char c : value) { + res = res * 10 + c - '0'; + } + return res; +} + +TEST_F(IPAddressCastTest, castToVarchar) { + EXPECT_EQ(castToVarchar("::ffff:1.2.3.4"), "1.2.3.4"); + EXPECT_EQ(castToVarchar("0:0:0:0:0:0:13.1.68.3"), "::13.1.68.3"); + EXPECT_EQ(castToVarchar("1.2.3.4"), "1.2.3.4"); + EXPECT_EQ(castToVarchar("192.168.0.0"), "192.168.0.0"); + EXPECT_EQ( + castToVarchar("2001:0db8:0000:0000:0000:ff00:0042:8329"), + "2001:db8::ff00:42:8329"); + EXPECT_EQ(castToVarchar("2001:db8::ff00:42:8329"), "2001:db8::ff00:42:8329"); + EXPECT_EQ(castToVarchar("2001:db8:0:0:1:0:0:1"), "2001:db8::1:0:0:1"); + EXPECT_EQ(castToVarchar("2001:db8:0:0:1::1"), "2001:db8::1:0:0:1"); + EXPECT_EQ(castToVarchar("2001:db8::1:0:0:1"), "2001:db8::1:0:0:1"); + EXPECT_EQ( + castToVarchar("2001:DB8::FF00:ABCD:12EF"), "2001:db8::ff00:abcd:12ef"); + VELOX_ASSERT_THROW( + castToVarchar("facebook.com"), "Invalid IP address 'facebook.com'"); + VELOX_ASSERT_THROW( + castToVarchar("localhost"), "Invalid IP address 'localhost'"); + VELOX_ASSERT_THROW( + castToVarchar("2001:db8::1::1"), "Invalid IP address '2001:db8::1::1'"); + 
VELOX_ASSERT_THROW( + castToVarchar("2001:zxy::1::1"), "Invalid IP address '2001:zxy::1::1'"); + VELOX_ASSERT_THROW( + castToVarchar("789.1.1.1"), "Invalid IP address '789.1.1.1'"); +} + +TEST_F(IPAddressCastTest, castFromVarbinary) { + EXPECT_EQ( + castFromVarbinary("00000000000000000000ffff01020304"), + stringToInt128("281470698652420")); + EXPECT_EQ(castFromVarbinary("01020304"), stringToInt128("281470698652420")); + EXPECT_EQ(castFromVarbinary("c0a80000"), stringToInt128("281473913978880")); + EXPECT_EQ( + castFromVarbinary("20010db8000000000000ff0000428329"), + stringToInt128("42540766411282592856904265327123268393")); + EXPECT_THROW(castFromVarbinary("f000001100"), VeloxUserError); +} + +TEST_F(IPAddressCastTest, allCasts) { + EXPECT_EQ(allCasts("::ffff:1.2.3.4"), "1.2.3.4"); + EXPECT_EQ( + allCasts("2001:0db8:0000:0000:0000:ff00:0042:8329"), + "2001:db8::ff00:42:8329"); + EXPECT_EQ(allCasts("2001:db8::ff00:42:8329"), "2001:db8::ff00:42:8329"); +} + +TEST_F(IPAddressCastTest, nullTest) { + EXPECT_EQ(castToVarchar(std::nullopt), std::nullopt); + EXPECT_EQ(castFromVarbinary(std::nullopt), std::nullopt); +} + +TEST_F(IPAddressCastTest, castRoundTrip) { + auto strings = makeFlatVector( + {"87a0:ce14:8989:44c9:826e:b4d8:73f9:1542", + "7cd6:bcec:1216:5c20:4b67:b1bd:173:ced", + "192.128.0.0"}); + + auto ipaddresses = + evaluate("cast(c0 as ipaddress)", makeRowVector({strings})); + auto stringsCopy = + evaluate("cast(c0 as varchar)", makeRowVector({ipaddresses})); + auto ipaddressesCopy = + evaluate("cast(c0 as ipaddress)", makeRowVector({stringsCopy})); + + velox::test::assertEqualVectors(strings, stringsCopy); + velox::test::assertEqualVectors(ipaddresses, ipaddressesCopy); +} +} // namespace + +} // namespace facebook::velox::functions::prestosql diff --git a/velox/functions/prestosql/types/CMakeLists.txt b/velox/functions/prestosql/types/CMakeLists.txt index dc7c6b9c634..0089307a4f0 100644 --- a/velox/functions/prestosql/types/CMakeLists.txt +++ 
b/velox/functions/prestosql/types/CMakeLists.txt @@ -16,7 +16,8 @@ velox_add_library( HyperLogLogType.cpp JsonType.cpp TimestampWithTimeZoneType.cpp - UuidType.cpp) + UuidType.cpp + IPAddressType.cpp) velox_link_libraries( velox_presto_types diff --git a/velox/functions/prestosql/types/IPAddressType.cpp b/velox/functions/prestosql/types/IPAddressType.cpp new file mode 100644 index 00000000000..b8ec2918d5a --- /dev/null +++ b/velox/functions/prestosql/types/IPAddressType.cpp @@ -0,0 +1,232 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/functions/prestosql/types/IPAddressType.h" +#include +#include "velox/expression/CastExpr.h" + +static constexpr int kIPV4AddressBytes = 4; +static constexpr int kIPV4ToV6FFIndex = 10; +static constexpr int kIPV4ToV6Index = 12; +static constexpr int kIPAddressBytes = 16; +static constexpr int kIPAddressMaxStrLen = 39; + +namespace facebook::velox { + +namespace { + +class IPAddressCastOperator : public exec::CastOperator { + public: + bool isSupportedFromType(const TypePtr& other) const override { + switch (other->kind()) { + case TypeKind::VARBINARY: + case TypeKind::VARCHAR: + return true; + default: + return false; + } + } + + bool isSupportedToType(const TypePtr& other) const override { + switch (other->kind()) { + case TypeKind::VARBINARY: + case TypeKind::VARCHAR: + return true; + default: + return false; + } + } + + void castTo( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + const TypePtr& resultType, + VectorPtr& result) const override { + context.ensureWritable(rows, resultType, result); + + if (input.typeKind() == TypeKind::VARCHAR) { + castFromString(input, context, rows, *result); + } else if (input.typeKind() == TypeKind::VARBINARY) { + castFromVarbinary(input, context, rows, *result); + } else { // Report the unsupported source type, not the IPADDRESS target. + VELOX_UNSUPPORTED( + "Cast from {} to IPAddress not supported", input.type()->toString()); + } + } + + void castFrom( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + const TypePtr& resultType, + VectorPtr& result) const override { + context.ensureWritable(rows, resultType, result); + + if (resultType->kind() == TypeKind::VARCHAR) { + castToString(input, context, rows, *result); + } else if (resultType->kind() == TypeKind::VARBINARY) { + castToVarbinary(input, context, rows, *result); + } else { + VELOX_UNSUPPORTED( + "Cast from IPAddress to {} not supported", resultType->toString()); + } + } + + private: + static void castToString( + const BaseVector& 
input, + exec::EvalCtx& context, + const SelectivityVector& rows, + BaseVector& result) { + auto* flatResult = result.as>(); + const auto* ipaddresses = input.as>(); + folly::ByteArray16 addrBytes; + + context.applyToSelectedNoThrow(rows, [&](auto row) { + const auto intAddr = ipaddresses->valueAt(row); + memcpy(&addrBytes, &intAddr, kIPAddressBytes); + + std::reverse(addrBytes.begin(), addrBytes.end()); + folly::IPAddressV6 v6Addr(addrBytes); + + exec::StringWriter result(flatResult, row); + if (v6Addr.isIPv4Mapped()) { + result.append(v6Addr.createIPv4().str()); + } else { + result.append(v6Addr.str()); + } + result.finalize(); + }); + } + + static void castFromString( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + BaseVector& result) { + auto* flatResult = result.as>(); + const auto* ipAddressStrings = input.as>(); + int128_t intAddr; + + context.applyToSelectedNoThrow(rows, [&](auto row) { + const auto ipAddressString = ipAddressStrings->valueAt(row); + + auto maybeIp = folly::IPAddress::tryFromString(ipAddressString); + if (maybeIp.hasError()) { + if (threadSkipErrorDetails()) { + context.setStatus(row, Status::UserError()); + } else { + context.setStatus( + row, + Status::UserError("Invalid IP address '{}'", ipAddressString)); + } + return; + } + folly::IPAddress addr = maybeIp.value(); + auto addrBytes = folly::IPAddress::createIPv6(addr).toByteArray(); + + std::reverse(addrBytes.begin(), addrBytes.end()); + memcpy(&intAddr, &addrBytes, kIPAddressBytes); + + flatResult->set(row, intAddr); + }); + } + + static void castToVarbinary( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + BaseVector& result) { + auto* flatResult = result.as>(); + const auto* ipaddresses = input.as>(); + + context.applyToSelectedNoThrow(rows, [&](auto row) { + const auto intAddr = ipaddresses->valueAt(row); + folly::ByteArray16 addrBytes; + memcpy(&addrBytes, &intAddr, kIPAddressBytes); + 
std::reverse(addrBytes.begin(), addrBytes.end()); + + exec::StringWriter result(flatResult, row); + result.resize(kIPAddressBytes); + memcpy(result.data(), &addrBytes, kIPAddressBytes); + result.finalize(); + }); + } + + static void castFromVarbinary( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + BaseVector& result) { + auto* flatResult = result.as>(); + const auto* ipAddressBinaries = input.as>(); + + context.applyToSelectedNoThrow(rows, [&](auto row) { + int128_t intAddr; + folly::ByteArray16 addrBytes = {}; + const auto ipAddressBinary = ipAddressBinaries->valueAt(row); + + if (ipAddressBinary.size() == kIPV4AddressBytes) { + addrBytes[kIPV4ToV6FFIndex] = 0xFF; + addrBytes[kIPV4ToV6FFIndex + 1] = 0xFF; + memcpy( + &addrBytes[kIPV4ToV6Index], + ipAddressBinary.data(), + kIPV4AddressBytes); + } else if (ipAddressBinary.size() == kIPAddressBytes) { + memcpy(&addrBytes, ipAddressBinary.data(), kIPAddressBytes); + } else { + if (threadSkipErrorDetails()) { + context.setStatus(row, Status::UserError()); + } else { + context.setStatus( + row, + Status::UserError( + "Invalid IP address binary length: {}", + ipAddressBinary.size())); + } + return; + } + + std::reverse(addrBytes.begin(), addrBytes.end()); + memcpy(&intAddr, &addrBytes, kIPAddressBytes); + flatResult->set(row, intAddr); + }); + } +}; + +class IPAddressTypeFactories : public CustomTypeFactories { + public: + IPAddressTypeFactories() = default; + + TypePtr getType() const override { + return IPADDRESS(); + } + + exec::CastOperatorPtr getCastOperator() const override { + return std::make_shared(); + } +}; + +} // namespace + +void registerIPAddressType() { + registerCustomType( + "ipaddress", std::make_unique()); +} + +} // namespace facebook::velox diff --git a/velox/functions/prestosql/types/IPAddressType.h b/velox/functions/prestosql/types/IPAddressType.h new file mode 100644 index 00000000000..e1e2d9fc1bf --- /dev/null +++ 
b/velox/functions/prestosql/types/IPAddressType.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/type/SimpleFunctionApi.h" +#include "velox/type/Type.h" + +namespace facebook::velox { + +class IPAddressType : public HugeintType { + IPAddressType() = default; + + public: + static const std::shared_ptr& get() { + static const std::shared_ptr instance{ + new IPAddressType()}; + + return instance; + } + + bool equivalent(const Type& other) const override { + // Pointer comparison works since this type is a singleton. + return this == &other; + } + + const char* name() const override { + return "IPADDRESS"; + } + + std::string toString() const override { + return name(); + } + + folly::dynamic serialize() const override { + folly::dynamic obj = folly::dynamic::object; + obj["name"] = "Type"; + obj["type"] = name(); + return obj; + } +}; + +FOLLY_ALWAYS_INLINE bool isIPAddressType(const TypePtr& type) { + // Pointer comparison works since this type is a singleton. + return IPAddressType::get() == type; +} + +FOLLY_ALWAYS_INLINE std::shared_ptr IPADDRESS() { + return IPAddressType::get(); +} + +// Type used for function registration. 
+struct IPAddressT { + using type = int128_t; + static constexpr const char* typeName = "ipaddress"; +}; + +using IPAddress = CustomType; + +void registerIPAddressType(); + +} // namespace facebook::velox diff --git a/velox/functions/prestosql/types/tests/CMakeLists.txt b/velox/functions/prestosql/types/tests/CMakeLists.txt index dc52144e172..ede51b00b80 100644 --- a/velox/functions/prestosql/types/tests/CMakeLists.txt +++ b/velox/functions/prestosql/types/tests/CMakeLists.txt @@ -18,7 +18,8 @@ add_executable( JsonTypeTest.cpp TimestampWithTimeZoneTypeTest.cpp TypeTestBase.cpp - UuidTypeTest.cpp) + UuidTypeTest.cpp + IPAddressTypeTest.cpp) add_test(velox_presto_types_test velox_presto_types_test) diff --git a/velox/functions/prestosql/types/tests/IPAddressTypeTest.cpp b/velox/functions/prestosql/types/tests/IPAddressTypeTest.cpp new file mode 100644 index 00000000000..a24ce1fec3a --- /dev/null +++ b/velox/functions/prestosql/types/tests/IPAddressTypeTest.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "velox/functions/prestosql/types/IPAddressType.h" +#include "velox/functions/prestosql/types/tests/TypeTestBase.h" + +namespace facebook::velox::test { + +class IPAddressTypeTest : public testing::Test, public TypeTestBase { + public: + IPAddressTypeTest() { + registerIPAddressType(); + } +}; + +TEST_F(IPAddressTypeTest, basic) { + ASSERT_EQ(IPADDRESS()->name(), "IPADDRESS"); + ASSERT_EQ(IPADDRESS()->kindName(), "HUGEINT"); + ASSERT_TRUE(IPADDRESS()->parameters().empty()); + ASSERT_EQ(IPADDRESS()->toString(), "IPADDRESS"); + + ASSERT_TRUE(hasType("IPADDRESS")); + ASSERT_EQ(*getType("IPADDRESS", {}), *IPADDRESS()); +} + +TEST_F(IPAddressTypeTest, serde) { + testTypeSerde(IPADDRESS()); +} +} // namespace facebook::velox::test