Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions pyvelox/pyvelox.h
Original file line number Diff line number Diff line change
Expand Up @@ -398,8 +398,7 @@ static void addVectorBindings(
TypeKind::REAL,
TypeKind::DOUBLE,
TypeKind::VARBINARY,
TypeKind::TIMESTAMP,
TypeKind::DATE};
TypeKind::TIMESTAMP};

for (int i = 0; i < sizeof(supportedTypes) / sizeof(supportedTypes[0]); i++) {
VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH(
Expand Down
7 changes: 0 additions & 7 deletions velox/common/memory/ByteStream.h
Original file line number Diff line number Diff line change
Expand Up @@ -393,13 +393,6 @@ inline int128_t ByteStream::read<int128_t>() {
return value;
}

// Date specialization of ByteStream::read: fills a local Date with
// sizeof(Date) bytes obtained from readBytes() and returns it by value.
template <>
inline Date ByteStream::read<Date>() {
  Date result;
  auto* rawBytes = reinterpret_cast<uint8_t*>(&result);
  readBytes(rawBytes, sizeof(Date));
  return result;
}

class IOBufOutputStream : public OutputStream {
public:
explicit IOBufOutputStream(
Expand Down
10 changes: 0 additions & 10 deletions velox/connectors/hive/HivePartitionFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,16 +173,6 @@ void hashTyped<TypeKind::TIMESTAMP>(
abstractHashTyped<Timestamp>(values, size, mix, hashTimestamp, hashes);
}

// TypeKind::DATE specialization of hashTyped. Delegates to
// abstractHashTyped<Date>, handing it a callable that returns days() for a
// given Date; all other arguments are forwarded unchanged.
template <>
void hashTyped<TypeKind::DATE>(
    const DecodedVector& values,
    vector_size_t size,
    bool mix,
    std::vector<uint32_t>& hashes) {
  auto daysOfDate = [](const Date& date) { return date.days(); };
  abstractHashTyped<Date>(values, size, mix, daysOfDate, hashes);
}

void hash(
const DecodedVector& values,
TypeKind typeKind,
Expand Down
28 changes: 15 additions & 13 deletions velox/connectors/hive/HivePartitionUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,8 @@ namespace facebook::velox::connector::hive {
case TypeKind::BIGINT: \
case TypeKind::VARCHAR: \
case TypeKind::VARBINARY: \
case TypeKind::DATE: { \
return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH( \
TEMPLATE_FUNC, typeKind, __VA_ARGS__); \
} \
default: \
VELOX_UNSUPPORTED( \
"Unsupported partition type: {}", mapTypeKindToName(typeKind)); \
Expand All @@ -49,11 +47,6 @@ inline std::string makePartitionValueString(bool value) {
return value ? "true" : "false";
}

// Date specialization: the partition value string is the result of
// Date::toString() on the given value.
template <>
inline std::string makePartitionValueString(Date value) {
  std::string formatted = value.toString();
  return formatted;
}

template <TypeKind Kind>
std::pair<std::string, std::string> makePartitionKeyValueString(
const BaseVector* partitionVector,
Expand All @@ -73,12 +66,21 @@ std::vector<std::pair<std::string, std::string>> extractPartitionKeyValues(
vector_size_t row) {
std::vector<std::pair<std::string, std::string>> partitionKeyValues;
for (auto i = 0; i < partitionsVector->childrenSize(); i++) {
partitionKeyValues.push_back(PARTITION_TYPE_DISPATCH(
makePartitionKeyValueString,
partitionsVector->childAt(i)->typeKind(),
partitionsVector->childAt(i)->loadedVector(),
row,
asRowType(partitionsVector->type())->nameOf(i)));
if (partitionsVector->childAt(i)->type()->isDate()) {
auto partitionVector = partitionsVector->childAt(i)->loadedVector();
auto partitionName = asRowType(partitionsVector->type())->nameOf(i);
partitionKeyValues.push_back(
{partitionName,
DATE()->toString(
partitionVector->as<SimpleVector<int32_t>>()->valueAt(row))});
} else {
partitionKeyValues.push_back(PARTITION_TYPE_DISPATCH(
makePartitionKeyValueString,
partitionsVector->childAt(i)->typeKind(),
partitionsVector->childAt(i)->loadedVector(),
row,
asRowType(partitionsVector->type())->nameOf(i)));
}
}
return partitionKeyValues;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ constexpr std::array<TypeKind, 10> kSupportedTypes{
TypeKind::REAL,
TypeKind::DOUBLE,
TypeKind::VARCHAR,
TypeKind::TIMESTAMP,
TypeKind::DATE};
TypeKind::TIMESTAMP};

class HivePartitionFunctionBenchmark
: public functions::test::FunctionBenchmarkBase {
Expand Down Expand Up @@ -265,23 +264,6 @@ BENCHMARK_RELATIVE(timestampManyRowsManyBuckets) {
}

BENCHMARK_DRAW_LINE();

// Benchmarks for TypeKind::DATE. Each case calls runFew<> or runMany<> on the
// benchmarkFew / benchmarkMany fixtures, which are defined outside this view.
// NOTE(review): the case names suggest "Few/Many rows" selects the fixture and
// "Few/Many buckets" selects runFew vs. runMany — confirm against the fixture
// class.
BENCHMARK(dateFewRowsFewBuckets) {
benchmarkFew->runFew<TypeKind::DATE>();
}

// NOTE(review): presumably reported relative to the preceding BENCHMARK case —
// confirm against the benchmark framework's documentation.
BENCHMARK_RELATIVE(dateFewRowsManyBuckets) {
benchmarkFew->runMany<TypeKind::DATE>();
}

// Same pair of cases, run on the benchmarkMany fixture.
BENCHMARK(dateManyRowsFewBuckets) {
benchmarkMany->runFew<TypeKind::DATE>();
}

BENCHMARK_RELATIVE(dateManyRowsManyBuckets) {
benchmarkMany->runMany<TypeKind::DATE>();
}

} // namespace

int main(int argc, char** argv) {
Expand Down
9 changes: 5 additions & 4 deletions velox/connectors/hive/tests/HivePartitionFunctionTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,11 +258,12 @@ TEST_F(HivePartitionFunctionTest, timestamp) {
}

TEST_F(HivePartitionFunctionTest, date) {
auto values = makeNullableFlatVector<Date>(
auto values = makeNullableFlatVector<int32_t>(
{std::nullopt,
Date(2'000'000'000),
Date(std::numeric_limits<int32_t>::min()),
Date(std::numeric_limits<int32_t>::max())});
2'000'000'000,
std::numeric_limits<int32_t>::min(),
std::numeric_limits<int32_t>::max()},
DATE());

assertPartitions(values, 1, {0, 0, 0, 0});
assertPartitions(values, 2, {0, 0, 0, 1});
Expand Down
2 changes: 1 addition & 1 deletion velox/connectors/hive/tests/HivePartitionUtilTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ TEST_F(HivePartitionUtilTest, partitionName) {
makeFlatVector<int32_t>(std::vector<int32_t>{1000}),
makeFlatVector<int64_t>(std::vector<int64_t>{10000}),
makeDictionary<StringView>(std::vector<StringView>{"str1000"}),
makeConstant<Date>(Date(10000), 1)});
makeConstant<int32_t>(10000, 1, DATE())});

std::vector<std::string> expectedPartitionKeyValues{
"flat_bool_col=false",
Expand Down
8 changes: 4 additions & 4 deletions velox/connectors/hive/tests/PartitionIdGeneratorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ TEST_F(PartitionIdGeneratorTest, consecutiveIdsSingleKey) {
auto input = makeRowVector(
{makeFlatVector<StringView>(numPartitions * 3, [&](auto row) {
return StringView::makeInline(
Date(18000 + row % numPartitions).toString());
DATE()->toString(18000 + row % numPartitions));
})});

raw_vector<uint64_t> ids;
Expand All @@ -56,7 +56,7 @@ TEST_F(PartitionIdGeneratorTest, consecutiveIdsMultipleKeys) {
makeFlatVector<StringView>(
1'000,
[&](auto row) {
return StringView::makeInline(Date(18000 + row % 5).toString());
return StringView::makeInline(DATE()->toString(18000 + row % 5));
}),
makeFlatVector<int32_t>(1'000, [&](auto row) { return row % 17; }),
});
Expand Down Expand Up @@ -111,7 +111,7 @@ TEST_F(PartitionIdGeneratorTest, stableIdsMultipleKeys) {
makeFlatVector<StringView>(
size,
[](auto row) {
return StringView::makeInline(Date(18000 + row % 3).toString());
return StringView::makeInline(DATE()->toString(18000 + row % 3));
}),
makeFlatVector<int32_t>(size, [](auto row) { return row % 7; }),
});
Expand All @@ -124,7 +124,7 @@ TEST_F(PartitionIdGeneratorTest, stableIdsMultipleKeys) {
makeFlatVector<StringView>(
size,
[](auto row) {
return StringView::makeInline(Date(18000 + row % 5).toString());
return StringView::makeInline(DATE()->toString(18000 + row % 5));
}),
makeFlatVector<int32_t>(size, [](auto row) { return row % 17; }),
});
Expand Down
4 changes: 2 additions & 2 deletions velox/connectors/tpch/tests/TpchConnectorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,8 @@ TEST_F(TpchConnectorTest, orderDateCount) {
.planNode();

auto output = getResults(plan, {makeTpchSplit()});
auto orderDate = output->childAt(0)->asFlatVector<Date>();
EXPECT_EQ("1992-01-01", orderDate->valueAt(0).toString());
auto orderDate = output->childAt(0)->asFlatVector<int32_t>();
EXPECT_EQ("1992-01-01", DATE()->toString(orderDate->valueAt(0)));
// Match with count obtained from Java.
EXPECT_EQ(9, orderDate->size());
}
Expand Down
5 changes: 1 addition & 4 deletions velox/docs/develop/types.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ SMALLINT int16_t 2
INTEGER int32_t 4
BIGINT int64_t 8
HUGEINT int128_t 16
DATE struct Date 8
REAL float 4
DOUBLE double 8
TIMESTAMP struct Timestamp 16
Expand Down Expand Up @@ -84,14 +83,12 @@ their corresponding physical type.
====================== ======================================================
Logical Type Physical Type
====================== ======================================================
DATE INTEGER
DECIMAL BIGINT if precision <= 18, HUGEINT if precision >= 19
INTERVAL DAY TO SECOND BIGINT
INTERVAL YEAR TO MONTH INTEGER
====================== ======================================================

We are in the process of migrating (:pr:`4744`) the DATE type to a logical type
backed by INTEGER.

DECIMAL type carries additional `precision`,
and `scale` information. `Precision` is the number of
digits in a number. `Scale` is the number of digits to the right of the decimal
Expand Down
8 changes: 5 additions & 3 deletions velox/duckdb/conversion/DuckConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ LogicalType fromVeloxType(const TypePtr& type) {
auto [precision, scale] = getDecimalPrecisionScale(*type);
return LogicalType::DECIMAL(precision, scale);
}

switch (type->kind()) {
case TypeKind::BOOLEAN:
return LogicalType::BOOLEAN;
Expand All @@ -70,6 +71,9 @@ LogicalType fromVeloxType(const TypePtr& type) {
if (type->isIntervalYearMonth()) {
return LogicalType::INTERVAL;
}
if (type->isDate()) {
return LogicalType::DATE;
}
return LogicalType::INTEGER;
case TypeKind::BIGINT:
if (type->isIntervalDayTime()) {
Expand All @@ -84,8 +88,6 @@ LogicalType fromVeloxType(const TypePtr& type) {
return LogicalType::VARCHAR;
case TypeKind::TIMESTAMP:
return LogicalType::TIMESTAMP;
case TypeKind::DATE:
return LogicalType::DATE;
case TypeKind::ARRAY:
return LogicalType::LIST(fromVeloxType(type->childAt(0)));
case TypeKind::MAP:
Expand Down Expand Up @@ -207,7 +209,7 @@ variant duckValueToVariant(const Value& val) {
case LogicalTypeId::BLOB:
return variant::binary(val.GetValue<std::string>());
case LogicalTypeId::DATE:
return variant::date(val.GetValue<::duckdb::date_t>().days);
return variant(val.GetValue<::duckdb::date_t>().days);
default:
throw std::runtime_error(
"unsupported type for duckdb value -> velox variant conversion: " +
Expand Down
10 changes: 5 additions & 5 deletions velox/duckdb/conversion/DuckConversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,15 +198,15 @@ struct DuckTimestampConversion {

struct DuckDateConversion {
typedef ::duckdb::date_t DUCK_TYPE;
typedef Date VELOX_TYPE;
typedef int32_t VELOX_TYPE;

static ::duckdb::date_t toDuck(
const Date& input,
const int32_t& input,
::duckdb::Vector& /* unused */) {
return ::duckdb::Date::EpochDaysToDate(input.days());
return ::duckdb::Date::EpochDaysToDate(input);
}
static Date toVelox(const ::duckdb::date_t& input) {
return Date(::duckdb::Date::EpochDays(input));
static int32_t toVelox(const ::duckdb::date_t& input) {
return ::duckdb::Date::EpochDays(input);
}
};

Expand Down
9 changes: 4 additions & 5 deletions velox/duckdb/conversion/tests/DuckWrapperTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ TEST_F(BaseDuckWrapperTest, scalarTypes) {
verifyUnaryResult<double>("SELECT 1::DOUBLE", {1.0});

// date/timestamp
verifyUnaryResult<Date>("SELECT DATE '1992-01-01'", {Date(8035)});
verifyUnaryResult<int32_t>("SELECT DATE '1992-01-01'", {8035});
verifyUnaryResult<Timestamp>(
"SELECT TIMESTAMP '1992-01-01 13:04:20'", {Timestamp(694271060, 0)});

Expand Down Expand Up @@ -189,9 +189,9 @@ TEST_F(BaseDuckWrapperTest, types) {
{false, false, false, true});

// date/timestamp
verifyUnaryResult<Date>(
verifyUnaryResult<int32_t>(
"SELECT i FROM (VALUES (DATE '1992-01-01'), (NULL)) tbl(i)",
{Date(8035), Date(0)},
{8035, 0},
{false, true});
verifyUnaryResult<Timestamp>(
"SELECT i FROM (VALUES (TIMESTAMP '1992-01-01 13:04:20'), (NULL)) tbl(i)",
Expand Down Expand Up @@ -220,8 +220,7 @@ TEST_F(BaseDuckWrapperTest, tpchSF1) {
execute("CALL dbgen(sf=0.01)");
// test conversion of date, decimal and string
verifyUnaryResult<int64_t>("SELECT l_discount FROM lineitem LIMIT 1", {4});
verifyUnaryResult<Date>(
"SELECT l_shipdate FROM lineitem LIMIT 1", {Date(9568)});
verifyUnaryResult<int32_t>("SELECT l_shipdate FROM lineitem LIMIT 1", {9568});
verifyUnaryResult<StringView>(
"SELECT l_comment FROM lineitem LIMIT 1",
{StringView("egular courts above the")});
Expand Down
3 changes: 0 additions & 3 deletions velox/dwio/common/SelectiveColumnReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,6 @@ void SelectiveColumnReader::getIntValues(
VELOX_FAIL("Unsupported value size: {}", valueSize_);
}
break;
case TypeKind::DATE:
getFlatValues<Date, Date>(rows, result, requestedType);
break;
case TypeKind::HUGEINT:
getFlatValues<int128_t, int128_t>(rows, result, requestedType);
break;
Expand Down
15 changes: 0 additions & 15 deletions velox/dwio/common/tests/utils/BatchMaker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,21 +279,6 @@ VectorPtr BatchMaker::createVector<TypeKind::TIMESTAMP>(
isNullAt);
}

// TypeKind::DATE specialization of BatchMaker::createVector. The type argument
// is ignored; the vector is produced by createScalar<Date> with a generator
// that wraps Random::rand32(gen) in a Date.
template <>
VectorPtr BatchMaker::createVector<TypeKind::DATE>(
    const std::shared_ptr<const Type>& /* unused */,
    size_t size,
    MemoryPool& pool,
    std::mt19937& gen,
    std::function<bool(vector_size_t /*index*/)> isNullAt) {
  auto nextRandomDate = [&gen]() { return Date(Random::rand32(gen)); };
  return createScalar<Date>(size, gen, nextRandomDate, pool, isNullAt);
}

template <>
VectorPtr BatchMaker::createVector<TypeKind::ROW>(
const std::shared_ptr<const Type>& type,
Expand Down
8 changes: 0 additions & 8 deletions velox/dwio/common/tests/utils/FilterGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,6 @@ VectorPtr getChildBySubfield(

uint32_t AbstractColumnStats::counter_ = 0;

// Date specialization: the integer value of a Date is its days() count,
// widened to int64_t.
template <>
int64_t ColumnStats<Date>::getIntegerValue(const Date& value) {
  const int64_t dayCount = value.days();
  return dayCount;
}

template <>
std::unique_ptr<Filter> ColumnStats<bool>::makeRangeFilter(
const FilterSpec& filterSpec) {
Expand Down Expand Up @@ -413,9 +408,6 @@ SubfieldFilters FilterGenerator::makeSubfieldFilters(
case TypeKind::BIGINT:
stats = makeStats<TypeKind::BIGINT>(vector->type(), rowType_);
break;
case TypeKind::DATE:
stats = makeStats<TypeKind::DATE>(vector->type(), rowType_);
break;
case TypeKind::VARCHAR:
stats = makeStats<TypeKind::VARCHAR>(vector->type(), rowType_);
break;
Expand Down
3 changes: 2 additions & 1 deletion velox/dwio/dwrf/common/FileMetadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,9 @@ TypeKind TypeWrapper::kind() const {
return TypeKind::ROW;
case proto::orc::Type_Kind_VARCHAR:
return TypeKind::VARCHAR;
// Date is a logical type of INTEGER (for the number of days since EPOCH).
case proto::orc::Type_Kind_DATE:
return TypeKind::DATE;
return TypeKind::INTEGER;
case proto::orc::Type_Kind_DECIMAL:
case proto::orc::Type_Kind_CHAR:
case proto::orc::Type_Kind_TIMESTAMP_INSTANT:
Expand Down
8 changes: 0 additions & 8 deletions velox/dwio/dwrf/reader/ColumnReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2380,14 +2380,6 @@ std::unique_ptr<ColumnReader> ColumnReader::build(
case TypeKind::TIMESTAMP:
return std::make_unique<TimestampColumnReader>(
dataType, stripe, streamLabels, std::move(flatMapContext));
case TypeKind::DATE:
return std::make_unique<IntegerDirectColumnReader<Date>>(
dataType,
requestedType->type,
stripe,
streamLabels,
dwio::common::INT_BYTE_SIZE,
std::move(flatMapContext));
default:
DWIO_RAISE("buildReader unhandled type");
}
Expand Down
Loading