Skip to content

Commit 0ef7351

Browse files
zanmato1984bkietz
andauthored
GH-41407: [C++] Use static method to fill scalar scratch space to prevent ub (#41421)
### Rationale for this change In #40237, I introduced scalar scratch space filling in concrete scalar sub-class constructor, in which there is a static down-casting of `this` to sub-class pointer. Though this is common in CRTP, it happens in base cast constructor. And this is reported in #41407 to be UB by UBSAN's "vptr" sanitizing. I'm not a language lawyer to tell if this is a true/false-positive. So I proposed two approaches: 1. The easy way: add suppression in [1], like we already did for `shared_ptr`. But apparently this won't be feasible if this is a true-positive (need some language lawyer's help to confirm). 2. The hard way: totally avoid this so-to-speak UB but may introduce more boilerplate code. This PR is the hard way. [1] https://github.com/apache/arrow/blob/main/r/tools/ubsan.supp ### What changes are included in this PR? Make `FillScratchSpace` static. ### Are these changes tested? The existing UT should cover it well. ### Are there any user-facing changes? None. * GitHub Issue: #41407 Lead-authored-by: Ruoxi Sun <[email protected]> Co-authored-by: Rossi Sun <[email protected]> Co-authored-by: Benjamin Kietzman <[email protected]> Signed-off-by: Benjamin Kietzman <[email protected]>
1 parent e22197f commit 0ef7351

File tree

2 files changed

+130
-55
lines changed

2 files changed

+130
-55
lines changed

cpp/src/arrow/scalar.cc

Lines changed: 42 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -563,25 +563,28 @@ Status Scalar::ValidateFull() const {
563563
BaseBinaryScalar::BaseBinaryScalar(std::string s, std::shared_ptr<DataType> type)
564564
: BaseBinaryScalar(Buffer::FromString(std::move(s)), std::move(type)) {}
565565

566-
void BinaryScalar::FillScratchSpace() {
566+
void BinaryScalar::FillScratchSpace(uint8_t* scratch_space,
567+
const std::shared_ptr<Buffer>& value) {
567568
FillScalarScratchSpace(
568-
scratch_space_,
569+
scratch_space,
569570
{int32_t(0), value ? static_cast<int32_t>(value->size()) : int32_t(0)});
570571
}
571572

572-
void BinaryViewScalar::FillScratchSpace() {
573+
void BinaryViewScalar::FillScratchSpace(uint8_t* scratch_space,
574+
const std::shared_ptr<Buffer>& value) {
573575
static_assert(sizeof(BinaryViewType::c_type) <= internal::kScalarScratchSpaceSize);
574-
auto* view = new (&scratch_space_) BinaryViewType::c_type;
576+
auto* view = new (scratch_space) BinaryViewType::c_type;
575577
if (value) {
576578
*view = util::ToBinaryView(std::string_view{*value}, 0, 0);
577579
} else {
578580
*view = {};
579581
}
580582
}
581583

582-
void LargeBinaryScalar::FillScratchSpace() {
584+
void LargeBinaryScalar::FillScratchSpace(uint8_t* scratch_space,
585+
const std::shared_ptr<Buffer>& value) {
583586
FillScalarScratchSpace(
584-
scratch_space_,
587+
scratch_space,
585588
{int64_t(0), value ? static_cast<int64_t>(value->size()) : int64_t(0)});
586589
}
587590

@@ -612,36 +615,40 @@ BaseListScalar::BaseListScalar(std::shared_ptr<Array> value,
612615
}
613616

614617
ListScalar::ListScalar(std::shared_ptr<Array> value, bool is_valid)
615-
: BaseListScalar(value, list(value->type()), is_valid) {}
618+
: ListScalar(value, list(value->type()), is_valid) {}
616619

617-
void ListScalar::FillScratchSpace() {
620+
void ListScalar::FillScratchSpace(uint8_t* scratch_space,
621+
const std::shared_ptr<Array>& value) {
618622
FillScalarScratchSpace(
619-
scratch_space_,
623+
scratch_space,
620624
{int32_t(0), value ? static_cast<int32_t>(value->length()) : int32_t(0)});
621625
}
622626

623627
LargeListScalar::LargeListScalar(std::shared_ptr<Array> value, bool is_valid)
624-
: BaseListScalar(value, large_list(value->type()), is_valid) {}
628+
: LargeListScalar(value, large_list(value->type()), is_valid) {}
625629

626-
void LargeListScalar::FillScratchSpace() {
627-
FillScalarScratchSpace(scratch_space_,
630+
void LargeListScalar::FillScratchSpace(uint8_t* scratch_space,
631+
const std::shared_ptr<Array>& value) {
632+
FillScalarScratchSpace(scratch_space,
628633
{int64_t(0), value ? value->length() : int64_t(0)});
629634
}
630635

631636
ListViewScalar::ListViewScalar(std::shared_ptr<Array> value, bool is_valid)
632-
: BaseListScalar(value, list_view(value->type()), is_valid) {}
637+
: ListViewScalar(value, list_view(value->type()), is_valid) {}
633638

634-
void ListViewScalar::FillScratchSpace() {
639+
void ListViewScalar::FillScratchSpace(uint8_t* scratch_space,
640+
const std::shared_ptr<Array>& value) {
635641
FillScalarScratchSpace(
636-
scratch_space_,
642+
scratch_space,
637643
{int32_t(0), value ? static_cast<int32_t>(value->length()) : int32_t(0)});
638644
}
639645

640646
LargeListViewScalar::LargeListViewScalar(std::shared_ptr<Array> value, bool is_valid)
641-
: BaseListScalar(value, large_list_view(value->type()), is_valid) {}
647+
: LargeListViewScalar(value, large_list_view(value->type()), is_valid) {}
642648

643-
void LargeListViewScalar::FillScratchSpace() {
644-
FillScalarScratchSpace(scratch_space_,
649+
void LargeListViewScalar::FillScratchSpace(uint8_t* scratch_space,
650+
const std::shared_ptr<Array>& value) {
651+
FillScalarScratchSpace(scratch_space,
645652
{int64_t(0), value ? value->length() : int64_t(0)});
646653
}
647654

@@ -652,11 +659,12 @@ inline std::shared_ptr<DataType> MakeMapType(const std::shared_ptr<DataType>& pa
652659
}
653660

654661
MapScalar::MapScalar(std::shared_ptr<Array> value, bool is_valid)
655-
: BaseListScalar(value, MakeMapType(value->type()), is_valid) {}
662+
: MapScalar(value, MakeMapType(value->type()), is_valid) {}
656663

657-
void MapScalar::FillScratchSpace() {
664+
void MapScalar::FillScratchSpace(uint8_t* scratch_space,
665+
const std::shared_ptr<Array>& value) {
658666
FillScalarScratchSpace(
659-
scratch_space_,
667+
scratch_space,
660668
{int32_t(0), value ? static_cast<int32_t>(value->length()) : int32_t(0)});
661669
}
662670

@@ -705,7 +713,9 @@ Result<std::shared_ptr<Scalar>> StructScalar::field(FieldRef ref) const {
705713

706714
RunEndEncodedScalar::RunEndEncodedScalar(std::shared_ptr<Scalar> value,
707715
std::shared_ptr<DataType> type)
708-
: Scalar{std::move(type), value->is_valid}, value{std::move(value)} {
716+
: Scalar{std::move(type), value->is_valid},
717+
ArraySpanFillFromScalarScratchSpace(*this->type),
718+
value{std::move(value)} {
709719
ARROW_CHECK_EQ(this->type->id(), Type::RUN_END_ENCODED);
710720
}
711721

@@ -716,18 +726,18 @@ RunEndEncodedScalar::RunEndEncodedScalar(const std::shared_ptr<DataType>& type)
716726

717727
RunEndEncodedScalar::~RunEndEncodedScalar() = default;
718728

719-
void RunEndEncodedScalar::FillScratchSpace() {
720-
auto run_end = run_end_type()->id();
729+
void RunEndEncodedScalar::FillScratchSpace(uint8_t* scratch_space, const DataType& type) {
730+
Type::type run_end = checked_cast<const RunEndEncodedType&>(type).run_end_type()->id();
721731
switch (run_end) {
722732
case Type::INT16:
723-
FillScalarScratchSpace(scratch_space_, {int16_t(1)});
733+
FillScalarScratchSpace(scratch_space, {int16_t(1)});
724734
break;
725735
case Type::INT32:
726-
FillScalarScratchSpace(scratch_space_, {int32_t(1)});
736+
FillScalarScratchSpace(scratch_space, {int32_t(1)});
727737
break;
728738
default:
729739
DCHECK_EQ(run_end, Type::INT64);
730-
FillScalarScratchSpace(scratch_space_, {int64_t(1)});
740+
FillScalarScratchSpace(scratch_space, {int64_t(1)});
731741
}
732742
}
733743

@@ -806,6 +816,7 @@ Result<TimestampScalar> TimestampScalar::FromISO8601(std::string_view iso8601,
806816
SparseUnionScalar::SparseUnionScalar(ValueType value, int8_t type_code,
807817
std::shared_ptr<DataType> type)
808818
: UnionScalar(std::move(type), type_code, /*is_valid=*/true),
819+
ArraySpanFillFromScalarScratchSpace(type_code),
809820
value(std::move(value)) {
810821
const auto child_ids = checked_cast<const SparseUnionType&>(*this->type).child_ids();
811822
if (type_code >= 0 && static_cast<size_t>(type_code) < child_ids.size() &&
@@ -833,13 +844,13 @@ std::shared_ptr<Scalar> SparseUnionScalar::FromValue(std::shared_ptr<Scalar> val
833844
return std::make_shared<SparseUnionScalar>(field_values, type_code, std::move(type));
834845
}
835846

836-
void SparseUnionScalar::FillScratchSpace() {
837-
auto* union_scratch_space = reinterpret_cast<UnionScratchSpace*>(&scratch_space_);
847+
void SparseUnionScalar::FillScratchSpace(uint8_t* scratch_space, int8_t type_code) {
848+
auto* union_scratch_space = reinterpret_cast<UnionScratchSpace*>(scratch_space);
838849
union_scratch_space->type_code = type_code;
839850
}
840851

841-
void DenseUnionScalar::FillScratchSpace() {
842-
auto* union_scratch_space = reinterpret_cast<UnionScratchSpace*>(&scratch_space_);
852+
void DenseUnionScalar::FillScratchSpace(uint8_t* scratch_space, int8_t type_code) {
853+
auto* union_scratch_space = reinterpret_cast<UnionScratchSpace*>(scratch_space);
843854
union_scratch_space->type_code = type_code;
844855
FillScalarScratchSpace(union_scratch_space->offsets, {int32_t(0), int32_t(1)});
845856
}

0 commit comments

Comments
 (0)