diff --git a/internal/core/src/cachinglayer/Translator.h b/internal/core/src/cachinglayer/Translator.h
index d0236aa4633e8..d9675129a689f 100644
--- a/internal/core/src/cachinglayer/Translator.h
+++ b/internal/core/src/cachinglayer/Translator.h
@@ -27,8 +27,8 @@ struct Meta {
     CacheWarmupPolicy cache_warmup_policy;
     bool support_eviction;
     explicit Meta(StorageType storage_type,
-         CacheWarmupPolicy cache_warmup_policy,
-         bool support_eviction)
+                  CacheWarmupPolicy cache_warmup_policy,
+                  bool support_eviction)
         : storage_type(storage_type),
           cache_warmup_policy(cache_warmup_policy),
           support_eviction(support_eviction) {
diff --git a/internal/core/src/exec/expression/JsonContainsExpr.cpp b/internal/core/src/exec/expression/JsonContainsExpr.cpp
index fb054a9c9f294..6041212016879 100644
--- a/internal/core/src/exec/expression/JsonContainsExpr.cpp
+++ b/internal/core/src/exec/expression/JsonContainsExpr.cpp
@@ -25,6 +25,30 @@ void
 PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
     auto input = context.get_offset_input();
     SetHasOffsetInput((input != nullptr));
+
+    // Fast path for an empty value list: report every row of the batch as a
+    // valid match and skip the per-type dispatch below.
+    if (expr_->vals_.empty()) {
+        auto next_batch_size = GetNextBatchSize();
+        auto real_batch_size = has_offset_input_
+                                   ? input->size()
+                                   : next_batch_size;
+        if (real_batch_size == 0) {
+            result = nullptr;
+            return;
+        }
+
+        // Both the result bitmap and the validity bitmap start out all-true.
+        result =
+            std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, true),
+                                           TargetBitmap(real_batch_size, true));
+        // NOTE(review): only the data-chunk cursor is advanced here, and it is
+        // advanced even when the batch size came from offset input -- confirm
+        // this matches how the index-mode and offset-input paths track state.
+        current_data_chunk_pos_ += real_batch_size;
+        return;
+    }
+
     switch (expr_->column_.data_type_) {
         case DataType::ARRAY: {
             if (is_index_mode_ && !has_offset_input_) {
diff --git a/internal/core/src/segcore/storagev1translator/ChunkTranslator.cpp b/internal/core/src/segcore/storagev1translator/ChunkTranslator.cpp
index 904237e88fa01..b37215595c756 100644
--- a/internal/core/src/segcore/storagev1translator/ChunkTranslator.cpp
+++ b/internal/core/src/segcore/storagev1translator/ChunkTranslator.cpp
@@ -107,19 +107,21 @@ ChunkTranslator::load_chunk(milvus::cachinglayer::cid_t cid) {
             while (channel->pop(r)) {
                 arrow::ArrayVector array_vec =
                     read_single_column_batches(r->reader);
-                auto chunk = create_chunk(field_meta_,
-                                          IsVectorDataType(data_type) &&
-                                                  !IsSparseFloatVectorDataType(data_type)
-                                              ? field_meta_.get_dim()
-                                              : 1,
-                                          file,
-                                          /*file_offset*/ 0,
-                                          array_vec);
+                auto chunk =
+                    create_chunk(field_meta_,
+                                 IsVectorDataType(data_type) &&
+                                         !IsSparseFloatVectorDataType(data_type)
+                                     ? field_meta_.get_dim()
+                                     : 1,
+                                 file,
+                                 /*file_offset*/ 0,
+                                 array_vec);
                 auto ok = unlink(filepath.c_str());
-                AssertInfo(ok == 0,
-                           fmt::format("failed to unlink mmap data file {}, err: {}",
-                                       filepath.c_str(),
-                                       strerror(errno)));
+                AssertInfo(
+                    ok == 0,
+                    fmt::format("failed to unlink mmap data file {}, err: {}",
+                                filepath.c_str(),
+                                strerror(errno)));
                 return chunk;
             }
         }
diff --git a/internal/core/unittest/test_array_expr.cpp b/internal/core/unittest/test_array_expr.cpp
index ac4debfd7b5c4..bd29b23c4c102 100644
--- a/internal/core/unittest/test_array_expr.cpp
+++ b/internal/core/unittest/test_array_expr.cpp
@@ -1430,6 +1430,74 @@ TEST(Expr, TestArrayContains) {
     }
 }
 
+// A contains-any filter with an empty value list must select every row, for
+// each of the array element types registered below.
+TEST(Expr, TestArrayContainsEmptyValues) {
+    auto schema = std::make_shared<Schema>();
+    auto int_array_fid =
+        schema->AddDebugField("int_array", DataType::ARRAY, DataType::INT8);
+    auto long_array_fid =
+        schema->AddDebugField("long_array", DataType::ARRAY, DataType::INT64);
+    auto bool_array_fid =
+        schema->AddDebugField("bool_array", DataType::ARRAY, DataType::BOOL);
+    auto float_array_fid =
+        schema->AddDebugField("float_array", DataType::ARRAY, DataType::FLOAT);
+    auto double_array_fid = schema->AddDebugField(
+        "double_array", DataType::ARRAY, DataType::DOUBLE);
+    auto string_array_fid = schema->AddDebugField(
+        "string_array", DataType::ARRAY, DataType::VARCHAR);
+    schema->set_primary_field_id(schema->AddDebugField("id", DataType::INT64));
+    std::vector<FieldId> fields = {
+        int_array_fid,
+        long_array_fid,
+        bool_array_fid,
+        float_array_fid,
+        double_array_fid,
+        string_array_fid,
+    };
+
+    auto dummy_seg = CreateGrowingSegment(schema, empty_index_meta);
+
+    int N = 1000;
+    int num_iters = 100;
+    for (int iter = 0; iter < num_iters; ++iter) {
+        auto raw_data = DataGen(schema, N, iter);
+        dummy_seg->PreInsert(N);
+        dummy_seg->Insert(iter * N,
+                          N,
+                          raw_data.row_ids_.data(),
+                          raw_data.timestamps_.data(),
+                          raw_data.raw_);
+    }
+
+    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(dummy_seg.get());
+    std::vector<proto::plan::GenericValue> empty_values;
+
+    for (auto field_id : fields) {
+        auto start = std::chrono::steady_clock::now();
+        auto expr = std::make_shared<milvus::expr::JsonContainsExpr>(
+            expr::ColumnInfo(field_id, DataType::ARRAY),
+            proto::plan::JSONContainsExpr_JSONOp_ContainsAny,
+            true,
+            empty_values);
+
+        BitsetType final;
+        auto plan =
+            std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, expr);
+        final =
+            ExecuteQueryExpr(plan, seg_promote, N * num_iters, MAX_TIMESTAMP);
+        std::cout << "cost"
+                  << std::chrono::duration_cast<std::chrono::microseconds>(
+                         std::chrono::steady_clock::now() - start)
+                         .count()
+                  << std::endl;
+        // Fatal on mismatch so the loop below never indexes past the result.
+        ASSERT_EQ(final.size(), N * num_iters);
+        for (int i = 0; i < N * num_iters; ++i) {
+            ASSERT_EQ(final[i], true);
+        }
+    }
+}
+
 TEST(Expr, TestArrayBinaryArith) {
     auto schema = std::make_shared<Schema>();
     auto i64_fid = schema->AddDebugField("id", DataType::INT64);
diff --git a/internal/core/unittest/test_array_inverted_index.cpp b/internal/core/unittest/test_array_inverted_index.cpp
index c524b4547f784..32d544f3003cd 100644
--- a/internal/core/unittest/test_array_inverted_index.cpp
+++ b/internal/core/unittest/test_array_inverted_index.cpp
@@ -168,6 +168,12 @@ TYPED_TEST_P(ArrayInvertedIndexTest, ArrayContainsAny) {
     auto ref = [this, &elems](size_t offset) -> bool {
+        // An empty candidate list is expected to match every row; decide
+        // before building the per-row lookup set.
+        if (elems.empty()) {
+            return true;
+        }
+
         std::unordered_set<TypeParam> row(this->vec_of_array_[offset].begin(),
                                           this->vec_of_array_[offset].end());
         for (const auto& elem : elems) {
             if (row.find(elem) != row.end()) {
                 return true;
@@ -216,6 +222,12 @@ TYPED_TEST_P(ArrayInvertedIndexTest, ArrayContainsAll) {
     auto ref = [this, &elems](size_t offset) -> bool {
+        // An empty candidate list is expected to match every row; decide
+        // before building the per-row lookup set.
+        if (elems.empty()) {
+            return true;
+        }
+
         std::unordered_set<TypeParam> row(this->vec_of_array_[offset].begin(),
                                           this->vec_of_array_[offset].end());
         for (const auto& elem : elems) {
             if (row.find(elem) == row.end()) {
                 return false;