Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,8 @@ etcd-client = "0.14"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "103948cbce833e1a17ee7083f5ba79564d08d6ec" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", branch = "zhongzc/alter-fulltext-backend" }
# greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "103948cbce833e1a17ee7083f5ba79564d08d6ec" }
hex = "0.4"
http = "1"
humantime = "2.1"
Expand Down
27 changes: 20 additions & 7 deletions src/api/src/v1/column_def.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@
use std::collections::HashMap;

use datatypes::schema::{
ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, SkippingIndexOptions,
SkippingIndexType, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY,
ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextBackend, FulltextOptions,
SkippingIndexOptions, SkippingIndexType, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY,
SKIPPING_INDEX_KEY,
};
use greptime_proto::v1::{
Analyzer, FulltextBackend as PbFulltextBackend, SkippingIndexType as PbSkippingIndexType,
};
use greptime_proto::v1::{Analyzer, SkippingIndexType as PbSkippingIndexType};
use snafu::ResultExt;

use crate::error::{self, Result};
Expand Down Expand Up @@ -142,13 +145,21 @@ pub fn options_from_inverted() -> ColumnOptions {
}

/// Tries to construct a `FulltextAnalyzer` from the given analyzer.
pub fn as_fulltext_option(analyzer: Analyzer) -> FulltextAnalyzer {
pub fn as_fulltext_option_analyzer(analyzer: Analyzer) -> FulltextAnalyzer {
match analyzer {
Analyzer::English => FulltextAnalyzer::English,
Analyzer::Chinese => FulltextAnalyzer::Chinese,
}
}

/// Maps a protobuf fulltext backend onto the schema-level `FulltextBackend`.
///
/// The conversion is infallible: the match below covers the protobuf
/// variants one-to-one.
pub fn as_fulltext_option_backend(backend: PbFulltextBackend) -> FulltextBackend {
    match backend {
        PbFulltextBackend::Tantivy => FulltextBackend::Tantivy,
        PbFulltextBackend::Bloom => FulltextBackend::Bloom,
    }
}

/// Tries to construct a `SkippingIndexType` from the given skipping index type.
pub fn as_skipping_index_type(skipping_index_type: PbSkippingIndexType) -> SkippingIndexType {
match skipping_index_type {
Expand All @@ -160,7 +171,7 @@ pub fn as_skipping_index_type(skipping_index_type: PbSkippingIndexType) -> Skipp
mod tests {

use datatypes::data_type::ConcreteDataType;
use datatypes::schema::FulltextAnalyzer;
use datatypes::schema::{FulltextAnalyzer, FulltextBackend};

use super::*;
use crate::v1::ColumnDataType;
Expand Down Expand Up @@ -219,13 +230,14 @@ mod tests {
enable: true,
analyzer: FulltextAnalyzer::English,
case_sensitive: false,
backend: FulltextBackend::Bloom,
})
.unwrap();
schema.set_inverted_index(true);
let options = options_from_column_schema(&schema).unwrap();
assert_eq!(
options.options.get(FULLTEXT_GRPC_KEY).unwrap(),
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false}"
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}"
);
assert_eq!(
options.options.get(INVERTED_INDEX_GRPC_KEY).unwrap(),
Expand All @@ -239,11 +251,12 @@ mod tests {
enable: true,
analyzer: FulltextAnalyzer::English,
case_sensitive: false,
backend: FulltextBackend::Bloom,
};
let options = options_from_fulltext(&fulltext).unwrap().unwrap();
assert_eq!(
options.options.get(FULLTEXT_GRPC_KEY).unwrap(),
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false}"
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}"
);
}

Expand Down
14 changes: 10 additions & 4 deletions src/common/grpc-expr/src/alter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@
use api::helper::ColumnDataTypeWrapper;
use api::v1::add_column_location::LocationType;
use api::v1::alter_table_expr::Kind;
use api::v1::column_def::{as_fulltext_option, as_skipping_index_type};
use api::v1::column_def::{
as_fulltext_option_analyzer, as_fulltext_option_backend, as_skipping_index_type,
};
use api::v1::{
column_def, AddColumnLocation as Location, AlterTableExpr, Analyzer, CreateTableExpr,
DropColumns, ModifyColumnTypes, RenameTable, SemanticType,
SkippingIndexType as PbSkippingIndexType,
DropColumns, FulltextBackend as PbFulltextBackend, ModifyColumnTypes, RenameTable,
SemanticType, SkippingIndexType as PbSkippingIndexType,
};
use common_query::AddColumnLocation;
use datatypes::schema::{ColumnSchema, FulltextOptions, RawSchema, SkippingIndexOptions};
Expand Down Expand Up @@ -126,11 +128,15 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterTableExpr) -> Result<
column_name: f.column_name.clone(),
options: FulltextOptions {
enable: f.enable,
analyzer: as_fulltext_option(
analyzer: as_fulltext_option_analyzer(
Analyzer::try_from(f.analyzer)
.context(InvalidSetFulltextOptionRequestSnafu)?,
),
case_sensitive: f.case_sensitive,
backend: as_fulltext_option_backend(
PbFulltextBackend::try_from(f.backend)
.context(InvalidSetFulltextOptionRequestSnafu)?,
),
},
},
},
Expand Down
5 changes: 3 additions & 2 deletions src/datatypes/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ use snafu::{ensure, ResultExt};
use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result};
use crate::prelude::ConcreteDataType;
pub use crate::schema::column_schema::{
ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, SkippingIndexOptions,
SkippingIndexType, COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE, COLUMN_FULLTEXT_OPT_KEY_ANALYZER,
ColumnSchema, FulltextAnalyzer, FulltextBackend, FulltextOptions, Metadata,
SkippingIndexOptions, SkippingIndexType, COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE,
COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_BACKEND,
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY,
SKIPPING_INDEX_KEY, TIME_INDEX_KEY,
Expand Down
36 changes: 36 additions & 0 deletions src/datatypes/src/schema/column_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ pub const SKIPPING_INDEX_KEY: &str = "greptime:skipping_index";
pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable";
pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer";
pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive";
pub const COLUMN_FULLTEXT_OPT_KEY_BACKEND: &str = "backend";

/// Keys used in SKIPPING index options
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity";
Expand Down Expand Up @@ -514,6 +515,9 @@ pub struct FulltextOptions {
/// Whether the fulltext index is case-sensitive.
#[serde(default)]
pub case_sensitive: bool,
/// The fulltext backend to use.
#[serde(default)]
pub backend: FulltextBackend,
}

impl fmt::Display for FulltextOptions {
Expand All @@ -522,11 +526,30 @@ impl fmt::Display for FulltextOptions {
if self.enable {
write!(f, ", analyzer={}", self.analyzer)?;
write!(f, ", case_sensitive={}", self.case_sensitive)?;
write!(f, ", backend={}", self.backend)?;
}
Ok(())
}
}

/// The backend of the fulltext index.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub enum FulltextBackend {
Comment thread
killme2008 marked this conversation as resolved.
#[default]
Tantivy,
Bloom, // TODO(zhongzc): when bloom is ready, use it as default
}

impl fmt::Display for FulltextBackend {
    /// Writes the lowercase backend name (`tantivy` or `bloom`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            FulltextBackend::Tantivy => "tantivy",
            FulltextBackend::Bloom => "bloom",
        };
        f.write_str(name)
    }
}

impl TryFrom<HashMap<String, String>> for FulltextOptions {
type Error = Error;

Expand Down Expand Up @@ -575,6 +598,19 @@ impl TryFrom<HashMap<String, String>> for FulltextOptions {
}
}

if let Some(backend) = options.get(COLUMN_FULLTEXT_OPT_KEY_BACKEND) {
match backend.to_ascii_lowercase().as_str() {
"bloom" => fulltext_options.backend = FulltextBackend::Bloom,
"tantivy" => fulltext_options.backend = FulltextBackend::Tantivy,
_ => {
return InvalidFulltextOptionSnafu {
msg: format!("{backend}, expected: 'bloom' | 'tantivy'"),
}
.fail();
}
}
}

Ok(fulltext_options)
}
}
Expand Down
17 changes: 15 additions & 2 deletions src/index/src/fulltext_index/create/bloom_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;
use std::sync::atomic::AtomicUsize;
use std::sync::Arc;

Expand All @@ -26,16 +27,20 @@ use crate::external_provider::ExternalTempFileProvider;
use crate::fulltext_index::create::FulltextIndexCreator;
use crate::fulltext_index::error::{
AbortedSnafu, BiErrorsSnafu, BloomFilterFinishSnafu, ExternalSnafu, PuffinAddBlobSnafu, Result,
SerializeToJsonSnafu,
};
use crate::fulltext_index::tokenizer::{Analyzer, ChineseTokenizer, EnglishTokenizer};
use crate::fulltext_index::Config;

/// Buffer size handed to `tokio::io::duplex` for the in-memory pipe that
/// carries the finished bloom filter blob to the puffin writer.
const PIPE_BUFFER_SIZE_FOR_SENDING_BLOB: usize = 8192;

/// Property key under which the JSON-serialized fulltext `Config` is attached
/// to the puffin blob.
pub const KEY_FULLTEXT_CONFIG: &str = "fulltext_config";

/// `BloomFilterFulltextIndexCreator` is for creating a fulltext index using a bloom filter.
pub struct BloomFilterFulltextIndexCreator {
/// The underlying bloom filter creator.
// NOTE(review): presumably `None` once the creator has been finished or aborted — confirm.
inner: Option<BloomFilterCreator>,
/// The analyzer chosen at construction time.
// NOTE(review): presumably used to tokenize incoming text — confirm against `push_text`.
analyzer: Analyzer,
/// The fulltext config this creator was built with; it is serialized to JSON and
/// stored as the `KEY_FULLTEXT_CONFIG` property of the puffin blob.
config: Config,
}

impl BloomFilterFulltextIndexCreator {
Expand All @@ -61,6 +66,7 @@ impl BloomFilterFulltextIndexCreator {
Self {
inner: Some(inner),
analyzer,
config,
}
}
}
Expand Down Expand Up @@ -89,10 +95,17 @@ impl FulltextIndexCreator for BloomFilterFulltextIndexCreator {

let (tx, rx) = tokio::io::duplex(PIPE_BUFFER_SIZE_FOR_SENDING_BLOB);

let property_key = KEY_FULLTEXT_CONFIG.to_string();
let property_value = serde_json::to_string(&self.config).context(SerializeToJsonSnafu)?;

let (index_finish, puffin_add_blob) = futures::join!(
creator.finish(tx.compat_write()),
// TODO(zhongzc): add fulltext config properties
puffin_writer.put_blob(blob_key, rx.compat(), put_options, Default::default())
puffin_writer.put_blob(
blob_key,
rx.compat(),
put_options,
HashMap::from([(property_key, property_value)]),
)
);

match (
Expand Down
18 changes: 18 additions & 0 deletions src/index/src/fulltext_index/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,22 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Failed to serialize to json"))]
SerializeToJson {
#[snafu(source)]
error: serde_json::error::Error,
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Failed to deserialize from json"))]
DeserializeFromJson {
#[snafu(source)]
error: serde_json::error::Error,
#[snafu(implicit)]
location: Location,
},
}

impl ErrorExt for Error {
Expand All @@ -122,6 +138,8 @@ impl ErrorExt for Error {
PuffinAddBlob { source, .. } => source.status_code(),

External { source, .. } => source.status_code(),

SerializeToJson { .. } | DeserializeFromJson { .. } => StatusCode::Internal,
}
}

Expand Down
4 changes: 3 additions & 1 deletion src/mito2/src/engine/alter_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_recordbatch::RecordBatches;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, FulltextAnalyzer, FulltextOptions};
use datatypes::schema::{ColumnSchema, FulltextAnalyzer, FulltextBackend, FulltextOptions};
use store_api::metadata::ColumnMetadata;
use store_api::region_engine::{RegionEngine, RegionRole};
use store_api::region_request::{
Expand Down Expand Up @@ -90,6 +90,7 @@ fn alter_column_fulltext_options() -> RegionAlterRequest {
enable: true,
analyzer: FulltextAnalyzer::English,
case_sensitive: false,
backend: FulltextBackend::Bloom,
},
},
},
Expand Down Expand Up @@ -557,6 +558,7 @@ async fn test_alter_column_fulltext_options() {
enable: true,
analyzer: FulltextAnalyzer::English,
case_sensitive: false,
backend: FulltextBackend::Bloom,
};
let check_fulltext_options = |engine: &MitoEngine, expected: &FulltextOptions| {
let current_fulltext_options = engine
Expand Down
3 changes: 3 additions & 0 deletions src/mito2/src/sst/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ pub(crate) const TYPE_INVERTED_INDEX: &str = "inverted_index";
pub(crate) const TYPE_FULLTEXT_INDEX: &str = "fulltext_index";
pub(crate) const TYPE_BLOOM_FILTER_INDEX: &str = "bloom_filter_index";

/// Row granularity passed along when `IndexerBuilderImpl` builds the fulltext indexer.
// NOTE(review): presumably the number of rows covered by one bloom filter segment — confirm.
// NOTE(review): 8096 is not a power of two (8192 is) — confirm this value is intentional.
const DEFAULT_FULLTEXT_BLOOM_ROW_GRANULARITY: usize = 8096;

/// Output of the index creation.
#[derive(Debug, Clone, Default)]
pub struct IndexOutput {
Expand Down Expand Up @@ -292,6 +294,7 @@ impl IndexerBuilderImpl {
&self.intermediate_manager,
&self.metadata,
self.fulltext_index_config.compress,
DEFAULT_FULLTEXT_BLOOM_ROW_GRANULARITY,
mem_limit,
)
.await;
Expand Down
Loading