Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 80 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ futures = "0.3"
async-trait = "0.1"
tokio = { version = "1", features = ["full"] }
chrono = { version = "0.4", features = ["std"] }
structopt = { version = "0.3", default-features = false }
108 changes: 102 additions & 6 deletions src/datatypes.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
use std::sync::Arc;

use chrono::NaiveDateTime;
use datafusion::arrow::array::{Array, BooleanArray, ListArray, PrimitiveArray, StringArray};
use datafusion::arrow::datatypes::{
DataType, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type,
UInt32Type, UInt64Type, UInt8Type,
};
use chrono::{NaiveDate, NaiveDateTime};
use datafusion::arrow::array::*;
use datafusion::arrow::datatypes::*;
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::common::{DFSchema, ParamValues};
use datafusion::prelude::*;
Expand Down Expand Up @@ -150,6 +147,75 @@ fn get_utf8_value(arr: &Arc<dyn Array>, idx: usize) -> &str {
.value(idx)
}

/// Downcasts `arr` to a `Date32Array` and reads row `idx` as a `NaiveDate`.
/// Panics if the array's concrete type is not `Date32Array`.
fn get_date32_value(arr: &Arc<dyn Array>, idx: usize) -> Option<NaiveDate> {
    let dates = arr.as_any().downcast_ref::<Date32Array>().unwrap();
    dates.value_as_date(idx)
}

/// Downcasts `arr` to a `Date64Array` and reads row `idx` as a `NaiveDate`.
/// Panics if the array's concrete type is not `Date64Array`.
fn get_date64_value(arr: &Arc<dyn Array>, idx: usize) -> Option<NaiveDate> {
    let dates = arr.as_any().downcast_ref::<Date64Array>().unwrap();
    dates.value_as_date(idx)
}

/// Downcasts `arr` to a `Time32SecondArray` and converts row `idx` to a
/// `NaiveDateTime`. Panics on a type mismatch.
fn get_time32_second_value(arr: &Arc<dyn Array>, idx: usize) -> Option<NaiveDateTime> {
    let times = arr.as_any().downcast_ref::<Time32SecondArray>().unwrap();
    times.value_as_datetime(idx)
}

/// Downcasts `arr` to a `Time32MillisecondArray` and converts row `idx` to a
/// `NaiveDateTime`. Panics on a type mismatch.
fn get_time32_millisecond_value(arr: &Arc<dyn Array>, idx: usize) -> Option<NaiveDateTime> {
    let times = arr.as_any().downcast_ref::<Time32MillisecondArray>().unwrap();
    times.value_as_datetime(idx)
}

/// Downcasts `arr` to a `Time64MicrosecondArray` and converts row `idx` to a
/// `NaiveDateTime`. Panics on a type mismatch.
fn get_time64_microsecond_value(arr: &Arc<dyn Array>, idx: usize) -> Option<NaiveDateTime> {
    let times = arr.as_any().downcast_ref::<Time64MicrosecondArray>().unwrap();
    times.value_as_datetime(idx)
}
/// Downcasts `arr` to a `Time64NanosecondArray` and converts row `idx` to a
/// `NaiveDateTime`. Panics on a type mismatch.
fn get_time64_nanosecond_value(arr: &Arc<dyn Array>, idx: usize) -> Option<NaiveDateTime> {
    let times = arr.as_any().downcast_ref::<Time64NanosecondArray>().unwrap();
    times.value_as_datetime(idx)
}

/// Downcasts `arr` to a `TimestampSecondArray` and converts row `idx` to a
/// `NaiveDateTime`. Panics on a type mismatch.
fn get_timestamp_second_value(arr: &Arc<dyn Array>, idx: usize) -> Option<NaiveDateTime> {
    let stamps = arr.as_any().downcast_ref::<TimestampSecondArray>().unwrap();
    stamps.value_as_datetime(idx)
}

/// Downcasts `arr` to a `TimestampMillisecondArray` and converts row `idx` to a
/// `NaiveDateTime`. Panics on a type mismatch.
fn get_timestamp_millisecond_value(arr: &Arc<dyn Array>, idx: usize) -> Option<NaiveDateTime> {
    let stamps = arr
        .as_any()
        .downcast_ref::<TimestampMillisecondArray>()
        .unwrap();
    stamps.value_as_datetime(idx)
}

/// Downcasts `arr` to a `TimestampMicrosecondArray` and converts row `idx` to a
/// `NaiveDateTime`. Panics on a type mismatch.
fn get_timestamp_microsecond_value(arr: &Arc<dyn Array>, idx: usize) -> Option<NaiveDateTime> {
    let stamps = arr
        .as_any()
        .downcast_ref::<TimestampMicrosecondArray>()
        .unwrap();
    stamps.value_as_datetime(idx)
}

/// Downcasts `arr` to a `TimestampNanosecondArray` and converts row `idx` to a
/// `NaiveDateTime`. Panics on a type mismatch.
fn get_timestamp_nanosecond_value(arr: &Arc<dyn Array>, idx: usize) -> Option<NaiveDateTime> {
    let stamps = arr
        .as_any()
        .downcast_ref::<TimestampNanosecondArray>()
        .unwrap();
    stamps.value_as_datetime(idx)
}

fn get_utf8_list_value(arr: &Arc<dyn Array>, idx: usize) -> Vec<Option<String>> {
let list_arr = arr.as_any().downcast_ref::<ListArray>().unwrap().value(idx);
list_arr
Expand Down Expand Up @@ -179,6 +245,34 @@ fn encode_value(
DataType::Float32 => encoder.encode_field(&get_f32_value(arr, idx))?,
DataType::Float64 => encoder.encode_field(&get_f64_value(arr, idx))?,
DataType::Utf8 => encoder.encode_field(&get_utf8_value(arr, idx))?,
DataType::Date32 => encoder.encode_field(&get_date32_value(arr, idx))?,
DataType::Date64 => encoder.encode_field(&get_date64_value(arr, idx))?,
DataType::Time32(unit) => match unit {
TimeUnit::Second => encoder.encode_field(&get_time32_second_value(arr, idx))?,
TimeUnit::Millisecond => {
encoder.encode_field(&get_time32_millisecond_value(arr, idx))?
}
_ => {}
},
DataType::Time64(unit) => match unit {
TimeUnit::Microsecond => {
encoder.encode_field(&get_time64_microsecond_value(arr, idx))?
}
TimeUnit::Nanosecond => encoder.encode_field(&get_time64_nanosecond_value(arr, idx))?,
_ => {}
},
DataType::Timestamp(unit, _) => match unit {
TimeUnit::Second => encoder.encode_field(&get_timestamp_second_value(arr, idx))?,
TimeUnit::Millisecond => {
encoder.encode_field(&get_timestamp_millisecond_value(arr, idx))?
}
TimeUnit::Microsecond => {
encoder.encode_field(&get_timestamp_microsecond_value(arr, idx))?
}
TimeUnit::Nanosecond => {
encoder.encode_field(&get_timestamp_nanosecond_value(arr, idx))?
}
},
DataType::List(field) => match field.data_type() {
DataType::Boolean => encoder.encode_field(&get_bool_list_value(arr, idx))?,
DataType::Int8 => encoder.encode_field(&get_i8_list_value(arr, idx))?,
Expand All @@ -192,6 +286,8 @@ fn encode_value(
DataType::Float32 => encoder.encode_field(&get_f32_list_value(arr, idx))?,
DataType::Float64 => encoder.encode_field(&get_f64_list_value(arr, idx))?,
DataType::Utf8 => encoder.encode_field(&get_utf8_list_value(arr, idx))?,

// TODO: more types
list_type => {
return Err(PgWireError::UserError(Box::new(ErrorInfo::new(
"ERROR".to_owned(),
Expand Down
24 changes: 6 additions & 18 deletions src/handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,23 @@ use datafusion::logical_expr::LogicalPlan;
use datafusion::prelude::*;
use pgwire::api::portal::Portal;
use pgwire::api::query::{ExtendedQueryHandler, SimpleQueryHandler};
use pgwire::api::results::{DescribePortalResponse, DescribeStatementResponse, Response, Tag};
use pgwire::api::results::{DescribePortalResponse, DescribeStatementResponse, Response};
use pgwire::api::stmt::QueryParser;
use pgwire::api::stmt::StoredStatement;
use pgwire::api::{ClientInfo, Type};
use pgwire::error::{ErrorInfo, PgWireError, PgWireResult};

use tokio::sync::Mutex;

use crate::datatypes::{self, into_pg_type};

/// pgwire query-handler service holding a shared DataFusion `SessionContext`
/// and the SQL parser used for the extended query protocol.
pub(crate) struct DfSessionService {
    // Async-mutex-guarded DataFusion session shared with `Parser`.
    // NOTE(review): made pub(crate) here — presumably so other modules can
    // register tables on the session before serving; confirm at call sites.
    pub(crate) session_context: Arc<Mutex<SessionContext>>,
    // Statement parser sharing the same session context (see `new`).
    parser: Arc<Parser>,
}

impl DfSessionService {
pub fn new() -> DfSessionService {
let session_context = Arc::new(Mutex::new(SessionContext::new()));
pub fn new(session_context: SessionContext) -> DfSessionService {
let session_context = Arc::new(Mutex::new(session_context));
let parser = Arc::new(Parser {
session_context: session_context.clone(),
});
Expand All @@ -44,18 +43,7 @@ impl SimpleQueryHandler for DfSessionService {
where
C: ClientInfo + Unpin + Send + Sync,
{
if query.starts_with("LOAD") {
let command = query.trim_end();
let command = command.strip_suffix(';').unwrap_or(command);
let args = command.split(' ').collect::<Vec<&str>>();
let table_name = args[2];
let json_path = args[1];
let ctx = self.session_context.lock().await;
ctx.register_json(table_name, json_path, NdJsonReadOptions::default())
.await
.map_err(|e| PgWireError::ApiError(Box::new(e)))?;
Ok(vec![Response::Execution(Tag::new("OK").with_rows(1))])
} else if query.to_uppercase().starts_with("SELECT") {
if query.to_uppercase().starts_with("SELECT") {
let ctx = self.session_context.lock().await;
let df = ctx
.sql(query)
Expand All @@ -68,7 +56,7 @@ impl SimpleQueryHandler for DfSessionService {
Ok(vec![Response::Error(Box::new(ErrorInfo::new(
"ERROR".to_owned(),
"XX000".to_owned(),
"Datafusion is a readonly execution engine. To load data, call\nLOAD json_file_path table_name;".to_owned(),
"Only select statements is supported by this tool.".to_owned(),
)))])
}
}
Expand Down
Loading