From f4c275b8d79f972d1467cb8ce1f759491aa7d813 Mon Sep 17 00:00:00 2001 From: Adhityaa Chandrasekar Date: Thu, 19 Dec 2024 19:20:46 -0500 Subject: [PATCH] feature: use native-tls in ureq requests to trust system TLS certs --- pdf2md/cli/Cargo.lock | 192 ++++++++++++++++++++++++ pdf2md/cli/Cargo.toml | 3 +- pdf2md/cli/src/operators/create_task.rs | 7 +- pdf2md/cli/src/operators/poll_task.rs | 7 +- server/Cargo.lock | 2 + server/Cargo.toml | 3 +- server/src/bin/crawl-worker.rs | 9 +- server/src/operators/model_operator.rs | 60 +++++--- 8 files changed, 260 insertions(+), 23 deletions(-) diff --git a/pdf2md/cli/Cargo.lock b/pdf2md/cli/Cargo.lock index f3d922a408..7da850b3db 100644 --- a/pdf2md/cli/Cargo.lock +++ b/pdf2md/cli/Cargo.lock @@ -63,6 +63,12 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + [[package]] name = "cc" version = "1.1.37" @@ -124,6 +130,7 @@ version = "0.1.0" dependencies = [ "base64", "clap", + "native-tls", "serde_json", "ureq", ] @@ -134,6 +141,22 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + [[package]] name = "crc32fast" version = "1.4.2" @@ -154,6 +177,22 @@ dependencies = [ "syn", ] +[[package]] +name = "errno" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "flate2" version = "1.0.34" @@ -164,6 +203,21 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -347,6 +401,12 @@ version = "0.2.162" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + [[package]] name = "litemap" version = "0.7.3" @@ -374,18 +434,85 @@ dependencies = [ "adler2", ] +[[package]] +name = "native-tls" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + [[package]] name = "once_cell" version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +[[package]] +name = "openssl" +version = "0.10.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" +dependencies = [ + "bitflags", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "percent-encoding" version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "pkg-config" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" + [[package]] name = "proc-macro2" version = "1.0.89" @@ -419,6 +546,19 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rustix" +version = "0.38.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f93dc38ecbab2eb790ff964bb77fa94faf256fd3e73285fd7ba0903b76bedb85" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.59.0", +] + [[package]] name = "rustls" version = "0.23.16" @@ -457,6 +597,38 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +[[package]] +name = "schannel" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1863fd3768cd83c56a7f60faa4dc0d403f1b6df0a38c3c25f44b7894e45370d5" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "serde" version = "1.0.214" @@ -547,6 +719,19 @@ dependencies = [ "syn", ] +[[package]] +name = "tempfile" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" +dependencies = [ + "cfg-if", + "fastrand", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + [[package]] name = "tinystr" version = "0.7.6" @@ -578,6 +763,7 @@ dependencies = [ "base64", "flate2", "log", + "native-tls", "once_cell", "rustls", "rustls-pki-types", @@ -616,6 +802,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" diff --git a/pdf2md/cli/Cargo.toml b/pdf2md/cli/Cargo.toml index c753f0365b..34bbdde129 100644 --- a/pdf2md/cli/Cargo.toml +++ b/pdf2md/cli/Cargo.toml @@ -7,4 +7,5 @@ edition = "2021" base64 = "0.22.1" clap = { version = "4.5.20", features = ["derive", "env"] } serde_json = "1.0.132" -ureq = { version = "2.10.1", features = ["json"] } +ureq = { version = "2.10.1", features = ["json", "native-tls"] } +native-tls = "0.2.12" diff --git a/pdf2md/cli/src/operators/create_task.rs b/pdf2md/cli/src/operators/create_task.rs index fb6fccc75d..095e58552c 100644 --- a/pdf2md/cli/src/operators/create_task.rs +++ b/pdf2md/cli/src/operators/create_task.rs @@ -1,10 +1,15 @@ +use std::sync::Arc; + use base64::Engine; pub fn create_task(file: &str, base_url: &str, api_key: &str) { let file_buf = std::fs::read(file).expect("Failed to read file"); let file_base64 = base64::prelude::BASE64_STANDARD.encode(file_buf); - let request = ureq::post(format!("{}/api/task", base_url).as_str()) + let request = ureq::AgentBuilder::new() + .tls_connector(Arc::new(native_tls::TlsConnector::new()?)) + .build() + .post(format!("{}/api/task", base_url).as_str()) .set("Content-Type", "application/json") .set("Authorization", api_key) .send_json(serde_json::json!({ diff --git a/pdf2md/cli/src/operators/poll_task.rs b/pdf2md/cli/src/operators/poll_task.rs index 2bdbf6edbc..9d5a764f55 100644 --- a/pdf2md/cli/src/operators/poll_task.rs +++ b/pdf2md/cli/src/operators/poll_task.rs @@ -1,6 +1,11 @@ +use std::sync::Arc; + pub fn poll_task(task_id: &str, base_url: &str, api_key: &str) { loop { - let request = ureq::get(format!("{}/api/task/{}", base_url, task_id).as_str()) + let request = ureq::AgentBuilder::new() + .tls_connector(Arc::new(native_tls::TlsConnector::new()?)) + .build() + .get(format!("{}/api/task/{}", base_url, task_id).as_str()) .set("Content-Type", "application/json") .set("Authorization", api_key) .call() diff --git a/server/Cargo.lock b/server/Cargo.lock index e38ee8998a..5dbd74a004 100644 --- a/server/Cargo.lock +++ b/server/Cargo.lock @@ -6847,6 +6847,7 @@ dependencies = [ "minijinja", "minijinja-embed", "murmur3", + "native-tls", "ndarray", "oas3", "oauth2", @@ -7038,6 +7039,7 @@ dependencies = [ "base64 0.22.1", "flate2", "log", + "native-tls", "once_cell", "rustls 0.23.11", "rustls-pki-types", diff --git a/server/Cargo.toml b/server/Cargo.toml index 07066bbf99..38251e5db4 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -155,7 +155,8 @@ scraper = "0.19.0" regex-split = "0.1.0" simple-server-timing-header = "0.1.1" ndarray = "0.15.6" -ureq = { version = "2.9.6", features = ["json"] } +ureq = { version = "2.9.6", features = ["json", "native-tls"] } +native-tls = "0.2.12" env_logger = "0.11.5" tokio-postgres = "0.7.10" postgres-openssl = "0.5.0" diff --git a/server/src/bin/crawl-worker.rs b/server/src/bin/crawl-worker.rs index 5dea9529c8..91af0260df 100644 --- a/server/src/bin/crawl-worker.rs +++ b/server/src/bin/crawl-worker.rs @@ -608,7 +608,14 @@ async fn crawl( let cleaned_url = crawl_request.url.trim_end_matches("/"); let url = format!("{}/products.json?page={}", cleaned_url, cur_page); - let response: ShopifyResponse = ureq::get(&url) + let response: ShopifyResponse = ureq::AgentBuilder::new() + .tls_connector(Arc::new(native_tls::TlsConnector::new().map_err(|_| { + ServiceError::InternalServerError( + "Failed to acquire tls connection".to_string(), + ) + })?)) + .build() + .get(&url) .call() .map_err(|e| ServiceError::InternalServerError(format!("Failed to fetch: {}", e)))? .into_json() diff --git a/server/src/operators/model_operator.rs b/server/src/operators/model_operator.rs index c2cc53b52c..f48d7f9d8d 100644 --- a/server/src/operators/model_operator.rs +++ b/server/src/operators/model_operator.rs @@ -8,7 +8,7 @@ use actix_web::web; use murmur3::murmur3_32; use openai_dive::v1::resources::embedding::EmbeddingInput; use serde::{Deserialize, Serialize}; -use std::{collections::HashMap, io::Cursor, ops::IndexMut}; +use std::{collections::HashMap, io::Cursor, ops::IndexMut, sync::Arc}; use super::parse_operator::convert_html_to_text; @@ -108,21 +108,26 @@ pub async fn get_dense_vector( }; web::block(move || { - let embeddings_resp_a = ureq::post(&format!( - "{}/embeddings?api-version=2023-05-15", - embedding_base_url - )) - .set("Authorization", &format!("Bearer {}", &embedding_api_key)) - .set("api-key", &embedding_api_key) - .set("Content-Type", "application/json") - .send_json(serde_json::to_value(parameters).unwrap()) - .map_err(|e| { - ServiceError::InternalServerError(format!( - "Could not get embeddings from server: {:?}, {:?}", - e, - e.to_string() + let embeddings_resp_a = ureq::AgentBuilder::new() + .tls_connector(Arc::new(native_tls::TlsConnector::new().map_err(|_| { + ServiceError::InternalServerError("Failed to acquire tls connection".to_string()) + })?)) + .build() + .post(&format!( + "{}/embeddings?api-version=2023-05-15", + embedding_base_url )) - })?; + .set("Authorization", &format!("Bearer {}", &embedding_api_key)) + .set("api-key", &embedding_api_key) + .set("Content-Type", "application/json") + .send_json(serde_json::to_value(parameters).unwrap()) + .map_err(|e| { + ServiceError::InternalServerError(format!( + "Could not get embeddings from server: {:?}, {:?}", + e, + e.to_string() + )) + })?; let embeddings_resp = embeddings_resp_a .into_json::() @@ -207,7 +212,12 @@ pub async fn get_sparse_vector( let embed_type_string = embed_type.to_owned(); web::block(move || { - let mut sparse_vectors = ureq::post(&embedding_server_call) + let mut sparse_vectors = ureq::AgentBuilder::new() + .tls_connector(Arc::new(native_tls::TlsConnector::new().map_err(|_| { + ServiceError::InternalServerError("Failed to acquire tls connection".to_string()) + })?)) + .build() + .post(&embedding_server_call) .set("Content-Type", "application/json") .set( "Authorization", @@ -890,7 +900,14 @@ pub async fn cross_encoder( if server_origin != default_server_origin { // Assume cohere let reranker_model_name = dataset_config.RERANKER_MODEL_NAME.clone(); - let resp = ureq::post(&embedding_server_call) + let resp = ureq::AgentBuilder::new() + .tls_connector(Arc::new(native_tls::TlsConnector::new().map_err(|_| { + ServiceError::InternalServerError( + "Failed to acquire tls connection".to_string(), + ) + })?)) + .build() + .post(&embedding_server_call) .set("Content-Type", "application/json") .set( "Authorization", @@ -919,7 +936,14 @@ pub async fn cross_encoder( results.index_mut(pair.index).score = pair.relevance_score as f64; }); } else { - let resp = ureq::post(&embedding_server_call) + let resp = ureq::AgentBuilder::new() + .tls_connector(Arc::new(native_tls::TlsConnector::new().map_err(|_| { + ServiceError::InternalServerError( + "Failed to acquire tls connection".to_string(), + ) + })?)) + .build() + .post(&embedding_server_call) .set("Content-Type", "application/json") .set( "Authorization",