Patch summary (free text ahead of the first "diff --git" header).

e2e test: deploys a 400-line .js asset and checks that it is served without a
content-encoding header by default, and gzip-encoded when Accept-Encoding
lists gzip (alone or together with other encodings).

dfx installer: asset loading, media-type guessing and SHA-256 hashing move into
a new Content type; a ContentEncoder enum (gzip only) is added; for text/*,
javascript and html assets a gzip encoding is uploaded next to the identity
encoding when it is smaller than the original data.

assetstorage canister: http_request collects the Accept-Encoding values
(identity appended as last choice), selects an encoding through
asset.chooseEncoding, and returns Content-Type plus, for non-identity
encodings, Content-Encoding headers.

diff --git a/e2e/tests-dfx/assetscanister.bash b/e2e/tests-dfx/assetscanister.bash
index 01032fca04..c7497934ba 100644
--- a/e2e/tests-dfx/assetscanister.bash
+++ b/e2e/tests-dfx/assetscanister.bash
@@ -13,6 +13,35 @@ teardown() {
     dfx_stop
 }
 
+@test "generates gzipped content encoding for .js files" {
+    install_asset assetscanister
+    for i in $(seq 1 400); do
+        echo "some easily duplicate text $i" >>src/e2e_project_assets/assets/notreally.js
+    done
+
+    dfx_start
+    assert_command dfx deploy
+    dfx canister call --query e2e_project_assets list '(record{})'
+
+    ID=$(dfx canister id e2e_project_assets)
+    PORT=$(cat .dfx/webserver-port)
+
+    assert_command curl -v --output not-compressed http://localhost:"$PORT"/notreally.js?canisterId="$ID"
+    assert_not_match "content-encoding:"
+    diff not-compressed src/e2e_project_assets/assets/notreally.js
+
+    assert_command curl -v --output encoded-compressed-1.gz -H "Accept-Encoding: gzip" http://localhost:"$PORT"/notreally.js?canisterId="$ID"
+    assert_match "content-encoding: gzip"
+    gunzip encoded-compressed-1.gz
+    diff encoded-compressed-1 src/e2e_project_assets/assets/notreally.js
+
+    # should split up accept-encoding lines with more than one encoding
+    assert_command curl -v --output encoded-compressed-2.gz -H "Accept-Encoding: gzip, deflate, br" http://localhost:"$PORT"/notreally.js?canisterId="$ID"
+    assert_match "content-encoding: gzip"
+    gunzip encoded-compressed-2.gz
+    diff encoded-compressed-2 src/e2e_project_assets/assets/notreally.js
+}
+
 @test "leaves in place files that were already installed" {
     install_asset assetscanister
     dd if=/dev/urandom of=src/e2e_project_assets/assets/asset1.bin bs=400000 count=1
diff --git a/src/dfx/src/lib/installers/assets/content.rs b/src/dfx/src/lib/installers/assets/content.rs
new file mode 100644
index 0000000000..d63ba9a676
--- /dev/null
+++ b/src/dfx/src/lib/installers/assets/content.rs
@@ -0,0 +1,49 @@
+use crate::lib::error::DfxResult;
+
+use crate::lib::installers::assets::content_encoder::ContentEncoder;
+use flate2::write::GzEncoder;
+use flate2::Compression;
+use mime::Mime;
+use openssl::sha::Sha256;
+use std::io::Write;
+use std::path::Path;
+
+pub struct Content {
+    pub data: Vec<u8>,
+    pub media_type: Mime,
+}
+
+impl Content {
+    pub fn load(path: &Path) -> DfxResult<Content> {
+        let data = std::fs::read(path)?;
+
+        // todo: check contents if mime_guess fails https://github.com/dfinity/sdk/issues/1594
+        let media_type = mime_guess::from_path(path)
+            .first()
+            .unwrap_or(mime::APPLICATION_OCTET_STREAM);
+
+        Ok(Content { data, media_type })
+    }
+
+    pub fn encode(&self, encoder: &ContentEncoder) -> DfxResult<Content> {
+        match encoder {
+            ContentEncoder::Gzip => self.to_gzip(),
+        }
+    }
+
+    pub fn to_gzip(&self) -> DfxResult<Content> {
+        let mut e = GzEncoder::new(Vec::new(), Compression::default());
+        e.write_all(&self.data)?;
+        let data = e.finish()?;
+        Ok(Content {
+            data,
+            media_type: self.media_type.clone(),
+        })
+    }
+
+    pub fn sha256(&self) -> Vec<u8> {
+        let mut sha256 = Sha256::new();
+        sha256.update(&self.data);
+        sha256.finish().to_vec()
+    }
+}
diff --git a/src/dfx/src/lib/installers/assets/content_encoder.rs b/src/dfx/src/lib/installers/assets/content_encoder.rs
new file mode 100644
index 0000000000..93abc4b56b
--- /dev/null
+++ b/src/dfx/src/lib/installers/assets/content_encoder.rs
@@ -0,0 +1,11 @@
+pub enum ContentEncoder {
+    Gzip,
+}
+
+impl std::fmt::Display for ContentEncoder {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match &self {
+            ContentEncoder::Gzip => f.write_str("gzip"),
+        }
+    }
+}
diff --git a/src/dfx/src/lib/installers/assets.rs b/src/dfx/src/lib/installers/assets/mod.rs
similarity index 83%
rename from src/dfx/src/lib/installers/assets.rs
rename to src/dfx/src/lib/installers/assets/mod.rs
index 74d90c6bbf..dd8ee072a6 100644
--- a/src/dfx/src/lib/installers/assets.rs
+++ b/src/dfx/src/lib/installers/assets/mod.rs
@@ -1,6 +1,8 @@
 use crate::lib::canister_info::assets::AssetsCanisterInfo;
 use crate::lib::canister_info::CanisterInfo;
 use crate::lib::error::{DfxError, DfxResult};
+use crate::lib::installers::assets::content::Content;
+use crate::lib::installers::assets::content_encoder::ContentEncoder;
 use crate::lib::waiter::waiter_with_timeout;
 
 use candid::{CandidType, Decode, Encode, Nat};
@@ -8,13 +10,16 @@ use delay::{Delay, Waiter};
 use ic_agent::Agent;
 use ic_types::Principal;
 use mime::Mime;
-use openssl::sha::Sha256;
 use serde::Deserialize;
 use std::collections::HashMap;
 use std::path::PathBuf;
 use std::time::Duration;
 use walkdir::WalkDir;
 
+mod content;
+mod content_encoder;
+
+const CONTENT_ENCODING_IDENTITY: &str = "identity";
 const CREATE_BATCH: &str = "create_batch";
 const CREATE_CHUNK: &str = "create_chunk";
 const COMMIT_BATCH: &str = "commit_batch";
@@ -182,23 +187,29 @@ async fn upload_content_chunks(
     canister_call_params: &CanisterCallParams<'_>,
     batch_id: &Nat,
     asset_location: &AssetLocation,
-    content: &[u8],
+    content: &Content,
+    content_encoding: &str,
 ) -> DfxResult<Vec<Nat>> {
     let mut chunk_ids: Vec<Nat> = vec![];
-    let chunks = content.chunks(MAX_CHUNK_SIZE);
+    let chunks = content.data.chunks(MAX_CHUNK_SIZE);
     let (num_chunks, _) = chunks.size_hint();
     for (i, data_chunk) in chunks.enumerate() {
         println!(
-            " {} {}/{} ({} bytes)",
+            " {}{} {}/{} ({} bytes)",
             &asset_location.key,
+            content_encoding_descriptive_suffix(content_encoding),
             i + 1,
             num_chunks,
-            data_chunk.len()
+            data_chunk.len(),
         );
         chunk_ids.push(create_chunk(canister_call_params, batch_id, data_chunk).await?);
     }
     if chunk_ids.is_empty() {
-        println!(" {} 1/1 (0 bytes)", &asset_location.key);
+        println!(
+            " {}{} 1/1 (0 bytes)",
+            &asset_location.key,
+            content_encoding_descriptive_suffix(content_encoding)
+        );
         let empty = vec![];
         chunk_ids.push(create_chunk(canister_call_params, batch_id, &empty).await?);
     }
@@ -210,17 +221,14 @@ async fn make_project_asset_encoding(
     batch_id: &Nat,
     asset_location: &AssetLocation,
     container_assets: &HashMap<String, AssetDetails>,
-    content: &[u8],
+    content: &Content,
     content_encoding: &str,
-    media_type: &Mime,
 ) -> DfxResult<ProjectAssetEncoding> {
-    let mut sha256 = Sha256::new();
-    sha256.update(&content);
-    let sha256 = sha256.finish().to_vec();
+    let sha256 = content.sha256();
 
     let already_in_place = if let Some(container_asset) = container_assets.get(&asset_location.key)
     {
-        if container_asset.content_type != media_type.to_string() {
+        if container_asset.content_type != content.media_type.to_string() {
             false
         } else if let Some(container_asset_encoding_sha256) = container_asset
             .encodings
@@ -238,14 +246,22 @@ async fn make_project_asset_encoding(
 
     let chunk_ids = if already_in_place {
         println!(
-            " {} ({} bytes) sha {} is already installed",
+            " {}{} ({} bytes) sha {} is already installed",
             &asset_location.key,
-            content.len(),
+            content_encoding_descriptive_suffix(content_encoding),
+            content.data.len(),
             hex::encode(&sha256),
         );
         vec![]
     } else {
-        upload_content_chunks(canister_call_params, batch_id, &asset_location, content).await?
+        upload_content_chunks(
+            canister_call_params,
+            batch_id,
+            &asset_location,
+            content,
+            content_encoding,
+        )
+        .await?
     };
 
     Ok(ProjectAssetEncoding {
@@ -255,61 +271,87 @@ async fn make_project_asset_encoding(
     })
 }
 
+fn content_encoding_descriptive_suffix(content_encoding: &str) -> String {
+    if content_encoding == CONTENT_ENCODING_IDENTITY {
+        "".to_string()
+    } else {
+        format!(" ({})", content_encoding)
+    }
+}
+
 async fn make_project_asset(
     canister_call_params: &CanisterCallParams<'_>,
     batch_id: &Nat,
     asset_location: AssetLocation,
     container_assets: &HashMap<String, AssetDetails>,
 ) -> DfxResult<ProjectAsset> {
-    let content = std::fs::read(&asset_location.source)?;
-
-    let media_type = mime_guess::from_path(&asset_location.source)
-        .first()
-        .unwrap_or(mime::APPLICATION_OCTET_STREAM);
+    let content = Content::load(&asset_location.source)?;
 
-    let mut encodings = HashMap::new();
-
-    add_identity_encoding(
-        &mut encodings,
+    let encodings = make_encodings(
         canister_call_params,
         batch_id,
         &asset_location,
         container_assets,
         &content,
-        &media_type,
     )
     .await?;
 
     Ok(ProjectAsset {
         asset_location,
-        media_type,
+        media_type: content.media_type,
         encodings,
     })
 }
 
-async fn add_identity_encoding(
-    encodings: &mut HashMap<String, ProjectAssetEncoding>,
+// todo: make this configurable https://github.com/dfinity/dx-triage/issues/152
+fn applicable_encoders(media_type: &Mime) -> Vec<ContentEncoder> {
+    match (media_type.type_(), media_type.subtype()) {
+        (mime::TEXT, _) | (_, mime::JAVASCRIPT) | (_, mime::HTML) => vec![ContentEncoder::Gzip],
+        _ => vec![],
+    }
+}
+
+async fn make_encodings(
     canister_call_params: &CanisterCallParams<'_>,
     batch_id: &Nat,
     asset_location: &AssetLocation,
     container_assets: &HashMap<String, AssetDetails>,
-    content: &[u8],
-    media_type: &Mime,
-) -> DfxResult {
-    let content_encoding = "identity".to_string();
-    let project_asset_encoding = make_project_asset_encoding(
+    content: &Content,
+) -> DfxResult<HashMap<String, ProjectAssetEncoding>> {
+    let mut encodings = HashMap::new();
+
+    let identity_asset_encoding = make_project_asset_encoding(
         canister_call_params,
         batch_id,
         &asset_location,
         container_assets,
         &content,
-        &content_encoding,
-        media_type,
+        CONTENT_ENCODING_IDENTITY,
     )
     .await?;
+    encodings.insert(
+        CONTENT_ENCODING_IDENTITY.to_string(),
+        identity_asset_encoding,
+    );
+
+    for encoder in applicable_encoders(&content.media_type) {
+        let encoded = content.encode(&encoder)?;
+        if encoded.data.len() < content.data.len() {
+            let content_encoding = format!("{}", encoder);
+            let project_asset_encoding = make_project_asset_encoding(
+                canister_call_params,
+                batch_id,
+                &asset_location,
+                container_assets,
+                &encoded,
+                &content_encoding,
+            )
+            .await?;
+            encodings.insert(content_encoding, project_asset_encoding);
+        }
+    }
 
-    encodings.insert(content_encoding, project_asset_encoding);
-    Ok(())
+    Ok(encodings)
 }
 
 async fn make_project_assets(
diff --git a/src/distributed/assetstorage/Main.mo b/src/distributed/assetstorage/Main.mo
index a3ffb7bf13..6e6a57d4e1 100644
--- a/src/distributed/assetstorage/Main.mo
+++ b/src/distributed/assetstorage/Main.mo
@@ -1,4 +1,6 @@
 import Array "mo:base/Array";
+import Buffer "mo:base/Buffer";
+import Char "mo:base/Char";
 import Debug "mo:base/Debug";
 import Error "mo:base/Error";
 import HashMap "mo:base/HashMap";
@@ -10,6 +12,9 @@ import Result "mo:base/Result";
 import Text "mo:base/Text";
 import Time "mo:base/Time";
 
+// todo: remove direct dependency on Prim https://github.com/dfinity/sdk/issues/1598
+import Prim "mo:prim";
+
 import A "Asset";
 import B "Batch";
 import C "Chunk";
@@ -398,10 +403,11 @@ shared ({caller = creator}) actor class () {
 
   public query func http_request(request: T.HttpRequest): async T.HttpResponse {
     let key = getKey(request.url);
+    let acceptEncodings = getAcceptEncodings(request.headers);
 
-    let assetAndEncoding: ?(A.Asset, A.AssetEncoding) = switch (getAssetAndEncoding(key)) {
+    let assetAndEncoding: ?(A.Asset, A.AssetEncoding) = switch (getAssetAndEncoding(key, acceptEncodings)) {
       case (?found) ?found;
-      case (null) getAssetAndEncoding("/index.html");
+      case (null) getAssetAndEncoding("/index.html", acceptEncodings);
     };
 
 
@@ -416,9 +422,15 @@ shared ({caller = creator}) actor class () {
           case null null;
         };
 
+        let headers = Buffer.Buffer<(Text, Text)>(2);
+        headers.add(("Content-Type", asset.contentType));
+        if (assetEncoding.contentEncoding != "identity") {
+          headers.add(("Content-Encoding", assetEncoding.contentEncoding));
+        };
+
         {
           status_code = 200;
-          headers = [];
+          headers = headers.toArray();
           body = assetEncoding.content[0];
           streaming_strategy = streaming_strategy;
         }
@@ -426,6 +438,38 @@ shared ({caller = creator}) actor class () {
     }
   };
 
+  func getAcceptEncodings(headers: [T.HeaderField]): [Text] {
+    let accepted_encodings = Buffer.Buffer<Text>(2);
+    for (header in headers.vals()) {
+      // todo: remove direct dependency on Prim https://github.com/dfinity/sdk/issues/1598
+      let k = Text.map(header.0, Prim.charToUpper);
+      let v = header.1;
+      // todo: use caseInsensitiveTextEqual, see https://github.com/dfinity/sdk/issues/1599
+      if (k == "ACCEPT-ENCODING") {
+        for (t in Text.split(v, #char ',')) {
+          let encoding = Text.trim(t, #char ' ');
+          accepted_encodings.add(encoding);
+        }
+      }
+    };
+    // last choice
+    accepted_encodings.add("identity");
+
+    accepted_encodings.toArray()
+  };
+
+  // todo: use this once Text.compareWith uses its cmp parameter https://github.com/dfinity/sdk/issues/1599
+  //func caseInsensitiveTextEqual(s1: Text, s2: Text): Bool {
+  //  switch(Text.compareWith(s1, s2, caseInsensitiveCharCompare)) {
+  //    case (#equal) true;
+  //    case _ false;
+  //  }
+  //};
+
+  func caseInsensitiveCharCompare(c1: Char, c2: Char) : { #less; #equal; #greater } {
+    Char.compare(Prim.charToUpper(c1), Prim.charToUpper(c2))
+  };
+
   // Get subsequent chunks of an asset encoding's content, after http_request().
   // Like get_chunk, but converts url to key
   public query func http_request_streaming_callback(token: T.StreamingCallbackToken) : async T.StreamingCallbackHttpResponse {
@@ -474,11 +518,11 @@ shared ({caller = creator}) actor class () {
     path
   };
 
-  private func getAssetAndEncoding(path: Text): ?(A.Asset, A.AssetEncoding) {
+  private func getAssetAndEncoding(path: Text, acceptEncodings: [Text]): ?(A.Asset, A.AssetEncoding) {
     switch (assets.get(path)) {
       case null null;
       case (?asset) {
-        switch (asset.getEncoding("identity")) {
+        switch (asset.chooseEncoding(acceptEncodings)) {
           case null null;
           case (?assetEncoding) ?(asset, assetEncoding);
         }