From fc15bc58f62148d9cf0af586da7dce11f5372845 Mon Sep 17 00:00:00 2001 From: Mark McCaskey Date: Fri, 24 May 2019 18:00:07 -0700 Subject: [PATCH 1/6] add mapdir for emscripten; implement getdents, etc. --- Cargo.lock | 1 + lib/emscripten/Cargo.toml | 7 +- lib/emscripten/src/lib.rs | 12 +++- lib/emscripten/src/syscalls/mod.rs | 53 ++++++++------ lib/emscripten/src/syscalls/unix.rs | 95 ++++++++++++++++++++++++-- lib/emscripten/src/syscalls/windows.rs | 6 ++ lib/emscripten/src/utils.rs | 32 +++++++++ src/bin/wasmer.rs | 1 + 8 files changed, 176 insertions(+), 31 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f5504e8c14d..72fda4277ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2336,6 +2336,7 @@ version = "0.4.2" dependencies = [ "byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "hashbrown 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.54 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/lib/emscripten/Cargo.toml b/lib/emscripten/Cargo.toml index c9209eb8f43..3d9e5df713d 100644 --- a/lib/emscripten/Cargo.toml +++ b/lib/emscripten/Cargo.toml @@ -9,14 +9,15 @@ edition = "2018" build = "build/mod.rs" [dependencies] -wasmer-runtime-core = { path = "../runtime-core", version = "0.4.2" } +byteorder = "1" +hashbrown = "0.1" lazy_static = "1.2.0" libc = "0.2.49" -byteorder = "1" time = "0.1.41" wasmer-clif-backend = { path = "../clif-backend", version = "0.4.2" } -wasmer-singlepass-backend = { path = "../singlepass-backend", version = "0.4.2", optional = true } wasmer-llvm-backend = { path = "../llvm-backend", version = "0.4.2", optional = true } +wasmer-runtime-core = { path = "../runtime-core", version = "0.4.2" } +wasmer-singlepass-backend = { path = "../singlepass-backend", version = "0.4.2", optional = true } [target.'cfg(windows)'.dependencies] rand = "0.6" diff --git a/lib/emscripten/src/lib.rs b/lib/emscripten/src/lib.rs index f60156bab0d..0e600575500 100644 --- a/lib/emscripten/src/lib.rs +++ b/lib/emscripten/src/lib.rs @@ -3,8 +3,10 @@ #[macro_use] extern crate wasmer_runtime_core; +use hashbrown::HashMap; use lazy_static::lazy_static; use std::cell::UnsafeCell; +use std::path::PathBuf; use std::{f64, ffi::c_void}; use wasmer_runtime_core::{ error::CallResult, @@ -141,10 +143,14 @@ pub struct EmscriptenData<'a> { pub stack_save: Option>, pub stack_restore: Option>, pub set_threw: Option>, + pub mapped_dirs: HashMap, } impl<'a> EmscriptenData<'a> { - pub fn new(instance: &'a mut Instance) -> EmscriptenData<'a> { + pub fn new( + instance: &'a mut Instance, + mapped_dirs: HashMap, + ) -> EmscriptenData<'a> { let malloc = instance.func("_malloc").unwrap(); let free = instance.func("_free").unwrap(); let memalign = instance.func("_memalign").ok(); @@ -272,6 +278,7 @@ impl<'a> EmscriptenData<'a> { stack_save, stack_restore, set_threw, + mapped_dirs, } } } @@ -282,8 +289,9 @@ pub fn run_emscripten_instance( path: &str, args: Vec<&str>, entrypoint: Option, + mapped_dirs: Vec<(String, PathBuf)>, ) -> CallResult<()> { - let mut data = EmscriptenData::new(instance); + let mut data = EmscriptenData::new(instance, mapped_dirs.into_iter().collect()); let data_ptr = &mut data as *mut _ as *mut c_void; instance.context_mut().data = data_ptr; diff --git a/lib/emscripten/src/syscalls/mod.rs b/lib/emscripten/src/syscalls/mod.rs index 5654a12995d..18808f0add3 100644 --- a/lib/emscripten/src/syscalls/mod.rs +++ b/lib/emscripten/src/syscalls/mod.rs @@ -10,7 +10,8 @@ pub use self::unix::*; #[cfg(windows)] pub use self::windows::*; -use super::utils::copy_stat_into_wasm; +use crate::utils::{copy_stat_into_wasm, get_cstr_path, get_current_directory}; + use super::varargs::VarArgs; use byteorder::{ByteOrder, LittleEndian}; /// NOTE: TODO: These syscalls only support wasm_32 for now because they assume offsets are u32 @@ -94,13 +95,17 @@ pub fn ___syscall6(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_int pub fn ___syscall12(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_int { debug!("emscripten::___syscall12 (chdir) {}", _which); let path_addr: i32 = varargs.get(ctx); - unsafe { - let path_ptr = emscripten_memory_pointer!(ctx.memory(0), path_addr) as *const i8; - let _path = std::ffi::CStr::from_ptr(path_ptr); - let ret = chdir(path_ptr); - debug!("=> path: {:?}, ret: {}", _path, ret); + let path_ptr = emscripten_memory_pointer!(ctx.memory(0), path_addr) as *const i8; + let real_path = get_cstr_path(ctx, path_ptr) + .map(|cstr| cstr.as_c_str() as *const _ as *const i8) + .unwrap_or(path_ptr); + let ret = unsafe { chdir(real_path) }; + debug!( + "=> path: {:?}, ret: {}", + unsafe { std::ffi::CStr::from_ptr(real_path) }, ret - } + ); + ret } pub fn ___syscall10(_ctx: &mut Ctx, _one: i32, _two: i32) -> i32 { @@ -126,11 +131,17 @@ pub fn ___syscall38(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> i32 { let new_path_addr: u32 = varargs.get(ctx); let old_path = emscripten_memory_pointer!(ctx.memory(0), old_path_addr) as *const i8; let new_path = emscripten_memory_pointer!(ctx.memory(0), new_path_addr) as *const i8; - let result = unsafe { rename(old_path, new_path) }; + let real_old_path = get_cstr_path(ctx, old_path) + .map(|cstr| cstr.as_c_str() as *const _ as *const i8) + .unwrap_or(old_path); + let real_new_path = get_cstr_path(ctx, new_path) + .map(|cstr| cstr.as_c_str() as *const _ as *const i8) + .unwrap_or(new_path); + let result = unsafe { rename(real_old_path, real_new_path) }; debug!( "=> old_path: {}, new_path: {}, result: {}", - unsafe { std::ffi::CStr::from_ptr(old_path).to_str().unwrap() }, - unsafe { std::ffi::CStr::from_ptr(new_path).to_str().unwrap() }, + unsafe { std::ffi::CStr::from_ptr(real_old_path).to_str().unwrap() }, + unsafe { std::ffi::CStr::from_ptr(real_new_path).to_str().unwrap() }, result ); result @@ -141,7 +152,10 @@ pub fn ___syscall40(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_int debug!("emscripten::___syscall40 (rmdir)"); let pathname: u32 = varargs.get(ctx); let pathname_addr = emscripten_memory_pointer!(ctx.memory(0), pathname) as *const i8; - unsafe { rmdir(pathname_addr) } + let real_path = get_cstr_path(ctx, pathname_addr) + .map(|cstr| cstr.as_c_str() as *const _ as *const i8) + .unwrap_or(pathname_addr); + unsafe { rmdir(real_path) } } // pipe @@ -224,10 +238,9 @@ pub fn ___syscall110(_ctx: &mut Ctx, _one: i32, _two: i32) -> i32 { // getcwd pub fn ___syscall183(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> i32 { debug!("emscripten::___syscall183"); - use std::env; let buf_offset: c_int = varargs.get(ctx); let _size: c_int = varargs.get(ctx); - let path = env::current_dir(); + let path = get_current_directory(ctx); let path_string = path.unwrap().display().to_string(); let len = path_string.len(); unsafe { @@ -401,15 +414,18 @@ pub fn ___syscall195(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_in let buf: u32 = varargs.get(ctx); let pathname_addr = emscripten_memory_pointer!(ctx.memory(0), pathname) as *const i8; + let real_path = get_cstr_path(ctx, pathname_addr) + .map(|cstr| cstr.as_c_str() as *const _ as *const i8) + .unwrap_or(pathname_addr); unsafe { let mut _stat: stat = std::mem::zeroed(); - let ret = stat(pathname_addr, &mut _stat); + let ret = stat(real_path, &mut _stat); debug!( "=> pathname: {}, buf: {}, path: {} = {}\nlast os error: {}", pathname, buf, - std::ffi::CStr::from_ptr(pathname_addr).to_str().unwrap(), + std::ffi::CStr::from_ptr(real_path).to_str().unwrap(), ret, Error::last_os_error() ); @@ -440,11 +456,6 @@ pub fn ___syscall197(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_in 0 } -pub fn ___syscall220(_ctx: &mut Ctx, _one: i32, _two: i32) -> i32 { - debug!("emscripten::___syscall220"); - -1 -} - // fcntl64 pub fn ___syscall221(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_int { debug!("emscripten::___syscall221 (fcntl64) {}", _which); @@ -457,7 +468,7 @@ pub fn ___syscall221(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_in // |FNONBLOCK - 0x04 debug!("=> fd: {}, cmd: {}", _fd, cmd); match cmd { - 2 => 0, + 1 | 2 => 0, 13 | 14 => 0, // pretend file locking worked _ => -1, } diff --git a/lib/emscripten/src/syscalls/unix.rs b/lib/emscripten/src/syscalls/unix.rs index ec8b98f3f90..0283a756de4 100644 --- a/lib/emscripten/src/syscalls/unix.rs +++ b/lib/emscripten/src/syscalls/unix.rs @@ -43,6 +43,7 @@ use libc::{ pid_t, pread, pwrite, + readdir, // readv, recvfrom, recvmsg, @@ -108,8 +109,11 @@ pub fn ___syscall5(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_int let flags: i32 = varargs.get(ctx); let mode: u32 = varargs.get(ctx); let pathname_addr = emscripten_memory_pointer!(ctx.memory(0), pathname) as *const i8; - let _path_str = unsafe { std::ffi::CStr::from_ptr(pathname_addr).to_str().unwrap() }; - let fd = unsafe { open(pathname_addr, flags, mode) }; + let real_path = utils::get_cstr_path(ctx, pathname_addr) + .map(|cstr| cstr.as_c_str() as *const _ as *const i8) + .unwrap_or(pathname_addr); + let _path_str = unsafe { std::ffi::CStr::from_ptr(real_path).to_str().unwrap() }; + let fd = unsafe { open(real_path, flags, mode) }; debug!( "=> pathname: {}, flags: {}, mode: {} = fd: {}\npath: {}\nlast os error: {}", pathname, @@ -160,11 +164,17 @@ pub fn ___syscall83(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_int let path2_ptr: c_int = varargs.get(ctx); let path1 = emscripten_memory_pointer!(ctx.memory(0), path1_ptr) as *mut i8; let path2 = emscripten_memory_pointer!(ctx.memory(0), path2_ptr) as *mut i8; - let result = unsafe { symlink(path1, path2) }; + let real_path1 = utils::get_cstr_path(ctx, path1) + .map(|cstr| cstr.as_c_str() as *const _ as *const i8) + .unwrap_or(path1); + let real_path2 = utils::get_cstr_path(ctx, path2) + .map(|cstr| cstr.as_c_str() as *const _ as *const i8) + .unwrap_or(path2); + let result = unsafe { symlink(real_path1, real_path2) }; debug!( "=> path1: {}, path2: {}, result: {}", - unsafe { std::ffi::CStr::from_ptr(path1).to_str().unwrap() }, - unsafe { std::ffi::CStr::from_ptr(path2).to_str().unwrap() }, + unsafe { std::ffi::CStr::from_ptr(real_path1).to_str().unwrap() }, + unsafe { std::ffi::CStr::from_ptr(real_path2).to_str().unwrap() }, result, ); result @@ -796,6 +806,81 @@ pub fn ___syscall196(ctx: &mut Ctx, _which: i32, mut varargs: VarArgs) -> i32 { 0 } +// getdents +// dirent structure is +// i64, i64, u16 (280), i8, [i8; 256] +pub fn ___syscall220(ctx: &mut Ctx, _which: i32, mut varargs: VarArgs) -> i32 { + debug!("emscripten::___syscall220"); + let fd: i32 = varargs.get(ctx); + let dirp_addr: i32 = varargs.get(ctx); + let count: u32 = varargs.get(ctx); + + //let dir = dbg!(emscripten_memory_pointer!(ctx.memory(0), dbg!(fd)) as *mut libc::DIR); + let dirp = emscripten_memory_pointer!(ctx.memory(0), dirp_addr) as *mut u8; + + let mut pos = 0; + // need to persist stream across calls? + + let dir: *mut libc::DIR = unsafe { libc::fdopendir(fd) }; + + dbg!("Start loop"); + while pos + 280 <= dbg!(count) as usize { + dbg!("Pre readdir"); + let dirent = unsafe { readdir(dir) }; + dbg!("post readdir"); + if dirent.is_null() { + break; + } + dbg!("dirent is not null"); + unsafe { + *(dirp.add(pos) as *mut u64) = dbg!((*dirent).d_ino); + #[cfg(not(target_os = "macos"))] + { + *(dirp.add(pos + 8) as *mut u64) = 280 //dbg!((*dirent).d_off); + } + #[cfg(target_os = "macos")] + { + *(dirp.add(pos + 8) as *mut u64) = if pos + 280 > count as usize { + count.into() + } else { + dbg!((*dirent).d_seekoff); + pos as u64 + 56 //280 + }; //; + } + dbg!((*dirent).d_namlen); + *(dirp.add(pos + 16) as *mut u16) = 280; //dbg!((*dirent).d_reclen); + *(dirp.add(pos + 18) as *mut u8) = dbg!((*dirent).d_type); + let upper_bound = std::cmp::min((*dirent).d_reclen, 255) as usize; + let mut i = 0; + while i < upper_bound { + *(dirp.add(pos + 19 + i) as *mut i8) = (*dirent).d_name[i]; + //dbg!((*dirent).d_name[i] as u8 as char); + //dbg!((*dirent).d_name[i] as u8 as char); + i += 1; + } + *(dirp.add(pos + 19 + i) as *mut i8) = 0 as i8; + } + dbg!("dirent written to memory"); + pos += 280; + /*unsafe { + eprintln!( + "{}", + std::ffi::CStr::from_bytes_with_nul_unchecked({ + let arr = *(dirent as *const u8 as *const [u8; 256]); + &arr.to_vec() + .into_iter() + .map(|b| b as u8) + .collect::>()[..20] + }) + .to_str() + .unwrap() + ); + }*/ + } + + dbg!(pos as i32) +} + /// fallocate pub fn ___syscall324(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_int { debug!("emscripten::___syscall324 (fallocate) {}", _which); diff --git a/lib/emscripten/src/syscalls/windows.rs b/lib/emscripten/src/syscalls/windows.rs index a7ac8c11c30..a65fa733ecb 100644 --- a/lib/emscripten/src/syscalls/windows.rs +++ b/lib/emscripten/src/syscalls/windows.rs @@ -251,6 +251,12 @@ pub fn ___syscall196(_ctx: &mut Ctx, _one: i32, _two: i32) -> i32 { -1 } +// getdents +pub fn ___syscall220(_ctx: &mut Ctx, _one: i32, _two: i32) -> i32 { + debug!("emscripten::___syscall220"); + -1 +} + /// fchown pub fn ___syscall207(_ctx: &mut Ctx, _which: c_int, _varargs: VarArgs) -> c_int { debug!("emscripten::___syscall207 (fchown) {}", _which); diff --git a/lib/emscripten/src/utils.rs b/lib/emscripten/src/utils.rs index 2ad5b9a407c..090ac25d709 100644 --- a/lib/emscripten/src/utils.rs +++ b/lib/emscripten/src/utils.rs @@ -5,6 +5,7 @@ use libc::stat; use std::ffi::CStr; use std::mem::size_of; use std::os::raw::c_char; +use std::path::PathBuf; use std::slice; use wasmer_runtime_core::memory::Memory; use wasmer_runtime_core::{ @@ -204,6 +205,37 @@ pub fn read_string_from_wasm(memory: &Memory, offset: u32) -> String { String::from_utf8_lossy(&v).to_owned().to_string() } +/// This function trys to find an entry in mapdir +/// translating paths into their correct value +pub fn get_cstr_path(ctx: &mut Ctx, path: *const i8) -> Option { + let path_str = unsafe { std::ffi::CStr::from_ptr(path).to_str().unwrap() }.to_string(); + if let Some(val) = get_emscripten_data(ctx).mapped_dirs.get(&path_str) { + std::ffi::CString::new(val.to_string_lossy().as_bytes()).ok() + } else { + None + } +} + +/// gets the current directory +/// handles mapdir logic +pub fn get_current_directory(ctx: &mut Ctx) -> Option { + if let Some(val) = get_emscripten_data(ctx).mapped_dirs.get(".") { + return Some(val.clone()); + } + std::env::current_dir() + .map(|cwd| { + if let Some(val) = get_emscripten_data(ctx) + .mapped_dirs + .get(&cwd.to_string_lossy().to_string()) + { + val.clone() + } else { + cwd + } + }) + .ok() +} + #[cfg(test)] mod tests { use super::is_emscripten_module; diff --git a/src/bin/wasmer.rs b/src/bin/wasmer.rs index 00249ebf92c..cd43a5f0789 100644 --- a/src/bin/wasmer.rs +++ b/src/bin/wasmer.rs @@ -458,6 +458,7 @@ fn execute_wasm(options: &Run) -> Result<(), String> { }, options.args.iter().map(|arg| arg.as_str()).collect(), options.em_entrypoint.clone(), + mapped_dirs, ) .map_err(|e| format!("{:?}", e))?; } else { From 3a4517d5d6e0517450426d8fdf85e3a8ab9563f5 Mon Sep 17 00:00:00 2001 From: Mark McCaskey Date: Tue, 28 May 2019 10:06:22 -0700 Subject: [PATCH 2/6] clean up code, verify it behaves the same as emscripten with js --- lib/emscripten/src/syscalls/unix.rs | 57 +++++------------------------ 1 file changed, 10 insertions(+), 47 deletions(-) diff --git a/lib/emscripten/src/syscalls/unix.rs b/lib/emscripten/src/syscalls/unix.rs index 0283a756de4..81d3c667b23 100644 --- a/lib/emscripten/src/syscalls/unix.rs +++ b/lib/emscripten/src/syscalls/unix.rs @@ -815,70 +815,33 @@ pub fn ___syscall220(ctx: &mut Ctx, _which: i32, mut varargs: VarArgs) -> i32 { let dirp_addr: i32 = varargs.get(ctx); let count: u32 = varargs.get(ctx); - //let dir = dbg!(emscripten_memory_pointer!(ctx.memory(0), dbg!(fd)) as *mut libc::DIR); let dirp = emscripten_memory_pointer!(ctx.memory(0), dirp_addr) as *mut u8; - - let mut pos = 0; // need to persist stream across calls? - let dir: *mut libc::DIR = unsafe { libc::fdopendir(fd) }; - dbg!("Start loop"); - while pos + 280 <= dbg!(count) as usize { - dbg!("Pre readdir"); + let mut pos = 0; + let offset = 280; + while pos + offset <= count as usize { let dirent = unsafe { readdir(dir) }; - dbg!("post readdir"); if dirent.is_null() { break; } - dbg!("dirent is not null"); unsafe { - *(dirp.add(pos) as *mut u64) = dbg!((*dirent).d_ino); - #[cfg(not(target_os = "macos"))] - { - *(dirp.add(pos + 8) as *mut u64) = 280 //dbg!((*dirent).d_off); - } - #[cfg(target_os = "macos")] - { - *(dirp.add(pos + 8) as *mut u64) = if pos + 280 > count as usize { - count.into() - } else { - dbg!((*dirent).d_seekoff); - pos as u64 + 56 //280 - }; //; - } - dbg!((*dirent).d_namlen); - *(dirp.add(pos + 16) as *mut u16) = 280; //dbg!((*dirent).d_reclen); - *(dirp.add(pos + 18) as *mut u8) = dbg!((*dirent).d_type); - let upper_bound = std::cmp::min((*dirent).d_reclen, 255) as usize; + *(dirp.add(pos) as *mut u64) = (*dirent).d_ino; + *(dirp.add(pos + 8) as *mut u64) = pos as u64 + offset as u64; + *(dirp.add(pos + 16) as *mut u16) = offset as u16; + *(dirp.add(pos + 18) as *mut u8) = (*dirent).d_type; + let upper_bound = std::cmp::min((*dirent).d_reclen, 254) as usize; let mut i = 0; while i < upper_bound { *(dirp.add(pos + 19 + i) as *mut i8) = (*dirent).d_name[i]; - //dbg!((*dirent).d_name[i] as u8 as char); - //dbg!((*dirent).d_name[i] as u8 as char); i += 1; } *(dirp.add(pos + 19 + i) as *mut i8) = 0 as i8; } - dbg!("dirent written to memory"); - pos += 280; - /*unsafe { - eprintln!( - "{}", - std::ffi::CStr::from_bytes_with_nul_unchecked({ - let arr = *(dirent as *const u8 as *const [u8; 256]); - &arr.to_vec() - .into_iter() - .map(|b| b as u8) - .collect::>()[..20] - }) - .to_str() - .unwrap() - ); - }*/ + pos += offset; } - - dbg!(pos as i32) + pos as i32 } /// fallocate From 6606a7c7b0a73fe21054d487820448177bd81955 Mon Sep 17 00:00:00 2001 From: Mark McCaskey Date: Tue, 28 May 2019 14:06:24 -0700 Subject: [PATCH 3/6] silence clippy on pointer alignment --- lib/emscripten/src/syscalls/unix.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/emscripten/src/syscalls/unix.rs b/lib/emscripten/src/syscalls/unix.rs index 7907e764211..797878fa366 100644 --- a/lib/emscripten/src/syscalls/unix.rs +++ b/lib/emscripten/src/syscalls/unix.rs @@ -814,6 +814,7 @@ pub fn ___syscall220(ctx: &mut Ctx, _which: i32, mut varargs: VarArgs) -> i32 { if dirent.is_null() { break; } + #[allow(clippy::cast_ptr_alignment)] unsafe { *(dirp.add(pos) as *mut u64) = (*dirent).d_ino; *(dirp.add(pos + 8) as *mut u64) = pos as u64 + offset as u64; From 7a7aa4608aa25964b1a948aef2ea1b998c28c834 Mon Sep 17 00:00:00 2001 From: Mark McCaskey Date: Wed, 29 May 2019 11:41:29 -0700 Subject: [PATCH 4/6] fix emscripten regression tests --- lib/emscripten/tests/emtests/_common.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/emscripten/tests/emtests/_common.rs b/lib/emscripten/tests/emtests/_common.rs index 43c1aecbf72..993842a1818 100644 --- a/lib/emscripten/tests/emtests/_common.rs +++ b/lib/emscripten/tests/emtests/_common.rs @@ -56,6 +56,7 @@ macro_rules! assert_emscripten_output { $name, $args, None, + vec![], ).expect("run_emscripten_instance finishes"); let output = capturer.end().unwrap().0; From 281c5ff45d5c066e2e0344347da31e00a4b9de2f Mon Sep 17 00:00:00 2001 From: Mark McCaskey Date: Wed, 29 May 2019 14:20:52 -0700 Subject: [PATCH 5/6] fix bugs in em mapdir, improve it for relative paths, use it more --- lib/emscripten/src/syscalls/mod.rs | 47 +++++++++++------ lib/emscripten/src/syscalls/unix.rs | 73 ++++++++++++++++++++------ lib/emscripten/src/syscalls/windows.rs | 20 +++++-- lib/emscripten/src/utils.rs | 32 +++++++++-- src/bin/wasmer.rs | 3 -- 5 files changed, 131 insertions(+), 44 deletions(-) diff --git a/lib/emscripten/src/syscalls/mod.rs b/lib/emscripten/src/syscalls/mod.rs index 05766bf7943..94f11894732 100644 --- a/lib/emscripten/src/syscalls/mod.rs +++ b/lib/emscripten/src/syscalls/mod.rs @@ -96,9 +96,12 @@ pub fn ___syscall6(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_int pub fn ___syscall12(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_int { debug!("emscripten::___syscall12 (chdir) {}", _which); let path_ptr = varargs.get_str(ctx); - let real_path = get_cstr_path(ctx, path_ptr) - .map(|cstr| cstr.as_c_str() as *const _ as *const i8) - .unwrap_or(path_ptr); + let real_path_owned = get_cstr_path(ctx, path_ptr); + let real_path = if let Some(ref rp) = real_path_owned { + rp.as_c_str().as_ptr() + } else { + path_ptr + }; let ret = unsafe { chdir(real_path) }; debug!( "=> path: {:?}, ret: {}", @@ -129,12 +132,18 @@ pub fn ___syscall38(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> i32 { debug!("emscripten::___syscall38 (rename)"); let old_path = varargs.get_str(ctx); let new_path = varargs.get_str(ctx); - let real_old_path = get_cstr_path(ctx, old_path) - .map(|cstr| cstr.as_c_str() as *const _ as *const i8) - .unwrap_or(old_path); - let real_new_path = get_cstr_path(ctx, new_path) - .map(|cstr| cstr.as_c_str() as *const _ as *const i8) - .unwrap_or(new_path); + let real_old_path_owned = get_cstr_path(ctx, old_path); + let real_old_path = if let Some(ref rp) = real_old_path_owned { + rp.as_c_str().as_ptr() + } else { + old_path + }; + let real_new_path_owned = get_cstr_path(ctx, new_path); + let real_new_path = if let Some(ref rp) = real_new_path_owned { + rp.as_c_str().as_ptr() + } else { + new_path + }; let result = unsafe { rename(real_old_path, real_new_path) }; debug!( "=> old_path: {}, new_path: {}, result: {}", @@ -149,9 +158,12 @@ pub fn ___syscall38(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> i32 { pub fn ___syscall40(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_int { debug!("emscripten::___syscall40 (rmdir)"); let pathname_addr = varargs.get_str(ctx); - let real_path = get_cstr_path(ctx, pathname_addr) - .map(|cstr| cstr.as_c_str() as *const _ as *const i8) - .unwrap_or(pathname_addr); + let real_path_owned = get_cstr_path(ctx, pathname_addr); + let real_path = if let Some(ref rp) = real_path_owned { + rp.as_c_str().as_ptr() + } else { + pathname_addr + }; unsafe { rmdir(real_path) } } @@ -433,16 +445,19 @@ pub fn ___syscall195(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_in let pathname_addr = varargs.get_str(ctx); let buf: u32 = varargs.get(ctx); - let real_path = get_cstr_path(ctx, pathname_addr) - .map(|cstr| cstr.as_c_str() as *const _ as *const i8) - .unwrap_or(pathname_addr); + let real_path_owned = get_cstr_path(ctx, pathname_addr); + let real_path = if let Some(ref rp) = real_path_owned { + rp.as_c_str().as_ptr() + } else { + pathname_addr + }; unsafe { let mut _stat: stat = std::mem::zeroed(); let ret = stat(real_path, &mut _stat); debug!( "=> pathname: {}, buf: {} = {}, last os error: {}", - std::ffi::CStr::from_ptr(pathname_addr).to_str().unwrap(), + std::ffi::CStr::from_ptr(real_path).to_str().unwrap(), buf, ret, Error::last_os_error() diff --git a/lib/emscripten/src/syscalls/unix.rs b/lib/emscripten/src/syscalls/unix.rs index 797878fa366..1c4db071b53 100644 --- a/lib/emscripten/src/syscalls/unix.rs +++ b/lib/emscripten/src/syscalls/unix.rs @@ -108,9 +108,12 @@ pub fn ___syscall5(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_int let pathname_addr = varargs.get_str(ctx); let flags: i32 = varargs.get(ctx); let mode: u32 = varargs.get(ctx); - let real_path = utils::get_cstr_path(ctx, pathname_addr) - .map(|cstr| cstr.as_c_str() as *const _ as *const i8) - .unwrap_or(pathname_addr); + let real_path_owned = utils::get_cstr_path(ctx, pathname_addr); + let real_path = if let Some(ref rp) = real_path_owned { + rp.as_c_str().as_ptr() + } else { + pathname_addr + }; let _path_str = unsafe { std::ffi::CStr::from_ptr(real_path).to_str().unwrap() }; let fd = unsafe { open(real_path, flags, mode) }; debug!( @@ -158,12 +161,18 @@ pub fn ___syscall83(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_int let path1 = varargs.get_str(ctx); let path2 = varargs.get_str(ctx); - let real_path1 = utils::get_cstr_path(ctx, path1) - .map(|cstr| cstr.as_c_str() as *const _ as *const i8) - .unwrap_or(path1); - let real_path2 = utils::get_cstr_path(ctx, path2) - .map(|cstr| cstr.as_c_str() as *const _ as *const i8) - .unwrap_or(path2); + let real_path1_owned = utils::get_cstr_path(ctx, path1); + let real_path1 = if let Some(ref rp) = real_path1_owned { + rp.as_c_str().as_ptr() + } else { + path1 + }; + let real_path2_owned = utils::get_cstr_path(ctx, path2); + let real_path2 = if let Some(ref rp) = real_path2_owned { + rp.as_c_str().as_ptr() + } else { + path2 + }; let result = unsafe { symlink(real_path1, real_path2) }; debug!( "=> path1: {}, path2: {}, result: {}", @@ -191,12 +200,18 @@ pub fn ___syscall194(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_in pub fn ___syscall198(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_int { debug!("emscripten::___syscall198 (lchown) {}", _which); let path_ptr = varargs.get_str(ctx); + let real_path_owned = utils::get_cstr_path(ctx, path_ptr); + let real_path = if let Some(ref rp) = real_path_owned { + rp.as_c_str().as_ptr() + } else { + path_ptr + }; let uid: uid_t = varargs.get(ctx); let gid: gid_t = varargs.get(ctx); - let result = unsafe { lchown(path_ptr, uid, gid) }; + let result = unsafe { lchown(real_path, uid, gid) }; debug!( "=> path: {}, uid: {}, gid: {}, result: {}", - unsafe { std::ffi::CStr::from_ptr(path_ptr).to_str().unwrap() }, + unsafe { std::ffi::CStr::from_ptr(real_path).to_str().unwrap() }, uid, gid, result, @@ -226,10 +241,16 @@ pub fn ___syscall212(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_in debug!("emscripten::___syscall212 (chown) {}", _which); let pathname_addr = varargs.get_str(ctx); + let real_path_owned = utils::get_cstr_path(ctx, pathname_addr); + let real_path = if let Some(ref rp) = real_path_owned { + rp.as_c_str().as_ptr() + } else { + pathname_addr + }; let owner: u32 = varargs.get(ctx); let group: u32 = varargs.get(ctx); - unsafe { chown(pathname_addr, owner, group) } + unsafe { chown(real_path, owner, group) } } /// madvise @@ -249,11 +270,17 @@ pub fn ___syscall219(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_in pub fn ___syscall33(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_int { debug!("emscripten::___syscall33 (access) {}", _which); let path = varargs.get_str(ctx); + let real_path_owned = utils::get_cstr_path(ctx, path); + let real_path = if let Some(ref rp) = real_path_owned { + rp.as_c_str().as_ptr() + } else { + path + }; let amode: c_int = varargs.get(ctx); - let result = unsafe { access(path, amode) }; + let result = unsafe { access(real_path, amode) }; debug!( "=> path: {}, amode: {}, result: {}", - unsafe { std::ffi::CStr::from_ptr(path).to_str().unwrap() }, + unsafe { std::ffi::CStr::from_ptr(real_path).to_str().unwrap() }, amode, result ); @@ -271,8 +298,14 @@ pub fn ___syscall34(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_int pub fn ___syscall39(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_int { debug!("emscripten::___syscall39 (mkdir) {}", _which); let pathname_addr = varargs.get_str(ctx); + let real_path_owned = utils::get_cstr_path(ctx, pathname_addr); + let real_path = if let Some(ref rp) = real_path_owned { + rp.as_c_str().as_ptr() + } else { + pathname_addr + }; let mode: u32 = varargs.get(ctx); - unsafe { mkdir(pathname_addr, mode as _) } + unsafe { mkdir(real_path, mode as _) } } /// dup @@ -771,6 +804,12 @@ pub fn ___syscall122(ctx: &mut Ctx, _which: c_int, mut varargs: VarArgs) -> c_in pub fn ___syscall196(ctx: &mut Ctx, _which: i32, mut varargs: VarArgs) -> i32 { debug!("emscripten::___syscall196 (lstat64) {}", _which); let path = varargs.get_str(ctx); + let real_path_owned = utils::get_cstr_path(ctx, path); + let real_path = if let Some(ref rp) = real_path_owned { + rp.as_c_str().as_ptr() + } else { + path + }; let buf_ptr: u32 = varargs.get(ctx); unsafe { let mut stat: stat = std::mem::zeroed(); @@ -781,9 +820,9 @@ pub fn ___syscall196(ctx: &mut Ctx, _which: i32, mut varargs: VarArgs) -> i32 { let stat_ptr = &mut stat as *mut stat; #[cfg(target_os = "macos")] - let ret = lstat64(path, stat_ptr); + let ret = lstat64(real_path, stat_ptr); #[cfg(not(target_os = "macos"))] - let ret = lstat(path, stat_ptr); + let ret = lstat(real_path, stat_ptr); debug!("ret: {}", ret); if ret != 0 { diff --git a/lib/emscripten/src/syscalls/windows.rs b/lib/emscripten/src/syscalls/windows.rs index 1355ebb1cdf..b3f7b748fd7 100644 --- a/lib/emscripten/src/syscalls/windows.rs +++ b/lib/emscripten/src/syscalls/windows.rs @@ -1,4 +1,4 @@ -use crate::utils::copy_cstr_into_wasm; +use crate::utils::{copy_cstr_into_wasm, get_cstr_path}; use crate::varargs::VarArgs; use libc::mkdir; use libc::open; @@ -19,9 +19,15 @@ pub fn ___syscall5(ctx: &mut Ctx, which: c_int, mut varargs: VarArgs) -> c_int { #[cfg(not(feature = "debug"))] let _ = which; let pathname_addr = varargs.get_str(ctx); + let real_path_owned = get_cstr_path(ctx, pathname_addr); + let real_path = if let Some(ref rp) = real_path_owned { + rp.as_c_str().as_ptr() + } else { + pathname_addr + }; let flags: i32 = varargs.get(ctx); let mode: u32 = varargs.get(ctx); - let path_str = unsafe { std::ffi::CStr::from_ptr(pathname_addr).to_str().unwrap() }; + let path_str = unsafe { std::ffi::CStr::from_ptr(real_path).to_str().unwrap() }; match path_str { "/dev/urandom" => { // create a fake urandom file for windows, super hacky @@ -47,7 +53,7 @@ pub fn ___syscall5(ctx: &mut Ctx, which: c_int, mut varargs: VarArgs) -> c_int { fd } _ => { - let fd = unsafe { open(pathname_addr, flags, mode) }; + let fd = unsafe { open(real_path, flags, mode) }; debug!( "=> pathname: {}, flags: {}, mode: {} = fd: {}\npath: {}", path_str, flags, mode, fd, path_str @@ -95,7 +101,13 @@ pub fn ___syscall39(ctx: &mut Ctx, which: c_int, mut varargs: VarArgs) -> c_int #[cfg(not(feature = "debug"))] let _ = which; let pathname_addr = varargs.get_str(ctx); - unsafe { mkdir(pathname_addr) } + let real_path_owned = get_cstr_path(ctx, pathname_addr); + let real_path = if let Some(ref rp) = real_path_owned { + rp.as_c_str().as_ptr() + } else { + pathname_addr + }; + unsafe { mkdir(real_path) } } /// dup diff --git a/lib/emscripten/src/utils.rs b/lib/emscripten/src/utils.rs index 090ac25d709..1127281307d 100644 --- a/lib/emscripten/src/utils.rs +++ b/lib/emscripten/src/utils.rs @@ -208,12 +208,36 @@ pub fn read_string_from_wasm(memory: &Memory, offset: u32) -> String { /// This function trys to find an entry in mapdir /// translating paths into their correct value pub fn get_cstr_path(ctx: &mut Ctx, path: *const i8) -> Option { + use std::collections::VecDeque; + let path_str = unsafe { std::ffi::CStr::from_ptr(path).to_str().unwrap() }.to_string(); - if let Some(val) = get_emscripten_data(ctx).mapped_dirs.get(&path_str) { - std::ffi::CString::new(val.to_string_lossy().as_bytes()).ok() - } else { - None + let data = get_emscripten_data(ctx); + let path = PathBuf::from(path_str); + let mut prefix_added = false; + let mut components = path.components().collect::>(); + // TODO(mark): handle absolute/non-canonical/non-relative paths too (this + // functionality should be shared among the abis) + if components.len() == 1 { + components.push_front(std::path::Component::CurDir); + prefix_added = true; + } + let mut cumulative_path = PathBuf::new(); + for c in components.into_iter() { + cumulative_path.push(c); + if let Some(val) = data + .mapped_dirs + .get(&cumulative_path.to_string_lossy().to_string()) + { + let rest_of_path = if !prefix_added { + path.strip_prefix(cumulative_path).ok()? + } else { + &path + }; + let rebased_path = val.join(rest_of_path); + return std::ffi::CString::new(rebased_path.to_string_lossy().as_bytes()).ok(); + } } + None } /// gets the current directory diff --git a/src/bin/wasmer.rs b/src/bin/wasmer.rs index cd43a5f0789..6f835b4ecfb 100644 --- a/src/bin/wasmer.rs +++ b/src/bin/wasmer.rs @@ -445,9 +445,6 @@ fn execute_wasm(options: &Run) -> Result<(), String> { .instantiate(&import_object) .map_err(|e| format!("Can't instantiate module: {:?}", e))?; - if !mapped_dirs.is_empty() { - eprintln!("WARN: mapdir is not implemented for emscripten targets"); - } wasmer_emscripten::run_emscripten_instance( &module, &mut instance, From 4c8915efd842a3fb36345ad3af498e9517c36c1b Mon Sep 17 00:00:00 2001 From: Mark McCaskey Date: Wed, 29 May 2019 14:35:58 -0700 Subject: [PATCH 6/6] add mapdir pr to changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93857f7d870..35838a19b60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ Blocks of changes will separated by version increments. ## **[Unreleased]** +- [#470](https://github.com/wasmerio/wasmer/pull/470) Add mapdir support to Emscripten, implement getdents for Unix - [#467](https://github.com/wasmerio/wasmer/pull/467) `wasmer_instantiate` returns better error messages in the runtime C API - [#463](https://github.com/wasmerio/wasmer/pull/463) Fix bug in WASI path_open allowing one level above preopened dir to be accessed - [#461](https://github.com/wasmerio/wasmer/pull/461) Prevent passing negative lengths in various places in the runtime C API