Skip to content

Commit

Permalink
analyze: memcpy rewrite improvements (#1176)
Browse files Browse the repository at this point in the history
This branch has several improvements to the rewriting of `memcpy` and
`memset`:

* Support `memcpy` on `Quantity::Single` references (`&T`/`&mut T`, as
opposed to `&[T]`/`&mut [T]`). The reference is automatically converted
to a 1-element slice. The `copy_from_slice` call will panic with an
out-of-bounds error if the number of elements to copy is not 0 or 1.
* Support `memcpy` on `Option` references. If the source or destination
pointer is `None`, then this will panic if the element count is nonzero,
and otherwise will do nothing.
* Use `mem::size_of::<T>()` instead of the original size of `T` for
converting `memcpy`/`memset` byte counts to element counts.

The `size_of` change is a bit subtle. In C, if `sizeof(struct foo) ==
16`, it's legal to `memcpy` an array of `struct foo` using `n * 16` as
the byte length (or `n * SIZE_OF_FOO`, where `SIZE_OF_FOO` is
`#define`'d to be 16) instead of `n * sizeof(struct foo)`. This presents
a problem for us because `c2rust-analyze` may change the size of `foo`
when rewriting its pointer fields. After rewriting, the `n * 16` version
computes a byte length based on the original size for `struct foo`,
while the `n * sizeof(struct foo)` computes it using the rewritten size.
For converting the byte length to an element count, we previously used
the original size, which works for the `n * 16` version but not for `n *
sizeof(struct foo)`. This branch changes the element count calculation
to divide by the rewritten size instead, which works for `n *
sizeof(struct foo)` but not for `n * 16`. The hope is that `n *
sizeof(struct foo)` is more common, since it's usually preferred in C
for portability reasons.
  • Loading branch information
spernsteiner authored Dec 3, 2024
2 parents 33465fe + 888dae9 commit a96f324
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 79 deletions.
14 changes: 14 additions & 0 deletions c2rust-analyze/src/rewrite/apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,20 @@ impl<S: Sink> Emitter<'_, S> {
self.emit(rw, 0)
}

Rewrite::Match(ref expr, ref cases) => {
self.emit_str("match ")?;
self.emit(expr, 0)?;
self.emit_str(" {\n")?;
for &(ref pat, ref body) in cases {
self.emit_str(" ")?;
self.emit_str(pat)?;
self.emit_str(" => ")?;
self.emit(body, 0)?;
self.emit_str(",\n")?;
}
self.emit_str("}")
}

Rewrite::TyPtr(ref rw, mutbl) => {
match mutbl {
Mutability::Not => self.emit_str("*const ")?,
Expand Down
138 changes: 107 additions & 31 deletions c2rust-analyze/src/rewrite/expr/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,38 +224,96 @@ impl<'tcx> ConvertVisitor<'tcx> {
}

mir_op::RewriteKind::MemcpySafe {
elem_size,
ref elem_ty,
dest_single,
dest_option,
src_single,
src_option,
} => {
// `memcpy(dest, src, n)` to a `copy_from_slice` call
// `memcpy(dest, src, byte_len)` to a `copy_from_slice` call
assert!(matches!(hir_rw, Rewrite::Identity));
assert!(!dest_single, "&T -> &[T] conversion for memcpy dest NYI");
assert!(!src_single, "&T -> &[T] conversion for memcpy src NYI");
Rewrite::Block(
vec![
Rewrite::Let(vec![
("dest".into(), self.get_subexpr(ex, 0)),
("src".into(), self.get_subexpr(ex, 1)),
("byte_len".into(), self.get_subexpr(ex, 2)),
]),
Rewrite::Let(vec![(
"n".into(),
format_rewrite!("byte_len as usize / {elem_size}"),
)]),
Rewrite::MethodCall(
"copy_from_slice".into(),
Box::new(format_rewrite!("dest[..n]")),
vec![format_rewrite!("&src[..n]")],
),
],
Some(Box::new(format_rewrite!("dest"))),
)
let mut stmts = Vec::with_capacity(6);

stmts.push(Rewrite::Let(vec![
("dest".into(), self.get_subexpr(ex, 0)),
("src".into(), self.get_subexpr(ex, 1)),
("byte_len".into(), self.get_subexpr(ex, 2)),
]));
// Best-effort check to detect size mismatches. This can happen if we infer the
// wrong pointee type, or if the C code used a hardcoded size for `elem_ty` but we
// changed its size during rewriting, or possibly other cases. These errors could
// potentially cause too few items to be copied, introducing a subtle logic error;
// this assertion tries to catch this situation early so it's easier to diagnose.
stmts.push(format_rewrite!(
"assert_eq!(byte_len as usize % std::mem::size_of::<{elem_ty}>(), 0)"
));
stmts.push(Rewrite::Let(vec![(
"n".into(),
format_rewrite!("byte_len as usize / std::mem::size_of::<{elem_ty}>()"),
)]));
let mut convert = |var: &str, is_mut, is_single, is_option| {
let single_to_slice = if is_single {
if is_mut {
format_rewrite!("std::slice::from_mut({var})")
} else {
format_rewrite!("std::slice::from_ref({var})")
}
} else {
Rewrite::Text(var.into())
};
let rhs = if is_option {
// ```
// match var {
// Some(x) => x, // or slice::from_ref(x), etc
// None => { assert_eq!(n, 0); &[] },
// }
// ```
let empty_slice = if is_mut {
format_rewrite!("&mut []")
} else {
format_rewrite!("&[]")
};
Rewrite::Match(
Box::new(Rewrite::Text(var.into())),
vec![
(format!("Some({var})"), single_to_slice),
(
"None".into(),
Rewrite::Block(
vec![format_rewrite!("assert_eq!(n, 0)")],
Some(Box::new(empty_slice)),
),
),
],
)
} else {
single_to_slice
};
stmts.push(Rewrite::Let1(var.into(), Box::new(rhs)));
};
convert("dest", true, dest_single, dest_option);
convert("src", false, src_single, src_option);
// `dest[..n].copy_from_slice(&src[..n]);`
stmts.push(Rewrite::MethodCall(
"copy_from_slice".into(),
Box::new(format_rewrite!("dest[..n]")),
vec![format_rewrite!("&src[..n]")],
));

// TODO: `memcpy` cases that actually use the return value are only partially
// supported. Currently we always return `&mut [T]`, which may not match the
// permissions on the output. Doing this correctly would require saving the
// original `dest` and then applying `slice::from_mut`, `OptionDowngrade`, and/or
// `DynOwnedDowngrade` to get `&mut [T]` for the call to `copy_from_slice`. This
// would allow ownership to flow from `p` to `q` in `q = memcpy(p, ...)`, for
// example. Fortunately, most code just uses `memcpy` for its side effect and
// ignores the return value.
Rewrite::Block(stmts, Some(Box::new(format_rewrite!("dest"))))
}

mir_op::RewriteKind::MemsetZeroize {
ref zero_ty,
elem_size,
ref elem_ty,
dest_single,
} => {
// `memset(dest, 0, n)` to assignments that zero out each field of `*dest`
Expand All @@ -275,9 +333,13 @@ impl<'tcx> ConvertVisitor<'tcx> {
("val".into(), self.get_subexpr(ex, 1)),
("byte_len".into(), self.get_subexpr(ex, 2)),
]),
// Best-effort check to detect size mismatches, as in `MemcpySafe`.
format_rewrite!(
"assert_eq!(byte_len as usize % std::mem::size_of::<{elem_ty}>(), 0)"
),
Rewrite::Let(vec![(
"n".into(),
format_rewrite!("byte_len as usize / {elem_size}"),
format_rewrite!("byte_len as usize / std::mem::size_of::<{elem_ty}>()"),
)]),
format_rewrite!("assert_eq!(val, 0, \"non-zero memset NYI\")"),
zeroize_body,
Expand All @@ -288,12 +350,12 @@ impl<'tcx> ConvertVisitor<'tcx> {

mir_op::RewriteKind::MallocSafe {
ref zero_ty,
elem_size,
ref elem_ty,
single,
}
| mir_op::RewriteKind::CallocSafe {
ref zero_ty,
elem_size,
ref elem_ty,
single,
} => {
// `malloc(n)` -> `Box::new(z)` or similar
Expand All @@ -302,17 +364,25 @@ impl<'tcx> ConvertVisitor<'tcx> {
let mut stmts = match *rw {
mir_op::RewriteKind::MallocSafe { .. } => vec![
Rewrite::Let(vec![("byte_len".into(), self.get_subexpr(ex, 0))]),
// Best-effort check to detect size mismatches, as in `MemcpySafe`.
format_rewrite!(
"assert_eq!(byte_len as usize % std::mem::size_of::<{elem_ty}>(), 0)"
),
Rewrite::Let1(
"n".into(),
Box::new(format_rewrite!("byte_len as usize / {elem_size}")),
Box::new(format_rewrite!(
"byte_len as usize / std::mem::size_of::<{elem_ty}>()"
)),
),
],
mir_op::RewriteKind::CallocSafe { .. } => vec![
Rewrite::Let(vec![
("count".into(), self.get_subexpr(ex, 0)),
("size".into(), self.get_subexpr(ex, 1)),
]),
format_rewrite!("assert_eq!(size, {elem_size})"),
format_rewrite!(
"assert_eq!(size as usize, std::mem::size_of::<{elem_ty}>())"
),
Rewrite::Let1("n".into(), Box::new(format_rewrite!("count as usize"))),
],
_ => unreachable!(),
Expand Down Expand Up @@ -342,7 +412,7 @@ impl<'tcx> ConvertVisitor<'tcx> {

mir_op::RewriteKind::ReallocSafe {
ref zero_ty,
elem_size,
ref elem_ty,
src_single,
dest_single,
option,
Expand All @@ -355,9 +425,15 @@ impl<'tcx> ConvertVisitor<'tcx> {
("src_ptr".into(), self.get_subexpr(ex, 0)),
("dest_byte_len".into(), self.get_subexpr(ex, 1)),
]),
// Best-effort check to detect size mismatches, as in `MemcpySafe`.
format_rewrite!(
"assert_eq!(dest_byte_len as usize % std::mem::size_of::<{elem_ty}>(), 0)"
),
Rewrite::Let1(
"dest_n".into(),
Box::new(format_rewrite!("dest_byte_len as usize / {elem_size}")),
Box::new(format_rewrite!(
"dest_byte_len as usize / std::mem::size_of::<{elem_ty}>()"
)),
),
];
if dest_single {
Expand Down
Loading

0 comments on commit a96f324

Please sign in to comment.