Skip to content

Commit e86d9f8

Browse files
authored
refactor: detect large files in Io functions (#9828)
`Io.read_all` and related functions read the contents of a file in a string, which has a size limit (`Sys.max_string_length`) and can be an issue in 32-bit systems. This makes an explicit check and raises a `Code_error` in these situations. Signed-off-by: Etienne Millon <[email protected]>
1 parent 46624a5 commit e86d9f8

File tree

3 files changed

+30
-6
lines changed

3 files changed

+30
-6
lines changed

otherlibs/stdune/src/io.ml

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ struct
275275
if r = len then Bytes.unsafe_to_string buf else Bytes.sub_string buf ~pos:0 ~len:r
276276
;;
277277

278-
let read_all =
278+
let read_all_unless_large =
279279
(* We use 65536 because that is the size of OCaml's IO buffers. *)
280280
let chunk_size = 65536 in
281281
(* Generic function for channels such that seeking is unsupported or
@@ -286,7 +286,7 @@ struct
286286
loop ()
287287
in
288288
try loop () with
289-
| End_of_file -> Buffer.contents buffer
289+
| End_of_file -> Ok (Buffer.contents buffer)
290290
in
291291
fun t ->
292292
(* Optimisation for regular files: if the channel supports seeking, we
@@ -295,6 +295,7 @@ struct
295295
regular files so this optimizations seems worth it. *)
296296
match in_channel_length t with
297297
| exception Sys_error _ -> read_all_generic t (Buffer.create chunk_size)
298+
| n when n > Sys.max_string_length -> Error ()
298299
| n ->
299300
(* For some files [in_channel_length] returns an invalid value. For
300301
instance for files in /proc it returns [0] and on Windows the
@@ -307,7 +308,7 @@ struct
307308
end of the file *)
308309
let s = eagerly_input_string t n in
309310
(match input_char t with
310-
| exception End_of_file -> s
311+
| exception End_of_file -> Ok s
311312
| c ->
312313
(* The [+ chunk_size] is to make sure there is at least [chunk_size]
313314
free space so that the first [Buffer.add_channel buffer t
@@ -318,7 +319,17 @@ struct
318319
read_all_generic t buffer)
319320
;;
320321

321-
let read_file ?binary fn = with_file_in fn ~f:read_all ?binary
322+
let path_to_dyn path = String.to_dyn (Path.to_string path)
323+
324+
let read_file ?binary fn =
325+
match with_file_in fn ~f:read_all_unless_large ?binary with
326+
| Ok x -> x
327+
| Error () ->
328+
Code_error.raise
329+
"read_file: file is larger than Sys.max_string_length"
330+
[ "fn", path_to_dyn fn ]
331+
;;
332+
322333
let lines_of_file fn = with_file_in fn ~f:input_lines ~binary:false
323334
let zero_strings_of_file fn = with_file_in fn ~f:input_zero_separated ~binary:true
324335

otherlibs/stdune/src/io.mli

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,16 @@ val input_lines : in_channel -> string list
99
unrelated channels because it uses a statically-allocated global buffer. *)
1010
val copy_channels : in_channel -> out_channel -> unit
1111

12-
val read_all : in_channel -> string
12+
(** Try to read everything from a channel. Returns [Error ()] if the contents
13+
are larger than [Sys.max_string_length]. This is generally a problem only
14+
on 32-bit systems.
15+
Overflow detection does not happen in the following cases:
16+
- channel is not a file (for example, a pipe)
17+
- if the detected size is unreliable (/proc)
18+
- race condition with another process changing the size of the underlying
19+
file.
20+
In these cases, an exception might be raised by [Buffer] functions. *)
21+
val read_all_unless_large : in_channel -> (string, unit) result
1322

1423
include Io_intf.S with type path = Path.t
1524
module String_path : Io_intf.S with type path = string

test/blackbox-tests/utils/melc_stdlib_prefix.ml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@ open Stdune
22

33
let command cmd args =
44
let p = Unix.open_process_args_in cmd (Array.of_list (cmd :: args)) in
5-
let output = Io.read_all p in
5+
let output =
6+
match Io.read_all_unless_large p with
7+
| Ok x -> x
8+
| Error () -> assert false
9+
in
610
match Unix.close_process_in p with
711
| WEXITED n when n = 0 -> Ok output
812
| WEXITED n -> Error n

0 commit comments

Comments
 (0)