Skip to content

Commit 0e5b029

Browse files
kpamnany, NHDaly, IanButterworth
authored
Redact object data in heap snapshots, with option to opt-out (JuliaLang#55326) (#174)
The contents of strings can contain user data which may be proprietary and emitting them in the heap snapshot makes the heap snapshot a potential vulnerability rather than a useful debugging artifact. There are likely other tweaks necessary to make heap snapshots "safe", but this is one less. --------- Co-authored-by: Nathan Daly <[email protected]> Co-authored-by: Ian Butterworth <[email protected]>
1 parent 53e658c commit 0e5b029

File tree

4 files changed

+40
-18
lines changed

4 files changed

+40
-18
lines changed

src/gc-heap-snapshot.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ struct HeapSnapshot {
183183
// global heap snapshot, mutated by garbage collector
184184
// when snapshotting is on.
185185
int gc_heap_snapshot_enabled = 0;
186+
int gc_heap_snapshot_redact_data = 0;
186187
HeapSnapshot *g_snapshot = nullptr;
187188
extern jl_mutex_t heapsnapshot_lock;
188189

@@ -195,7 +196,7 @@ void _add_synthetic_root_entries(HeapSnapshot *snapshot) JL_NOTSAFEPOINT;
195196

196197

197198
JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
198-
ios_t *strings, ios_t *json, char all_one)
199+
ios_t *strings, ios_t *json, char all_one, char redact_data)
199200
{
200201
HeapSnapshot snapshot;
201202
snapshot.nodes = nodes;
@@ -207,6 +208,7 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
207208

208209
// Enable snapshotting
209210
g_snapshot = &snapshot;
211+
gc_heap_snapshot_redact_data = redact_data;
210212
gc_heap_snapshot_enabled = true;
211213

212214
_add_synthetic_root_entries(&snapshot);
@@ -216,6 +218,7 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
216218

217219
// Disable snapshotting
218220
gc_heap_snapshot_enabled = false;
221+
gc_heap_snapshot_redact_data = 0;
219222
g_snapshot = nullptr;
220223

221224
jl_mutex_unlock(&heapsnapshot_lock);
@@ -328,7 +331,7 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT
328331

329332
if (jl_is_string(a)) {
330333
node_type = "String";
331-
name = jl_string_data(a);
334+
name = gc_heap_snapshot_redact_data ? "<redacted>" : jl_string_data(a);
332335
self_size = jl_string_len(a);
333336
}
334337
else if (jl_is_symbol(a)) {

src/gc-heap-snapshot.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ static inline void gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t i
121121
// Functions to call from Julia to take heap snapshot
122122
// ---------------------------------------------------------------------
123123
JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
124-
ios_t *strings, ios_t *json, char all_one);
124+
ios_t *strings, ios_t *json, char all_one, char redact_data);
125125

126126

127127
#ifdef __cplusplus

stdlib/Profile/src/Profile.jl

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1220,8 +1220,10 @@ end
12201220

12211221

12221222
"""
1223-
Profile.take_heap_snapshot(filepath::String, all_one::Bool=false, streaming=false)
1224-
Profile.take_heap_snapshot(all_one::Bool=false; dir::String, streaming=false)
1223+
Profile.take_heap_snapshot(filepath::String, all_one::Bool=false;
1224+
redact_data::Bool=true, streaming::Bool=false)
1225+
Profile.take_heap_snapshot(all_one::Bool=false; redact_data::Bool=true,
1226+
dir::Union{Nothing,String}=nothing, streaming::Bool=false)
12251227
12261228
Write a snapshot of the heap, in the JSON format expected by the Chrome
12271229
Devtools Heap Snapshot viewer (.heapsnapshot extension) to a file
@@ -1232,6 +1234,8 @@ full file path, or IO stream.
12321234
If `all_one` is true, then report the size of every object as one so they can be easily
12331235
counted. Otherwise, report the actual size.
12341236
1237+
If `redact_data` is true (default), then do not emit the contents of any object.
1238+
12351239
If `streaming` is true, we will stream the snapshot data out into four files, using filepath
12361240
as the prefix, to avoid having to hold the entire snapshot in memory. This option should be
12371241
used for any setting where your memory is constrained. These files can then be reassembled
@@ -1247,27 +1251,27 @@ backwards-compatibility) and your process is killed, note that this will always
12471251
parts in the same directory as your provided filepath, so you can still reconstruct the
12481252
snapshot after the fact, via `assemble_snapshot()`.
12491253
"""
1250-
function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; streaming::Bool=false)
1254+
function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; redact_data::Bool=true, streaming::Bool=false)
12511255
if streaming
1252-
_stream_heap_snapshot(filepath, all_one)
1256+
_stream_heap_snapshot(filepath, all_one, redact_data)
12531257
else
12541258
# Support the legacy, non-streaming mode, by first streaming the parts, then
12551259
# reassembling it after we're done.
12561260
prefix = filepath
1257-
_stream_heap_snapshot(prefix, all_one)
1261+
_stream_heap_snapshot(prefix, all_one, redact_data)
12581262
Profile.HeapSnapshot.assemble_snapshot(prefix, filepath)
12591263
end
12601264
return filepath
12611265
end
1262-
function take_heap_snapshot(io::IO, all_one::Bool=false)
1266+
function take_heap_snapshot(io::IO, all_one::Bool=false; redact_data::Bool=true)
12631267
# Support the legacy, non-streaming mode, by first streaming the parts to a tempdir,
12641268
# then reassembling it after we're done.
12651269
dir = tempdir()
12661270
prefix = joinpath(dir, "snapshot")
1267-
_stream_heap_snapshot(prefix, all_one)
1271+
_stream_heap_snapshot(prefix, all_one, redact_data)
12681272
Profile.HeapSnapshot.assemble_snapshot(prefix, io)
12691273
end
1270-
function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool)
1274+
function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool, redact_data::Bool)
12711275
# Nodes and edges are binary files
12721276
open("$prefix.nodes", "w") do nodes
12731277
open("$prefix.edges", "w") do edges
@@ -1280,9 +1284,9 @@ function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool)
12801284
Base.@_lock_ios(json,
12811285
ccall(:jl_gc_take_heap_snapshot,
12821286
Cvoid,
1283-
(Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar),
1287+
(Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar, Cchar),
12841288
nodes.handle, edges.handle, strings.handle, json.handle,
1285-
Cchar(all_one))
1289+
Cchar(all_one), Cchar(redact_data))
12861290
)
12871291
)
12881292
)
@@ -1292,7 +1296,7 @@ function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool)
12921296
end
12931297
end
12941298
end
1295-
function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing) where {S <: AbstractString}
1299+
function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing, kwargs...) where {S <: AbstractString}
12961300
fname = "$(getpid())_$(time_ns()).heapsnapshot"
12971301
if isnothing(dir)
12981302
wd = pwd()
@@ -1307,7 +1311,7 @@ function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing)
13071311
else
13081312
fpath = joinpath(expanduser(dir), fname)
13091313
end
1310-
return take_heap_snapshot(fpath, all_one)
1314+
return take_heap_snapshot(fpath, all_one; kwargs...)
13111315
end
13121316

13131317
"""

stdlib/Profile/test/runtests.jl

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -280,16 +280,31 @@ end
280280

281281
@testset "HeapSnapshot" begin
282282
tmpdir = mktempdir()
283+
284+
# ensure that we can prevent redacting data
283285
fname = cd(tmpdir) do
284-
read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; print(Profile.take_heap_snapshot())"`, String)
286+
read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; const x = \"redact_this\"; print(Profile.take_heap_snapshot(; redact_data=false))"`, String)
285287
end
286288

287289
@test isfile(fname)
288290

289-
open(fname) do fs
290-
@test readline(fs) != ""
291+
sshot = read(fname, String)
292+
@test sshot != ""
293+
@test contains(sshot, "redact_this")
294+
295+
rm(fname)
296+
297+
# ensure that string data is redacted by default
298+
fname = cd(tmpdir) do
299+
read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; const x = \"redact_this\"; print(Profile.take_heap_snapshot())"`, String)
291300
end
292301

302+
@test isfile(fname)
303+
304+
sshot = read(fname, String)
305+
@test sshot != ""
306+
@test !contains(sshot, "redact_this")
307+
293308
rm(fname)
294309
rm(tmpdir, force = true, recursive = true)
295310
end

0 commit comments

Comments
 (0)