Skip to content

Commit ebfcece

Browse files
kpamnany, NHDaly, IanButterworth
authored and committed
Redact object data in heap snapshots, with option to opt-out (JuliaLang#55326) (#174)
The contents of strings can contain user data which may be proprietary and emitting them in the heap snapshot makes the heap snapshot a potential vulnerability rather than a useful debugging artifact. There are likely other tweaks necessary to make heap snapshots "safe", but this is one less. --------- Co-authored-by: Nathan Daly <[email protected]> Co-authored-by: Ian Butterworth <[email protected]>
1 parent 7ed5559 commit ebfcece

File tree

4 files changed

+40
-18
lines changed

4 files changed

+40
-18
lines changed

src/gc-heap-snapshot.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ struct HeapSnapshot {
183183
// global heap snapshot, mutated by garbage collector
184184
// when snapshotting is on.
185185
int gc_heap_snapshot_enabled = 0;
186+
int gc_heap_snapshot_redact_data = 0;
186187
HeapSnapshot *g_snapshot = nullptr;
187188
extern jl_mutex_t heapsnapshot_lock;
188189

@@ -195,7 +196,7 @@ void _add_synthetic_root_entries(HeapSnapshot *snapshot) JL_NOTSAFEPOINT;
195196

196197

197198
JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
198-
ios_t *strings, ios_t *json, char all_one)
199+
ios_t *strings, ios_t *json, char all_one, char redact_data)
199200
{
200201
HeapSnapshot snapshot;
201202
snapshot.nodes = nodes;
@@ -207,6 +208,7 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
207208

208209
// Enable snapshotting
209210
g_snapshot = &snapshot;
211+
gc_heap_snapshot_redact_data = redact_data;
210212
gc_heap_snapshot_enabled = true;
211213

212214
_add_synthetic_root_entries(&snapshot);
@@ -216,6 +218,7 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
216218

217219
// Disable snapshotting
218220
gc_heap_snapshot_enabled = false;
221+
gc_heap_snapshot_redact_data = 0;
219222
g_snapshot = nullptr;
220223

221224
jl_mutex_unlock(&heapsnapshot_lock);
@@ -328,7 +331,7 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT
328331

329332
if (jl_is_string(a)) {
330333
node_type = "String";
331-
name = jl_string_data(a);
334+
name = gc_heap_snapshot_redact_data ? "<redacted>" : jl_string_data(a);
332335
self_size = jl_string_len(a);
333336
}
334337
else if (jl_is_symbol(a)) {

src/gc-heap-snapshot.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ static inline void gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t i
121121
// Functions to call from Julia to take heap snapshot
122122
// ---------------------------------------------------------------------
123123
JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
124-
ios_t *strings, ios_t *json, char all_one);
124+
ios_t *strings, ios_t *json, char all_one, char redact_data);
125125

126126

127127
#ifdef __cplusplus

stdlib/Profile/src/Profile.jl

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1237,8 +1237,10 @@ end
12371237

12381238

12391239
"""
1240-
Profile.take_heap_snapshot(filepath::String, all_one::Bool=false, streaming=false)
1241-
Profile.take_heap_snapshot(all_one::Bool=false; dir::String, streaming=false)
1240+
Profile.take_heap_snapshot(filepath::String, all_one::Bool=false;
1241+
redact_data::Bool=true, streaming::Bool=false)
1242+
Profile.take_heap_snapshot(all_one::Bool=false; redact_data::Bool=true,
1243+
dir::Union{Nothing,AbstractString}=nothing, streaming::Bool=false)
12421244
12431245
Write a snapshot of the heap, in the JSON format expected by the Chrome
12441246
Devtools Heap Snapshot viewer (.heapsnapshot extension) to a file
@@ -1249,6 +1251,8 @@ full file path, or IO stream.
12491251
If `all_one` is true, then report the size of every object as one so they can be easily
12501252
counted. Otherwise, report the actual size.
12511253
1254+
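If `redact_data` is true (default), then do not emit the contents of any object. Currently this applies to string data: the contents of each `String` are replaced with `<redacted>` in the snapshot. Pass `redact_data=false` to include the actual string contents.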
1255+
12521256
If `streaming` is true, we will stream the snapshot data out into four files, using filepath
12531257
as the prefix, to avoid having to hold the entire snapshot in memory. This option should be
12541258
used for any setting where your memory is constrained. These files can then be reassembled
@@ -1264,27 +1268,27 @@ backwards-compatibility) and your process is killed, note that this will always
12641268
parts in the same directory as your provided filepath, so you can still reconstruct the
12651269
snapshot after the fact, via `assemble_snapshot()`.
12661270
"""
1267-
function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; streaming::Bool=false)
1271+
function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; redact_data::Bool=true, streaming::Bool=false)
12681272
if streaming
1269-
_stream_heap_snapshot(filepath, all_one)
1273+
_stream_heap_snapshot(filepath, all_one, redact_data)
12701274
else
12711275
# Support the legacy, non-streaming mode, by first streaming the parts, then
12721276
# reassembling it after we're done.
12731277
prefix = filepath
1274-
_stream_heap_snapshot(prefix, all_one)
1278+
_stream_heap_snapshot(prefix, all_one, redact_data)
12751279
Profile.HeapSnapshot.assemble_snapshot(prefix, filepath)
12761280
end
12771281
return filepath
12781282
end
1279-
function take_heap_snapshot(io::IO, all_one::Bool=false)
1283+
function take_heap_snapshot(io::IO, all_one::Bool=false; redact_data::Bool=true)
12801284
# Support the legacy, non-streaming mode, by first streaming the parts to a tempdir,
12811285
# then reassembling it after we're done.
12821286
dir = tempdir()
12831287
prefix = joinpath(dir, "snapshot")
1284-
_stream_heap_snapshot(prefix, all_one)
1288+
_stream_heap_snapshot(prefix, all_one, redact_data)
12851289
Profile.HeapSnapshot.assemble_snapshot(prefix, io)
12861290
end
1287-
function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool)
1291+
function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool, redact_data::Bool)
12881292
# Nodes and edges are binary files
12891293
open("$prefix.nodes", "w") do nodes
12901294
open("$prefix.edges", "w") do edges
@@ -1297,9 +1301,9 @@ function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool)
12971301
Base.@_lock_ios(json,
12981302
ccall(:jl_gc_take_heap_snapshot,
12991303
Cvoid,
1300-
(Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar),
1304+
(Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar, Cchar),
13011305
nodes.handle, edges.handle, strings.handle, json.handle,
1302-
Cchar(all_one))
1306+
Cchar(all_one), Cchar(redact_data))
13031307
)
13041308
)
13051309
)
@@ -1309,7 +1313,7 @@ function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool)
13091313
end
13101314
end
13111315
end
1312-
function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing) where {S <: AbstractString}
1316+
function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing, kwargs...) where {S <: AbstractString}
13131317
fname = "$(getpid())_$(time_ns()).heapsnapshot"
13141318
if isnothing(dir)
13151319
wd = pwd()
@@ -1324,7 +1328,7 @@ function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing)
13241328
else
13251329
fpath = joinpath(expanduser(dir), fname)
13261330
end
1327-
return take_heap_snapshot(fpath, all_one)
1331+
return take_heap_snapshot(fpath, all_one; kwargs...)
13281332
end
13291333

13301334
"""

stdlib/Profile/test/runtests.jl

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -280,16 +280,31 @@ end
280280

281281
@testset "HeapSnapshot" begin
282282
tmpdir = mktempdir()
283+
284+
# ensure that we can prevent redacting data
283285
fname = cd(tmpdir) do
284-
read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; print(Profile.take_heap_snapshot())"`, String)
286+
read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; const x = \"redact_this\"; print(Profile.take_heap_snapshot(; redact_data=false))"`, String)
285287
end
286288

287289
@test isfile(fname)
288290

289-
open(fname) do fs
290-
@test readline(fs) != ""
291+
sshot = read(fname, String)
292+
@test sshot != ""
293+
@test contains(sshot, "redact_this")
294+
295+
rm(fname)
296+
297+
# ensure that string data is redacted by default
298+
fname = cd(tmpdir) do
299+
read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; const x = \"redact_this\"; print(Profile.take_heap_snapshot())"`, String)
291300
end
292301

302+
@test isfile(fname)
303+
304+
sshot = read(fname, String)
305+
@test sshot != ""
306+
@test !contains(sshot, "redact_this")
307+
293308
rm(fname)
294309
rm(tmpdir, force = true, recursive = true)
295310
end

0 commit comments

Comments
 (0)