From 643b1e31de79cf876def6ad4cb3b5dd0211b65c8 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 7 Jun 2017 14:09:22 -0400 Subject: [PATCH 01/14] clean up and export crc32c function --- NEWS.md | 2 ++ base/exports.jl | 1 + base/util.jl | 30 +++++++++++++++++++++++------- doc/src/stdlib/arrays.md | 1 + test/misc.jl | 14 ++++++++++++-- 5 files changed, 39 insertions(+), 9 deletions(-) diff --git a/NEWS.md b/NEWS.md index aba5151988789..e1119464b0172 100644 --- a/NEWS.md +++ b/NEWS.md @@ -56,6 +56,8 @@ Library improvements * `resize!` and `sizehint!` methods no longer over-reserve memory when the requested array size is more than double of its current size ([#22038]). + * The `crc32c` function for CRC-32c checksums is now exported. + * The output of `versioninfo()` is now controlled with keyword arguments ([#21974]). Compiler/Runtime improvements diff --git a/base/exports.jl b/base/exports.jl index d88133ca324d7..0aeba4d7b2eb0 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -1047,6 +1047,7 @@ export atexit, atreplinit, clipboard, + crc32c, exit, ntuple, quit, diff --git a/base/util.jl b/base/util.jl index a909b8355cf7a..91428e3f838f1 100644 --- a/base/util.jl +++ b/base/util.jl @@ -765,22 +765,38 @@ if is_windows() end -# compute sizeof correctly for strings, arrays, and subarrays of bytes -_sizeof(a) = sizeof(a) -_sizeof(a::FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N) = length(a) - """ crc32c(data, crc::UInt32=0x00000000) Compute the CRC-32c checksum of the given `data`, which can be -an `Array{UInt8}`, a contiguous subarray thereof, or a `String`. Optionally, you can pass +an `Array{UInt8}`, a contiguous subarray thereof, an `IOBuffer`, or +a filename (whose contents will be checksummed). Optionally, you can pass a starting `crc` integer to be mixed in with the checksum. 
The `crc` parameter can be used to compute a checksum on data divided into chunks: performing `crc32c(data2, crc32c(data1))` is equivalent to the checksum of `[data1; data2]`. (Technically, a little-endian checksum is computed.) + +To checksum `s::String`, you can do `crc32c(Vector{UInt8}(s))`; note +that the result is specific to the UTF-8 encoding of `String`. To checksum +an `a::Array` of some other bitstype, you can do `crc32c(reinterpret(UInt8,a))`; +note that the result is endian-dependent. """ -crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N,String}, crc::UInt32=0x00000000) = - ccall(:jl_crc32c, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, a, _sizeof(a)) +function crc32c end + +crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000) = + ccall(:jl_crc32c, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, a, length(a)) + +crc32c(buf::IOBuffer, crc::UInt32=0x00000000) = crc32c(buf.data, crc) + +function crc32c(filename::AbstractString, crc::UInt32=0x00000000) + open(filename, "r") do f + data = Mmap.mmap(f, Vector{UInt8}, filesize(f), 0) + checksum = crc32c(data, crc) + finalize(data) + checksum + end +end + """ @kwdef typedef diff --git a/doc/src/stdlib/arrays.md b/doc/src/stdlib/arrays.md index 2fc43f52bee53..9221735386c58 100644 --- a/doc/src/stdlib/arrays.md +++ b/doc/src/stdlib/arrays.md @@ -131,6 +131,7 @@ Base.cumprod! Base.cumsum Base.cumsum! 
Base.cumsum_kbn +Base.crc32c Base.LinAlg.diff Base.LinAlg.gradient Base.rot180 diff --git a/test/misc.jl b/test/misc.jl index f6cc13c538f78..ac7c28a1ca02a 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -571,9 +571,19 @@ for force_software_crc in (1,0) # test that crc parameter is equivalent to checksum of concatenated data, # and test crc of subarrays: a = UInt8[1:255;] - crc_256 = Base.crc32c(UInt8[1:255;]) + crc_256 = crc32c(a) @views for n = 1:255 - @test Base.crc32c(a[n+1:end], Base.crc32c(a[1:n])) == crc_256 + @test crc32c(a[n+1:end], Base.crc32c(a[1:n])) == crc_256 + end + + @test crc32c(IOBuffer(a)) == crc_256 + let f = tempname() + try + write(f, a) + @test crc32c(f) == crc_256 + finally + rm(f, force=true) + end end end From 721bbf530b80231e18aabecac3f4852f6a5a52e2 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 7 Jun 2017 14:21:00 -0400 Subject: [PATCH 02/14] added PR to NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index e1119464b0172..b1a9d4e5e26f5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -56,7 +56,7 @@ Library improvements * `resize!` and `sizehint!` methods no longer over-reserve memory when the requested array size is more than double of its current size ([#22038]). - * The `crc32c` function for CRC-32c checksums is now exported. + * The `crc32c` function for CRC-32c checksums is now exported ([#22274]). * The output of `versioninfo()` is now controlled with keyword arguments ([#21974]). From 184acd1b0d9d9faa020424da40920180f8aa7e93 Mon Sep 17 00:00:00 2001 From: "Steven G. 
Johnson" Date: Wed, 7 Jun 2017 16:26:08 -0400 Subject: [PATCH 03/14] use read, not mmap, for crc --- base/loading.jl | 14 ++++---------- base/util.jl | 26 +++++++++++++++++++------- doc/src/stdlib/io-network.md | 1 + test/misc.jl | 8 ++++++-- 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 1a6b5671342d0..052488b7374e6 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -682,10 +682,8 @@ function compilecache(name::String) end if success(create_expr_cache(path, cachefile, concrete_deps)) # append checksum to the end of the .ji file: - open(cachefile, "a+") do f - data = Mmap.mmap(f, Vector{UInt8}, filesize(f), 0) - checksum = crc32c(data) - finalize(data) + checksum = crc32c(cachefile) + open(cachefile, "a") do f write(f, hton(checksum)) end else @@ -809,12 +807,8 @@ function stale_cachefile(modpath::String, cachefile::String) end # finally, verify that the cache file has a valid checksum - data = Mmap.mmap(io, Vector{UInt8}, filesize(io), 0) - # checksum = UInt32 read in bigendian format from the last 4 bytes: - checksum = UInt32(data[end]) + UInt32(data[end-1])<<8 + UInt32(data[end-2])<<16 + UInt32(data[end-3])<<24 - crc = crc32c(@view(data[1:end-4])) - finalize(data) - if checksum != crc + crc = crc32c(seekstart(io), filesize(io)-4) + if crc != ntoh(read(io, UInt32)) DEBUG_LOADING[] && info("JL_DEBUG_LOADING: Rejecting cache file $cachefile because it has an invalid checksum.") return true end diff --git a/base/util.jl b/base/util.jl index 91428e3f838f1..5797e9aa7d8cf 100644 --- a/base/util.jl +++ b/base/util.jl @@ -769,7 +769,7 @@ end crc32c(data, crc::UInt32=0x00000000) Compute the CRC-32c checksum of the given `data`, which can be -an `Array{UInt8}`, a contiguous subarray thereof, an `IOBuffer`, or +an `Array{UInt8}`, a contiguous subarray thereof, or an `IOBuffer`, or a filename (whose contents will be checksummed). Optionally, you can pass a starting `crc` integer to be mixed in with the checksum. 
The `crc` parameter can be used to compute a checksum on data divided into chunks: performing @@ -788,15 +788,27 @@ crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} wher crc32c(buf::IOBuffer, crc::UInt32=0x00000000) = crc32c(buf.data, crc) -function crc32c(filename::AbstractString, crc::UInt32=0x00000000) - open(filename, "r") do f - data = Mmap.mmap(f, Vector{UInt8}, filesize(f), 0) - checksum = crc32c(data, crc) - finalize(data) - checksum +""" + crc32c(f::IO, nb::Integer, crc::UInt32=0x00000000) + +Read up to `nb` bytes from `f` and return the CRC-32c checksum, optionally +mixed with a starting `crc` integer. +""" +function crc32c(f::IO, nb::Integer, crc::UInt32=0x00000000) + buf = Array{UInt8}(min(nb, 16384)) + while !eof(f) && nb > 0 + n = readbytes!(f, buf, nb) + crc = ccall(:jl_crc32c, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, buf, n) + nb -= n end + return crc end +crc32c(filename::AbstractString, crc::UInt32=0x00000000) = + open(filename, "r") do f + crc32c(f, filesize(f), crc) + end + """ @kwdef typedef diff --git a/doc/src/stdlib/io-network.md b/doc/src/stdlib/io-network.md index 88fd2b2cf3bb0..ad24ca445ce4b 100644 --- a/doc/src/stdlib/io-network.md +++ b/doc/src/stdlib/io-network.md @@ -12,6 +12,7 @@ Base.take!(::Base.AbstractIOBuffer) Base.fdio Base.flush Base.close +Base.crc32(::IO, ::Integer, ::UInt32) Base.write Base.read Base.read! 
diff --git a/test/misc.jl b/test/misc.jl index ac7c28a1ca02a..63740e8668e40 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -566,14 +566,14 @@ end for force_software_crc in (1,0) ccall(:jl_crc32c_init, Void, (Cint,), force_software_crc) for (n,crc) in [(0,0x00000000),(1,0xa016d052),(2,0x03f89f52),(3,0xf130f21e),(4,0x29308cf4),(5,0x53518fab),(6,0x4f4dfbab),(7,0xbd3a64dc),(8,0x46891f81),(9,0x5a14b9f9),(10,0xb219db69),(11,0xd232a91f),(12,0x51a15563),(13,0x9f92de41),(14,0x4d8ae017),(15,0xc8b74611),(16,0xa0de6714),(17,0x672c992a),(18,0xe8206eb6),(19,0xc52fd285),(20,0x327b0397),(21,0x318263dd),(22,0x08485ccd),(23,0xea44d29e),(24,0xf6c0cb13),(25,0x3969bba2),(26,0x6a8810ec),(27,0x75b3d0df),(28,0x82d535b1),(29,0xbdf7fc12),(30,0x1f836b7d),(31,0xd29f33af),(32,0x8e4acb3e),(33,0x1cbee2d1),(34,0xb25f7132),(35,0xb0fa484c),(36,0xb9d262b4),(37,0x3207fe27),(38,0xa024d7ac),(39,0x49a2e7c5),(40,0x0e2c157f),(41,0x25f7427f),(42,0x368c6adc),(43,0x75efd4a5),(44,0xa84c5c31),(45,0x0fc817b2),(46,0x8d99a881),(47,0x5cc3c078),(48,0x9983d5e2),(49,0x9267c2db),(50,0xc96d4745),(51,0x058d8df3),(52,0x453f9cf3),(53,0xb714ade1),(54,0x55d3c2bc),(55,0x495710d0),(56,0x3bddf494),(57,0x4f2577d0),(58,0xdae0f604),(59,0x3c57c632),(60,0xfe39bbb0),(61,0x6f5d1d41),(62,0x7d996665),(63,0x68c738dc),(64,0x8dfea7ae)] - @test Base.crc32c(UInt8[1:n;]) == crc + @test crc32c(UInt8[1:n;]) == crc end # test that crc parameter is equivalent to checksum of concatenated data, # and test crc of subarrays: a = UInt8[1:255;] crc_256 = crc32c(a) @views for n = 1:255 - @test crc32c(a[n+1:end], Base.crc32c(a[1:n])) == crc_256 + @test crc32c(a[n+1:end], crc32c(a[1:n])) == crc_256 end @test crc32c(IOBuffer(a)) == crc_256 @@ -581,6 +581,10 @@ for force_software_crc in (1,0) try write(f, a) @test crc32c(f) == crc_256 + open(f, "r") do io + @test crc32c(io, 16) == crc32c(a[1:16]) + @test crc32c(io, 16) == crc32c(a[17:32]) + end finally rm(f, force=true) end From c3f5d91e9874c4b02f4974b3fd6680227597aff0 Mon Sep 17 00:00:00 2001 From: 
"Steven G. Johnson" Date: Wed, 7 Jun 2017 16:41:38 -0400 Subject: [PATCH 04/14] slight optimization (don't open cachefile twice for checksum) --- base/loading.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 052488b7374e6..77bea1d098d20 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -682,9 +682,8 @@ function compilecache(name::String) end if success(create_expr_cache(path, cachefile, concrete_deps)) # append checksum to the end of the .ji file: - checksum = crc32c(cachefile) - open(cachefile, "a") do f - write(f, hton(checksum)) + open(cachefile, "a+") do f + write(f, hton(crc32c(seekstart(f), filesize(f)))) end else error("Failed to precompile $name to $cachefile.") From bd6465154943b42ad139f8faeff1f6735b2a1fd7 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 7 Jun 2017 16:57:34 -0400 Subject: [PATCH 05/14] avoid resizing buffer in readbytes --- base/util.jl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/base/util.jl b/base/util.jl index 5797e9aa7d8cf..7e3762297409b 100644 --- a/base/util.jl +++ b/base/util.jl @@ -783,8 +783,10 @@ note that the result is endian-dependent. """ function crc32c end +unsafe_crc32c(a, n, crc) = ccall(:jl_crc32c, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, a, n) + crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000) = - ccall(:jl_crc32c, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, a, length(a)) + unsafe_crc32c(a, length(a), crc) crc32c(buf::IOBuffer, crc::UInt32=0x00000000) = crc32c(buf.data, crc) @@ -796,12 +798,12 @@ mixed with a starting `crc` integer. 
""" function crc32c(f::IO, nb::Integer, crc::UInt32=0x00000000) buf = Array{UInt8}(min(nb, 16384)) - while !eof(f) && nb > 0 - n = readbytes!(f, buf, nb) - crc = ccall(:jl_crc32c, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, buf, n) + while !eof(f) && nb > 16384 + n = readbytes!(f, buf) + crc = unsafe_crc32c(buf, n, crc) nb -= n end - return crc + return unsafe_crc32c(buf, readbytes!(f, buf, nb), crc) end crc32c(filename::AbstractString, crc::UInt32=0x00000000) = From 62d7dc454adb2a43c9c884fc194cbb4b5af585cc Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 7 Jun 2017 17:00:39 -0400 Subject: [PATCH 06/14] add an assertion --- base/util.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/base/util.jl b/base/util.jl index 7e3762297409b..c889610c9d406 100644 --- a/base/util.jl +++ b/base/util.jl @@ -803,6 +803,7 @@ function crc32c(f::IO, nb::Integer, crc::UInt32=0x00000000) crc = unsafe_crc32c(buf, n, crc) nb -= n end + @assert 0 ≤ nb ≤ length(buf) return unsafe_crc32c(buf, readbytes!(f, buf, nb), crc) end From ceda720f86fed0e93d15af89442457ac6ad79b5b Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 7 Jun 2017 17:24:57 -0400 Subject: [PATCH 07/14] more crc32c tests --- test/misc.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/misc.jl b/test/misc.jl index 63740e8668e40..1b3271f6c199e 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -584,7 +584,12 @@ for force_software_crc in (1,0) open(f, "r") do io @test crc32c(io, 16) == crc32c(a[1:16]) @test crc32c(io, 16) == crc32c(a[17:32]) + @test crc32c(io, 1000) == crc32c(a[33:end]) + @test crc32c(io, 1000) == 0x00000000 end + a = rand(UInt8, 30000) + write(f, a) + @test crc32c(f) == crc32c(a) finally rm(f, force=true) end From 67ef143c1acad4f2801d5960fadf6ab9401c64e0 Mon Sep 17 00:00:00 2001 From: "Steven G. 
Johnson" Date: Thu, 8 Jun 2017 11:58:56 -0400 Subject: [PATCH 08/14] docs typo --- doc/src/stdlib/io-network.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/stdlib/io-network.md b/doc/src/stdlib/io-network.md index ad24ca445ce4b..173261912b02e 100644 --- a/doc/src/stdlib/io-network.md +++ b/doc/src/stdlib/io-network.md @@ -12,7 +12,7 @@ Base.take!(::Base.AbstractIOBuffer) Base.fdio Base.flush Base.close -Base.crc32(::IO, ::Integer, ::UInt32) +Base.crc32c(::IO, ::Integer, ::UInt32) Base.write Base.read Base.read! From f34d08c19fd31605b3b6008530d75d2081cfe869 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 8 Jun 2017 13:58:04 -0400 Subject: [PATCH 09/14] crc32c(IOBuffer) bugfix and test --- base/util.jl | 2 +- test/misc.jl | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/base/util.jl b/base/util.jl index c889610c9d406..e522745472340 100644 --- a/base/util.jl +++ b/base/util.jl @@ -788,7 +788,7 @@ unsafe_crc32c(a, n, crc) = ccall(:jl_crc32c, UInt32, (UInt32, Ptr{UInt8}, Csize_ crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000) = unsafe_crc32c(a, length(a), crc) -crc32c(buf::IOBuffer, crc::UInt32=0x00000000) = crc32c(buf.data, crc) +crc32c(buf::IOBuffer, crc::UInt32=0x00000000) = unsafe_crc32c(buf.data, min(buf.size, length(buf.data)), crc) """ crc32c(f::IO, nb::Integer, crc::UInt32=0x00000000) diff --git a/test/misc.jl b/test/misc.jl index 1b3271f6c199e..6a7ed379c1bfe 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -577,6 +577,11 @@ for force_software_crc in (1,0) end @test crc32c(IOBuffer(a)) == crc_256 + let buf = IOBuffer() + write(buf, a[1:3]) + @test crc32c(buf) == crc32c(a[1:3]) + end + let f = tempname() try write(f, a) From 38f6fc04c0fd932b0cfd8f575b0ef02b810e4368 Mon Sep 17 00:00:00 2001 From: "Steven G. 
Johnson" Date: Thu, 8 Jun 2017 14:56:13 -0400 Subject: [PATCH 10/14] restore crc32 of String, add crc32c(io) to read all of a stream, add optimized open(crc32c, filename), make IOBuffer checksums consistent with other streams --- base/util.jl | 42 ++++++++++++++++++++++++++---------------- test/misc.jl | 14 +++++++++----- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/base/util.jl b/base/util.jl index e522745472340..3fdd78eafb596 100644 --- a/base/util.jl +++ b/base/util.jl @@ -769,17 +769,22 @@ end crc32c(data, crc::UInt32=0x00000000) Compute the CRC-32c checksum of the given `data`, which can be -an `Array{UInt8}`, a contiguous subarray thereof, or an `IOBuffer`, or -a filename (whose contents will be checksummed). Optionally, you can pass +an `Array{UInt8}`, a contiguous subarray thereof, or a `String`. Optionally, you can pass a starting `crc` integer to be mixed in with the checksum. The `crc` parameter can be used to compute a checksum on data divided into chunks: performing `crc32c(data2, crc32c(data1))` is equivalent to the checksum of `[data1; data2]`. (Technically, a little-endian checksum is computed.) -To checksum `s::String`, you can do `crc32c(Vector{UInt8}(s))`; note -that the result is specific to the UTF-8 encoding of `String`. To checksum -an `a::Array` of some other bitstype, you can do `crc32c(reinterpret(UInt8,a))`; -note that the result is endian-dependent. +There is also a method `crc32c(io, nb, crc)` to checksum `nb` bytes from +a stream `io`, or `crc32c(io, crc)` to checksum all the remaining bytes. +Hence you can do [`open(crc32c, filename)`](@ref) to checksum an entire file, +or `crc32c(seekstart(buf))` to checksum an [`IOBuffer`](@ref) without +calling [`take!`](@ref). + +For a `String`, note that the result is specific to the UTF-8 encoding +(a different checksum would be obtained from a different Unicode encoding). 
+To checksum an `a::Array` of some other bitstype, you can do `crc32c(reinterpret(UInt8,a))`, +but note that the result may be endian-dependent. """ function crc32c end @@ -788,28 +793,33 @@ unsafe_crc32c(a, n, crc) = ccall(:jl_crc32c, UInt32, (UInt32, Ptr{UInt8}, Csize_ crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000) = unsafe_crc32c(a, length(a), crc) -crc32c(buf::IOBuffer, crc::UInt32=0x00000000) = unsafe_crc32c(buf.data, min(buf.size, length(buf.data)), crc) +crc32c(s::String, crc::UInt32=0x00000000) = unsafe_crc32c(s, sizeof(s), crc) """ - crc32c(f::IO, nb::Integer, crc::UInt32=0x00000000) + crc32c(io::IO, [nb::Integer,] crc::UInt32=0x00000000) -Read up to `nb` bytes from `f` and return the CRC-32c checksum, optionally -mixed with a starting `crc` integer. +Read up to `nb` bytes from `io` and return the CRC-32c checksum, optionally +mixed with a starting `crc` integer. If `nb` is not supplied, then +`io` will be read until the end of the stream. 
""" -function crc32c(f::IO, nb::Integer, crc::UInt32=0x00000000) +function crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000) + nb < 0 && throw(ArgumentError("number of bytes to checksum must be ≥ 0")) buf = Array{UInt8}(min(nb, 16384)) - while !eof(f) && nb > 16384 - n = readbytes!(f, buf) + while !eof(io) && nb > 16384 + n = readbytes!(io, buf) crc = unsafe_crc32c(buf, n, crc) nb -= n end + eof(io) && return crc @assert 0 ≤ nb ≤ length(buf) - return unsafe_crc32c(buf, readbytes!(f, buf, nb), crc) + return unsafe_crc32c(buf, readbytes!(io, buf, nb), crc) end +crc32c(io::IO, crc::UInt32=0x00000000) = crc32c(io, typemax(Int64), crc) -crc32c(filename::AbstractString, crc::UInt32=0x00000000) = +# optimization for `open(crc, filename)` to use the size of the file +open(::typeof(crc32c), filename::AbstractString) = open(filename, "r") do f - crc32c(f, filesize(f), crc) + crc32c(f, filesize(f)) end diff --git a/test/misc.jl b/test/misc.jl index 6a7ed379c1bfe..4a0b481d56acd 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -566,7 +566,7 @@ end for force_software_crc in (1,0) ccall(:jl_crc32c_init, Void, (Cint,), force_software_crc) for (n,crc) in 
[(0,0x00000000),(1,0xa016d052),(2,0x03f89f52),(3,0xf130f21e),(4,0x29308cf4),(5,0x53518fab),(6,0x4f4dfbab),(7,0xbd3a64dc),(8,0x46891f81),(9,0x5a14b9f9),(10,0xb219db69),(11,0xd232a91f),(12,0x51a15563),(13,0x9f92de41),(14,0x4d8ae017),(15,0xc8b74611),(16,0xa0de6714),(17,0x672c992a),(18,0xe8206eb6),(19,0xc52fd285),(20,0x327b0397),(21,0x318263dd),(22,0x08485ccd),(23,0xea44d29e),(24,0xf6c0cb13),(25,0x3969bba2),(26,0x6a8810ec),(27,0x75b3d0df),(28,0x82d535b1),(29,0xbdf7fc12),(30,0x1f836b7d),(31,0xd29f33af),(32,0x8e4acb3e),(33,0x1cbee2d1),(34,0xb25f7132),(35,0xb0fa484c),(36,0xb9d262b4),(37,0x3207fe27),(38,0xa024d7ac),(39,0x49a2e7c5),(40,0x0e2c157f),(41,0x25f7427f),(42,0x368c6adc),(43,0x75efd4a5),(44,0xa84c5c31),(45,0x0fc817b2),(46,0x8d99a881),(47,0x5cc3c078),(48,0x9983d5e2),(49,0x9267c2db),(50,0xc96d4745),(51,0x058d8df3),(52,0x453f9cf3),(53,0xb714ade1),(54,0x55d3c2bc),(55,0x495710d0),(56,0x3bddf494),(57,0x4f2577d0),(58,0xdae0f604),(59,0x3c57c632),(60,0xfe39bbb0),(61,0x6f5d1d41),(62,0x7d996665),(63,0x68c738dc),(64,0x8dfea7ae)] - @test crc32c(UInt8[1:n;]) == crc + @test crc32c(UInt8[1:n;]) == crc == crc32c(String(UInt8[1:n;])) end # test that crc parameter is equivalent to checksum of concatenated data, # and test crc of subarrays: @@ -579,22 +579,26 @@ for force_software_crc in (1,0) @test crc32c(IOBuffer(a)) == crc_256 let buf = IOBuffer() write(buf, a[1:3]) - @test crc32c(buf) == crc32c(a[1:3]) + @test crc32c(seekstart(buf)) == crc32c(a[1:3]) + @test crc32c(buf) == 0x00000000 + @test crc32c(seek(buf, 1)) == crc32c(a[2:3]) + @test crc32c(seek(buf, 0), 2) == crc32c(a[1:2]) + @test crc32c(buf) == crc32c(a[3:3]) end let f = tempname() try write(f, a) - @test crc32c(f) == crc_256 + @test open(crc32c, f) == crc_256 open(f, "r") do io @test crc32c(io, 16) == crc32c(a[1:16]) @test crc32c(io, 16) == crc32c(a[17:32]) - @test crc32c(io, 1000) == crc32c(a[33:end]) + @test crc32c(io) == crc32c(a[33:end]) @test crc32c(io, 1000) == 0x00000000 end a = rand(UInt8, 30000) write(f, a) - @test 
crc32c(f) == crc32c(a) + @test open(crc32c, f) == crc32c(a) == open(io -> crc32c(io, 10^6), f) finally rm(f, force=true) end From 1126d0ffa6c285a33ce04d8439246c1ba21dce38 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 8 Jun 2017 21:39:58 -0400 Subject: [PATCH 11/14] use crc32c block size of 8192*3, matching the underlying C library --- base/util.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/base/util.jl b/base/util.jl index 3fdd78eafb596..9a32b836e5294 100644 --- a/base/util.jl +++ b/base/util.jl @@ -804,8 +804,10 @@ mixed with a starting `crc` integer. If `nb` is not supplied, then """ function crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000) nb < 0 && throw(ArgumentError("number of bytes to checksum must be ≥ 0")) - buf = Array{UInt8}(min(nb, 16384)) - while !eof(io) && nb > 16384 + # use block size 24576=8192*3, since that is the threshold for + # 3-way parallel SIMD code in the underlying jl_crc32c C function. + buf = Array{UInt8}(min(nb, 24576)) + while !eof(io) && nb > 24576 n = readbytes!(io, buf) crc = unsafe_crc32c(buf, n, crc) nb -= n From da2fba1a61523168578baf46589a45c288da96bb Mon Sep 17 00:00:00 2001 From: "Steven G. 
Johnson" Date: Fri, 9 Jun 2017 11:21:03 -0400 Subject: [PATCH 12/14] optimized IOBuffer crc32c --- base/iobuffer.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/base/iobuffer.jl b/base/iobuffer.jl index c9d4226462dac..854e39cbc4f17 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -412,3 +412,15 @@ function readuntil(io::AbstractIOBuffer, delim::UInt8) end A end + +# copy-free crc32c of IOBuffer: +function crc32c(io::IOBuffer, nb::Integer, crc::UInt32=0x00000000) + nb < 0 && throw(ArgumentError("number of bytes to checksum must be ≥ 0")) + io.readable || throw(ArgumentError("read failed, IOBuffer is not readable")) + n = min(nb, nb_available(io)) + n == 0 && return crc + crc = unsafe_crc32c(pointer(io.data, io.ptr), n, crc) + io.ptr += n + return crc +end +crc32c(io::IOBuffer, crc::UInt32=0x00000000) = crc32c(io, nb_available(io), crc) From a1b0431b1222dfe023948690c1fd98ddc9ea8aa3 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 12 Jun 2017 09:14:06 -0400 Subject: [PATCH 13/14] slight simplification/generalization --- base/loading.jl | 2 +- base/util.jl | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 77bea1d098d20..1382f4f2a00ed 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -683,7 +683,7 @@ function compilecache(name::String) if success(create_expr_cache(path, cachefile, concrete_deps)) # append checksum to the end of the .ji file: open(cachefile, "a+") do f - write(f, hton(crc32c(seekstart(f), filesize(f)))) + write(f, hton(crc32c(seekstart(f)))) end else error("Failed to precompile $name to $cachefile.") diff --git a/base/util.jl b/base/util.jl index 9a32b836e5294..b8b301243beef 100644 --- a/base/util.jl +++ b/base/util.jl @@ -817,12 +817,7 @@ function crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000) return unsafe_crc32c(buf, readbytes!(io, buf, nb), crc) end crc32c(io::IO, crc::UInt32=0x00000000) = crc32c(io, typemax(Int64), crc) - -# optimization 
for `open(crc, filename)` to use the size of the file -open(::typeof(crc32c), filename::AbstractString) = - open(filename, "r") do f - crc32c(f, filesize(f)) - end +crc32c(io::IOStream, crc::UInt32=0x00000000) = crc32c(io, filesize(io)-position(io), crc) """ From 8e058f3f273c5ac7d1174efb97d88f560d636f6e Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 12 Jun 2017 09:17:23 -0400 Subject: [PATCH 14/14] eliminate theoretical race condition in crc32c if file is being written to while we are reading from it --- base/util.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/base/util.jl b/base/util.jl index b8b301243beef..1617b3d6c5df1 100644 --- a/base/util.jl +++ b/base/util.jl @@ -812,9 +812,7 @@ function crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000) crc = unsafe_crc32c(buf, n, crc) nb -= n end - eof(io) && return crc - @assert 0 ≤ nb ≤ length(buf) - return unsafe_crc32c(buf, readbytes!(io, buf, nb), crc) + return unsafe_crc32c(buf, readbytes!(io, buf, min(nb, length(buf))), crc) end crc32c(io::IO, crc::UInt32=0x00000000) = crc32c(io, typemax(Int64), crc) crc32c(io::IOStream, crc::UInt32=0x00000000) = crc32c(io, filesize(io)-position(io), crc)