diff --git a/README.md b/README.md index 59c1552..ff4bc42 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ This implementation currently supports the following Zarr features
Arbitrary meta-data
stored as JSON for both groups and datasets.
Compression
-
currently, only the most relevant compression schemes (Blosc, GZip, Zlib, and BZ2) are supported, we can add others later as necessary.
+
currently, only the most relevant compression schemes (Zstandard, Blosc, GZip, Zlib, and BZ2) are supported; we can add others later as necessary.
Primitive types as little and big endian
so far, I have tested unsigned and signed integers with 1, 2, 4 and 8 bytes, and floats with 4 and 8 bytes. The behavior for other types is untested because I did not have meaningful examples. Complex numbers should be mapped into the best matching primitive real type. Other numpy data types such as strings, timedeltas, objects, dates, or others should come out as uncompressed bytes.
diff --git a/pom.xml b/pom.xml index 0f01568..94eec6a 100644 --- a/pom.xml +++ b/pom.xml @@ -154,6 +154,11 @@ org.janelia.saalfeldlab n5-blosc + + org.janelia + n5-zstandard + 1.0.2 + diff --git a/src/main/java/org/janelia/saalfeldlab/n5/zarr/ZarrCompressor.java b/src/main/java/org/janelia/saalfeldlab/n5/zarr/ZarrCompressor.java index 078d484..2ee83e2 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/zarr/ZarrCompressor.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/zarr/ZarrCompressor.java @@ -40,6 +40,7 @@ import org.janelia.saalfeldlab.n5.GzipCompression; import org.janelia.saalfeldlab.n5.RawCompression; import org.janelia.saalfeldlab.n5.blosc.BloscCompression; +import org.janelia.scicomp.n5.zstandard.ZstandardCompression; import com.google.gson.JsonDeserializationContext; import com.google.gson.JsonDeserializer; @@ -55,6 +56,7 @@ public interface ZarrCompressor { /* idiotic stream based initialization because Java cannot have static initialization code in interfaces */ public static Map> registry = Stream.of( + new SimpleImmutableEntry<>("zstd", Zstandard.class), new SimpleImmutableEntry<>("blosc", Blosc.class), new SimpleImmutableEntry<>("zlib", Zlib.class), new SimpleImmutableEntry<>("gzip", Gzip.class), @@ -75,6 +77,8 @@ public static ZarrCompressor fromCompression(final Compression compression) { return useZlib != null && useZlib ? 
new Zlib((GzipCompression)compression) : new Gzip((GzipCompression)compression); } else if (compression instanceof Bzip2Compression) { return new Bz2((Bzip2Compression)compression); + } else if (compression instanceof ZstandardCompression) { + return new Zstandard((ZstandardCompression)compression); } else { return new Raw(); } @@ -85,8 +89,40 @@ public static ZarrCompressor fromCompression(final Compression compression) { public Compression getCompression(); + public static class Zstandard implements ZarrCompressor { + + @SuppressWarnings("unused") + private final String id = "zstd"; + private final int level; + private final transient int nbWorkers; + + public Zstandard(int level) { + this(level, 0); + } + + public Zstandard(int level, int nbWorkers) { + this.level = level; + this.nbWorkers = nbWorkers; + } + + public Zstandard(ZstandardCompression compression) { + this.level = compression.getLevel(); + this.nbWorkers = compression.getNbWorkers(); + } + + @Override + public Compression getCompression() { + ZstandardCompression compression = new ZstandardCompression(level); + if(this.nbWorkers != 0) + compression.setNbWorkers(this.nbWorkers); + return compression; + } + + } + public static class Blosc implements ZarrCompressor { + @SuppressWarnings("unused") private final String id = "blosc"; private final String cname; private final int clevel; @@ -147,6 +183,7 @@ public BloscCompression getCompression() { public static class Zlib implements ZarrCompressor { + @SuppressWarnings("unused") private final String id = "zlib"; private final int level; @@ -174,6 +211,7 @@ public GzipCompression getCompression() { public static class Gzip implements ZarrCompressor { + @SuppressWarnings("unused") private final String id = "gzip"; private final int level; @@ -201,6 +239,7 @@ public GzipCompression getCompression() { public static class Bz2 implements ZarrCompressor { + @SuppressWarnings("unused") private final String id = "bz2"; private final int level; diff --git 
a/src/test/java/org/janelia/saalfeldlab/n5/zarr/N5ZarrTest.java b/src/test/java/org/janelia/saalfeldlab/n5/zarr/N5ZarrTest.java index 07bb898..4a4bdf8 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/zarr/N5ZarrTest.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/zarr/N5ZarrTest.java @@ -66,6 +66,7 @@ import org.janelia.saalfeldlab.n5.StringDataBlock; import org.janelia.saalfeldlab.n5.blosc.BloscCompression; import org.janelia.saalfeldlab.n5.imglib2.N5Utils; +import org.janelia.scicomp.n5.zstandard.ZstandardCompression; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; @@ -169,6 +170,10 @@ protected Compression[] getCompressions() { new GzipCompression(5, true), new BloscCompression(), new BloscCompression("lz4", 6, BloscCompression.BITSHUFFLE, 0, 4), + new ZstandardCompression(), + new ZstandardCompression(0), + new ZstandardCompression(-1), + //add new compressions here new RawCompression() }; } diff --git a/src/test/python/zarr-nested-test.py b/src/test/python/zarr-nested-test.py index db2188d..d51abf9 100644 --- a/src/test/python/zarr-nested-test.py +++ b/src/test/python/zarr-nested-test.py @@ -30,7 +30,7 @@ from pathlib import Path import numpy as np import zarr -from numcodecs import Zlib, GZip, BZ2 +from numcodecs import Zlib, GZip, BZ2, Zstd import sys import os diff --git a/src/test/python/zarr-test.py b/src/test/python/zarr-test.py index f1449ce..a647b6a 100644 --- a/src/test/python/zarr-test.py +++ b/src/test/python/zarr-test.py @@ -30,7 +30,7 @@ from pathlib import Path import numpy as np import zarr -from numcodecs import Zlib, GZip, BZ2 +from numcodecs import Zlib, GZip, BZ2, Zstd import sys import os @@ -194,6 +194,13 @@ data=array_30x20_c, chunks=(7, 13), overwrite=True) +group.array( + name='30x20_c_u8_zstd', + dtype='>u8', + compressor=Zstd(level=1), + data=array_30x20_c, + chunks=(7, 13), + overwrite=True) group.array( name='3x2_c_u4_f1', @@ -244,4 +251,10 @@ 
data=array_3x2_str_c, chunks=(2, 2), overwrite=True) - +group.array( + name='3x2_c_str_zstd', + dtype=str, + compressor=Zstd(level=1), + data=array_3x2_str_c, + chunks=(2, 2), + overwrite=True)