Skip to content

Commit

Permalink
Merge pull request #1795 from girder/vips-cast-convert
Browse files Browse the repository at this point in the history
Be more specific in casting when converting images via vips
  • Loading branch information
manthey authored Jan 30, 2025
2 parents 6bba04d + 7e92d49 commit ae9dc53
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### Improvements

- Improve how we use vips to read lower tile levels ([#1794](../../pull/1794))
- Be more specific in casting when converting images via vips ([#1795](../../pull/1795))

## 1.31.0

Expand Down
13 changes: 9 additions & 4 deletions large_image/tilesource/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,8 @@ def _letterboxImage(image: PIL.Image.Image, width: int, height: int, fill: str)
return result


def _vipsCast(image: Any, mustBe8Bit: bool = False) -> Any:
def _vipsCast(image: Any, mustBe8Bit: bool = False,
preferredCast: Optional[Tuple[Any, float, float]] = None) -> Any:
"""
Cast a vips image to a format we want.
Expand All @@ -323,7 +324,7 @@ def _vipsCast(image: Any, mustBe8Bit: bool = False) -> Any:
import pyvips

image = cast(pyvips.Image, image)
formats = {
formats: Dict[pyvips.BandFormat, Tuple[pyvips.BandFormat, float, float]] = {
pyvips.BandFormat.CHAR: (pyvips.BandFormat.UCHAR, 2**7, 1),
pyvips.BandFormat.COMPLEX: (pyvips.BandFormat.USHORT, 0, 65535),
pyvips.BandFormat.DOUBLE: (pyvips.BandFormat.USHORT, 0, 65535),
Expand All @@ -334,9 +335,13 @@ def _vipsCast(image: Any, mustBe8Bit: bool = False) -> Any:
pyvips.BandFormat.SHORT: (pyvips.BandFormat.USHORT, 2**15, 1),
pyvips.BandFormat.UINT: (pyvips.BandFormat.USHORT, 0, 2**-16),
}
if image.format not in formats or (image.format == pyvips.BandFormat.USHORT and not mustBe8Bit):
if (image.format not in formats and preferredCast is None) or (
image.format == pyvips.BandFormat.USHORT and not mustBe8Bit):
return image
target, offset, multiplier = formats[image.format]
if preferredCast is not None:
target, offset, multiplier = preferredCast
else:
target, offset, multiplier = formats[image.format]
if image.format in {pyvips.BandFormat.DOUBLE, pyvips.BandFormat.FLOAT}:
maxVal = image.max()
# These thresholds are higher than 256 and 65536 because bicubic and
Expand Down
4 changes: 2 additions & 2 deletions test/datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@
# RGB JPEG compression
# Source: TCGA-AA-A02O-11A-01-BS1.8b76f05c-4a8b-44ba-b581-6b8b4f437367.svs
'TCGA-AA-A02O-11A-01-BS1.8b76f05c-4a8b-44ba-b581-6b8b4f437367.svs': 'sha512:1b75a4ec911017aef5c885760a3c6575dacf5f8efb59fb0e011108dce85b1f4e97b8d358f3363c1f5ea6f1c3698f037554aec1620bbdd4cac54e3d5c9c1da1fd', # noqa
# Tiff with floating point pixels
# Tiff with floating point pixels (icc profile stripped)
# Source: d042-353.crop.small.float32.tif
'd042-353.crop.small.float32.tif': 'sha512:ae05dbe6f3330c912893b203b55db27b0fdf3222a0e7f626d372c09668334494d07dc1d35533670cfac51b588d2292eeee7431317741fdb4cbb281c28a289115', # noqa
'd042-353.crop.small.float32.tif': 'sha512:8b640e9adcd0b8aba794666027b80215964d075e76ca2ebebefc7e17c3cd79af7da40a40151e2a2ba0ae48969e54275cf69a3cfc1a2a6b87fbb0d186013e5489', # noqa
# JPEG with progressive compression and restart markers
# Source: d042-353.crop.small.jpg
'd042-353.crop.small.jpg': 'sha512:1353646637c1fae266b87312698aa39eca0311222c3a1399b60efdc13bfe55e2f3db59da005da945dd7e9e816f31ccd18846dd72744faac75215074c3d87414f', # noqa
Expand Down
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,7 @@ filterwarnings =
ignore:::celery.backends.amqp
ignore:Creating a LegacyVersion.*:DeprecationWarning
ignore:setName\(\) is deprecated, set the name attribute instead:DeprecationWarning
asyncio_default_fixture_loop_scope = function

[coverage:paths]
# As of pytest-cov 2.6, all but the first source line is relative to the first
Expand Down
78 changes: 71 additions & 7 deletions utilities/converter/large_image_converter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def _generate_tiff(inputPath, outputPath, tempPath, lidata, **kwargs):


def _convert_via_vips(inputPathOrBuffer, outputPath, tempPath, forTiled=True,
status=None, **kwargs):
status=None, preferredVipsCast=None, **kwargs):
"""
Convert a file, buffer, or vips image to a tiff file. This is equivalent
to a vips command line of
Expand All @@ -263,6 +263,7 @@ def _convert_via_vips(inputPathOrBuffer, outputPath, tempPath, forTiled=True,
also stores files in TMPDIR
:param forTiled: True if the output should be tiled, false if not.
:param status: an optional additional string to add to log messages.
:param preferredVipsCast: vips scaling parameters to use in a cast.
:param kwargs: additional arguments that get passed to _vipsParameters
and _convert_to_jp2k.
"""
Expand Down Expand Up @@ -294,7 +295,8 @@ def _convert_via_vips(inputPathOrBuffer, outputPath, tempPath, forTiled=True,
image = _vipsCast(
image,
convertParams['compression'] in {'webp', 'jpeg'} or
kwargs.get('compression') in {'jp2k'})
kwargs.get('compression') in {'jp2k'},
preferredVipsCast)
# TODO: revisit the TMPDIR override; this is not thread safe
# oldtmpdir = os.environ.get('TMPDIR')
# os.environ['TMPDIR'] = os.path.dirname(tempPath)
Expand Down Expand Up @@ -535,8 +537,9 @@ def _convert_large_image_tile(tilelock, strips, tile):
strips[ty] = strips[ty].insert(vimg, x, 0, expand=True)


def _convert_large_image_frame(frame, numFrames, ts, frameOutputPath, tempPath,
parentConcurrency=None, **kwargs):
def _convert_large_image_frame(
frame, numFrames, ts, frameOutputPath, tempPath, preferredVipsCast=None,
parentConcurrency=None, **kwargs):
"""
Convert a single frame from a large_image source. This parallelizes tile
reads. Once all tiles are converted to a composited vips image, a tiff
Expand All @@ -547,6 +550,7 @@ def _convert_large_image_frame(frame, numFrames, ts, frameOutputPath, tempPath,
:param ts: the open tile source.
:param frameOutputPath: the destination name for the tiff file.
:param tempPath: a temporary file in a temporary directory.
:param preferredVipsCast: vips scaling parameters to use in a cast.
:param parentConcurrency: amount of concurrency used by parent task.
"""
# The iterator tile size is a balance between memory use and fewer calls
Expand All @@ -573,7 +577,64 @@ def _convert_large_image_frame(frame, numFrames, ts, frameOutputPath, tempPath,
for stripidx in range(1, len(strips)):
img = img.insert(strips[stripidx], 0, stripidx * _iterTileSize, expand=True)
_convert_via_vips(
img, frameOutputPath, tempPath, status='%d/%d' % (frame + 1, numFrames), **kwargs)
img, frameOutputPath, tempPath, status='%d/%d' % (frame + 1, numFrames),
preferredVipsCast=preferredVipsCast, **kwargs)


def _output_type(lidata): # noqa
"""
Determine how to cast and scale vips data based on actual image contents.
"""
try:
intype = np.dtype(lidata['tilesource'].dtype)
except Exception:
return None
if intype == np.uint8 or intype == np.uint16:
return None
logger.debug('Checking data range')
minval = maxval = None
for frame in range(len(lidata['metadata'].get('frames', [0]))):
h = lidata['tilesource'].histogram(
onlyMinMax=True, output=dict(maxWidth=2048, maxHeight=2048),
resample=0, frame=frame)
if 'max' not in h:
continue
if maxval is None:
maxval = max(h['max'].tolist())
minval = min(h['min'].tolist())
else:
maxval = max(maxval, max(h['max'].tolist()))
minval = min(minval, min(h['min'].tolist()))
lidata['range'] = (minval, maxval)
logger.debug('Data range is [%r, %r]', minval, maxval)
if minval >= 0 and intype == np.int8:
return (pyvips.BandFormat.UCHAR, 0, 1)
if minval >= 0 and intype == np.int16:
return (pyvips.BandFormat.USHORT, 0, 1)
if minval >= 0 and maxval == 0:
return (pyvips.BandFormat.UCHAR, 0, 1)
if minval >= 0 and maxval <= 2 ** -8:
return (pyvips.BandFormat.USHORT, 0,
2 ** -(math.ceil(math.log2(maxval)) - 16) - 2 ** -math.ceil(math.log2(maxval)))
if minval >= 0 and maxval <= 1:
return (pyvips.BandFormat.USHORT, 0, 65535)
if minval >= 0 and maxval < 256:
return (pyvips.BandFormat.UCHAR, 0, 1)
if minval >= 0 and maxval < 65536:
return (pyvips.BandFormat.USHORT, 0, 1)
if minval >= 0:
return (pyvips.BandFormat.USHORT, 0,
2 ** -(math.ceil(math.log2(maxval)) - 16) - 2 ** -math.ceil(math.log2(maxval)))
if minval >= -2 ** -8 and maxval <= 2 ** -8:
return (pyvips.BandFormat.USHORT, 1,
2 ** -(math.ceil(math.log2(maxval)) - 15) - 2 ** -math.ceil(math.log2(maxval)))
if minval >= -1 and maxval <= 1:
return (pyvips.BandFormat.USHORT, 1, 32767)
if minval >= -32768 and maxval < 32768:
return (pyvips.BandFormat.USHORT, 32768, 1)
return (pyvips.BandFormat.USHORT, 0,
2 ** -(math.ceil(math.log2(max(-minval, maxval))) - 16) -
2 ** -math.ceil(math.log2(max(-minval, maxval))))


def _convert_large_image(inputPath, outputPath, tempPath, lidata, **kwargs):
Expand All @@ -588,6 +649,7 @@ def _convert_large_image(inputPath, outputPath, tempPath, lidata, **kwargs):
images.
"""
ts = lidata['tilesource']
lidata['_vips_cast'] = _output_type(lidata)
numFrames = len(lidata['metadata'].get('frames', [0]))
outputList = []
tasks = []
Expand All @@ -603,7 +665,7 @@ def _convert_large_image(inputPath, outputPath, tempPath, lidata, **kwargs):
frame + 1, time.strftime('%Y%m%d-%H%M%S'))
_pool_add(tasks, (pool.submit(
_convert_large_image_frame, frame, numFrames, ts, frameOutputPath,
tempPath, pool._max_workers, **kwargs), ))
tempPath, lidata['_vips_cast'], pool._max_workers, **kwargs), ))
outputList.append(frameOutputPath)
_drain_pool(pool, tasks, 'frames')
_output_tiff(outputList, outputPath, tempPath, lidata, **kwargs)
Expand Down Expand Up @@ -971,7 +1033,9 @@ def convert(inputPath, outputPath=None, **kwargs): # noqa: C901
if lidata and (not is_vips(
inputPath, (lidata['metadata']['sizeX'], lidata['metadata']['sizeY'])) or (
len(lidata['metadata'].get('frames', [])) >= 2 and
not _is_multiframe(inputPath))):
not _is_multiframe(inputPath)) or
(np.dtype(lidata['tilesource'].dtype) != np.uint8 and
np.dtype(lidata['tilesource'].dtype) != np.uint16)):
_convert_large_image(inputPath, outputPath, tempPath, lidata, **kwargs)
elif _is_multiframe(inputPath):
_generate_multiframe_tiff(inputPath, outputPath, tempPath, lidata, **kwargs)
Expand Down

0 comments on commit ae9dc53

Please sign in to comment.