diff --git a/docs/api/python/image/image.md b/docs/api/python/image/image.md index 0622a30f98a6..299c54570b17 100644 --- a/docs/api/python/image/image.md +++ b/docs/api/python/image/image.md @@ -18,7 +18,9 @@ images provided in image.imread image.imdecode + image.imresize image.scale_down + image.copyMakeBorder image.resize_short image.fixed_crop image.random_crop @@ -166,7 +168,9 @@ and a list of augmenters specific for `Object detection` is provided .. automethod:: mxnet.image.imread .. automethod:: mxnet.image.imdecode +.. automethod:: mxnet.image.imresize .. automethod:: mxnet.image.scale_down +.. automethod:: mxnet.image.copyMakeBorder .. automethod:: mxnet.image.resize_short .. automethod:: mxnet.image.fixed_crop .. automethod:: mxnet.image.random_crop diff --git a/python/mxnet/image/image.py b/python/mxnet/image/image.py index 9c2a1cbfba2a..b452aecdb04b 100644 --- a/python/mxnet/image/image.py +++ b/python/mxnet/image/image.py @@ -38,8 +38,6 @@ from ..base import numeric_types from .. import ndarray as nd from ..ndarray import _internal -from ..ndarray._internal import _cvimresize as imresize -from ..ndarray._internal import _cvcopyMakeBorder as copyMakeBorder from .. import io from .. import recordio @@ -47,7 +45,7 @@ def imread(filename, *args, **kwargs): """Read and decode an image to an NDArray. - Note: `imread` uses OpenCV (not the CV2 Python library). + .. note:: `imread` uses OpenCV (not the CV2 Python library). MXNet must have been built with USE_OPENCV=1 for `imdecode` to work. Parameters @@ -85,10 +83,67 @@ def imread(filename, *args, **kwargs): return _internal._cvimread(filename, *args, **kwargs) +def imresize(src, w, h, *args, **kwargs): + r"""Resize image with OpenCV. + + .. note:: `imresize` uses OpenCV (not the CV2 Python library). MXNet must have been built + with USE_OPENCV=1 for `imresize` to work. + + Parameters + ---------- + src : NDArray + source image + w : int, required + Width of resized image. 
+ h : int, required + Height of resized image. + interp : int, optional, default=1 + Interpolation method (default=cv2.INTER_LINEAR). + Possible values: + 0: Nearest Neighbors Interpolation. + 1: Bilinear interpolation (used by default). + 2: Area-based (resampling using pixel area relation). It may be a + preferred method for image decimation, as it gives moire-free + results. But when the image is zoomed, it is similar to the Nearest + Neighbors method. + 3: Bicubic interpolation over 4x4 pixel neighborhood. + 4: Lanczos interpolation over 8x8 pixel neighborhood. + 9: Cubic for enlarge, area for shrink, bilinear for others. + 10: Random select from interpolation methods mentioned above. + Note: + When shrinking an image, it will generally look best with AREA-based + interpolation, whereas, when enlarging an image, it will generally look best + with Bicubic (slow) or Bilinear (faster but still looks OK). + More details can be found in the documentation of OpenCV, please refer to + http://docs.opencv.org/master/da/d54/group__imgproc__transform.html. + + out : NDArray, optional + The output NDArray to hold the result. + + Returns + ------- + out : NDArray or list of NDArrays + The output of this function. + + Example + ------- + >>> with open("flower.jpeg", 'rb') as fp: + ... str_image = fp.read() + ... + >>> image = mx.img.imdecode(str_image) + >>> image + + >>> new_image = mx.img.imresize(image, 240, 360) + >>> new_image + + """ + return _internal._cvimresize(src, w, h, *args, **kwargs) + + def imdecode(buf, *args, **kwargs): """Decode an image to an NDArray. - Note: `imdecode` uses OpenCV (not the CV2 Python library). + .. note:: `imdecode` uses OpenCV (not the CV2 Python library). MXNet must have been built with USE_OPENCV=1 for `imdecode` to work. Parameters @@ -178,6 +233,59 @@ def scale_down(src_size, size): return int(w), int(h) +def copyMakeBorder(src, top, bot, left, right, *args, **kwargs): + """Pad image border with OpenCV. 
+ + Parameters + ---------- + src : NDArray + source image + top : int, required + Top margin. + bot : int, required + Bottom margin. + left : int, required + Left margin. + right : int, required + Right margin. + type : int, optional, default='0' + Filling type (default=cv2.BORDER_CONSTANT). + 0 - cv2.BORDER_CONSTANT - Adds a constant colored border. + 2 - cv2.BORDER_REFLECT - Border will be mirror reflection of the + border elements, like this : fedcba|abcdefgh|hgfedcb + 4 - cv2.BORDER_REFLECT_101 or cv2.BORDER_DEFAULT - Same as above, + but with a slight change, like this : gfedcb|abcdefgh|gfedcba + 1 - cv2.BORDER_REPLICATE - Last element is replicated throughout, + like this: aaaaaa|abcdefgh|hhhhhhh + 3 - cv2.BORDER_WRAP - it will look like this : cdefgh|abcdefgh|abcdefg + value : double, optional, default=0 + (Deprecated! Use ``values`` instead.) Fill with single value. + values : tuple of <double>, optional, default=[] + Fill with value(RGB[A] or gray), up to 4 channels. + + out : NDArray, optional + The output NDArray to hold the result. + + Returns + ------- + out : NDArray or list of NDArrays + The output of this function. + + Example + -------- + >>> with open("flower.jpeg", 'rb') as fp: + ... str_image = fp.read() + ... + >>> image = mx.img.imdecode(str_image) + >>> image + + >>> new_image = mx.image.copyMakeBorder(image, 1, 2, 3, 4, type=0) + >>> new_image + + """ + return _internal._cvcopyMakeBorder(src, top, bot, left, right, *args, **kwargs) + + def _get_interp_method(interp, sizes=()): """Get the interpolation method for resize functions. The major purpose of this function is to wrap a random interp method selection @@ -236,7 +344,7 @@ def _get_interp_method(interp, sizes=()): def resize_short(src, size, interp=2): """Resizes shorter edge to size. - Note: `resize_short` uses OpenCV (not the CV2 Python library). + .. note:: `resize_short` uses OpenCV (not the CV2 Python library). MXNet must have been built with OpenCV for `resize_short` to work. 
Resizes the original image by setting the shorter edge to size diff --git a/tests/python/unittest/test_image.py b/tests/python/unittest/test_image.py index 4063027cc1e5..e0abbd75ef8e 100644 --- a/tests/python/unittest/test_image.py +++ b/tests/python/unittest/test_image.py @@ -141,7 +141,7 @@ def test_imdecode(self): try: import cv2 except ImportError: - return + raise unittest.SkipTest("Unable to import cv2.") for img in TestImage.IMAGES: with open(img, 'rb') as fp: str_image = fp.read() @@ -175,11 +175,12 @@ def test_scale_down(self): assert mx.image.scale_down((360, 1000), (480, 500)) == (360, 375) assert mx.image.scale_down((300, 400), (0, 0)) == (0, 0) + @with_seed() def test_resize_short(self): try: import cv2 except ImportError: - return + raise unittest.SkipTest("Unable to import cv2") for img in TestImage.IMAGES: cv_img = cv2.imread(img) mx_img = mx.nd.array(cv_img[:, :, (2, 1, 0)]) @@ -196,6 +197,25 @@ def test_resize_short(self): mx_resized = mx.image.resize_short(mx_img, new_size, interp) assert_almost_equal(mx_resized.asnumpy()[:, :, (2, 1, 0)], cv_resized, atol=3) + @with_seed() + def test_imresize(self): + try: + import cv2 + except ImportError: + raise unittest.SkipTest("Unable to import cv2") + for img in TestImage.IMAGES: + cv_img = cv2.imread(img) + mx_img = mx.nd.array(cv_img[:, :, (2, 1, 0)]) + new_h = np.random.randint(1, 1000) + new_w = np.random.randint(1, 1000) + for interp_val in range(0, 2): + cv_resized = cv2.resize(cv_img, (new_w, new_h), interpolation=interp_val) + mx_resized = mx.image.imresize(mx_img, new_w, new_h, interp=interp_val) + assert_almost_equal(mx_resized.asnumpy()[:, :, (2, 1, 0)], cv_resized, atol=3) + out_img = mx.nd.zeros((new_h, new_w, 3), dtype=mx_img.dtype) + mx.image.imresize(mx_img, new_w, new_h, interp=interp_val, out=out_img) + assert_almost_equal(out_img.asnumpy()[:, :, (2, 1, 0)], cv_resized, atol=3) + def test_color_normalize(self): for _ in range(10): mean = np.random.rand(3) * 255 @@ -235,6 +255,31 @@ def 
test_imageiter(self): ] _test_imageiter_last_batch(imageiter_list, (2, 3, 224, 224)) + @with_seed() + def test_copyMakeBorder(self): + try: + import cv2 + except ImportError: + raise unittest.SkipTest("Unable to import cv2") + for img in TestImage.IMAGES: + cv_img = cv2.imread(img) + mx_img = mx.nd.array(cv_img) + top = np.random.randint(1, 10) + bot = np.random.randint(1, 10) + left = np.random.randint(1, 10) + right = np.random.randint(1, 10) + new_h, new_w, _ = mx_img.shape + new_h += top + bot + new_w += left + right + val = [np.random.randint(1, 255)] * 3 + for type_val in range(0, 5): + cv_border = cv2.copyMakeBorder(cv_img, top, bot, left, right, borderType=type_val, value=val) + mx_border = mx.image.copyMakeBorder(mx_img, top, bot, left, right, type=type_val, values=val) + assert_almost_equal(mx_border.asnumpy(), cv_border) + out_img = mx.nd.zeros((new_h , new_w, 3), dtype=mx_img.dtype) + mx.image.copyMakeBorder(mx_img, top, bot, left, right, type=type_val, values=val, out=out_img) + assert_almost_equal(out_img.asnumpy(), cv_border) + @with_seed() def test_augmenters(self): # ColorNormalizeAug