Always prefer the PDF.js JPEG decoder for very large images, in order to reduce peak memory usage (issue 11694) #11707
Conversation
Force-pushed from a923379 to d647cc0
/botio test
From: Bot.io (Windows)
Received
Command cmd_test from @Snuffleupagus received. Current queue size: 0
Live output at: http://54.215.176.217:8877/74eab0a8f5f79c3/output.txt

From: Bot.io (Linux m4)
Received
Command cmd_test from @Snuffleupagus received. Current queue size: 0
Live output at: http://54.67.70.0:8877/30747c9310fd381/output.txt

From: Bot.io (Linux m4)
Failed
Full output at http://54.67.70.0:8877/30747c9310fd381/output.txt
Total script time: 19.92 mins
Image differences available at: http://54.67.70.0:8877/30747c9310fd381/reftest-analyzer.html#web=eq.log

From: Bot.io (Windows)
Failed
Full output at http://54.215.176.217:8877/74eab0a8f5f79c3/output.txt
Total script time: 25.14 mins
Image differences available at: http://54.215.176.217:8877/74eab0a8f5f79c3/reftest-analyzer.html#web=eq.log
Force-pushed from d647cc0 to 74d3666
Always prefer the PDF.js JPEG decoder for very large images, in order to reduce peak memory usage (issue 11694)

When JPEG images are decoded by the browser, on the main thread, there's a handful of short-lived copies of the image data; see https://github.com/mozilla/pdf.js/blob/c3f4690bde8137d80c74203b1ad91476fc2ca160/src/display/api.js#L2364-L2408

That code thus becomes quite problematic for very large JPEG images, since it significantly increases peak memory usage during decoding. In the referenced issue there are a couple of JPEG images whose dimensions are `10006 x 7088` (i.e. ~68 megapixels), which causes the *peak* memory usage to increase by close to `1 GB` (i.e. one gigabyte) in my testing.

By letting the PDF.js JPEG decoder, rather than the browser, handle very large images, the *peak* memory usage is considerably reduced and the allocated memory also seems to be reclaimed faster.

*Please note:* This will lead to movement in some existing `eq` tests. Refer to #11523 (comment) for an explanation of the different test "failures".

Fixes #11694 (to the extent that doing so is possible, given the size of the JPEG images).
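To make the arithmetic concrete: one decoded RGBA copy of a `10006 x 7088` image occupies `10006 * 7088 * 4` bytes, roughly 284 MB, so a handful of simultaneous short-lived copies readily approaches 1 GB. The following is a minimal sketch of the kind of size-based decoder selection described above; the threshold value and all names in it are hypothetical illustrations, not the actual PDF.js internals:

```js
// Hypothetical sketch of the decision described above -- the constant and
// function names are illustrative, not the real PDF.js API.

// Assumed threshold: images with more pixels than this skip the native
// (browser) decoder and are decoded by the JavaScript JPEG decoder instead.
const MAX_NATIVE_JPEG_PIXELS = 4096 * 4096;

function shouldUseJsJpegDecoder(width, height) {
  // Each decoded RGBA copy costs width * height * 4 bytes, and the native
  // main-thread path keeps several such copies alive at once, so the cost
  // grows with the pixel count rather than the compressed file size.
  return width * height > MAX_NATIVE_JPEG_PIXELS;
}

// The 10006 x 7088 images from issue 11694 exceed the threshold:
console.log(shouldUseJsJpegDecoder(10006, 7088)); // true
console.log(shouldUseJsJpegDecoder(1024, 768));   // false
```

The trade-off is decoding speed: the JavaScript decoder is slower than the native one, which is why only very large images, where peak memory is the dominant concern, would be diverted to it.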
Force-pushed from 74d3666 to 62a9c26
/botio-linux preview
From: Bot.io (Linux m4)
Received
Command cmd_preview from @timvandermeij received. Current queue size: 0
Live output at: http://54.67.70.0:8877/57505d60df0d083/output.txt

From: Bot.io (Linux m4)
Success
Full output at http://54.67.70.0:8877/57505d60df0d083/output.txt
Total script time: 2.44 mins
Published
I can confirm that the tab doesn't crash anymore. In general I also think that this is a better approach for large images. Thanks!