Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Memory initializer in string literal #3326

Merged
merged 5 commits into from
Jun 16, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 38 additions & 5 deletions emcc
Original file line number Diff line number Diff line change
Expand Up @@ -1289,6 +1289,10 @@ try:
# Emscripten
logging.debug('LLVM => JS')
extra_args = [] if not js_libraries else ['--libraries', ','.join(map(os.path.abspath, js_libraries))]
if memory_init_file:
shared.Settings.MEM_INIT_METHOD = 1
else:
assert shared.Settings.MEM_INIT_METHOD != 1
final = shared.Building.emscripten(final, append_ext=False, extra_args=extra_args)
if DEBUG: save_intermediate('original')

Expand Down Expand Up @@ -1340,18 +1344,47 @@ try:

js_transform_tempfiles = [final]

if memory_init_file:
if shared.Settings.MEM_INIT_METHOD > 0:
memfile = target + '.mem'
shared.try_delete(memfile)
def repl(m):
# handle chunking of the memory initializer
s = m.groups(0)[0]
if len(s) == 0 and not shared.Settings.EMTERPRETIFY: return m.group(0) # emterpreter must have a mem init file; otherwise, don't emit 0-size ones
open(memfile, 'wb').write(''.join(map(lambda x: chr(int(x or '0')), s.split(','))))
s = m.group(1)
if len(s) == 0: return '' # don't emit 0-size ones
membytes = [int(x or '0') for x in s.split(',')]
while membytes and membytes[-1] == 0:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a new optimization?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, stripping trailing zeros is new, at least in this context; I don't know whether the split_initializer code affects trailing zeros as well.

membytes.pop()
if not membytes: return ''
if not memory_init_file:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this section needs a comment

# memory initializer in a string literal
s = list(membytes)
if shared.Settings.ASSERTIONS:
# append checksum of length and content
crcTable = []
for i in range(256):
crc = i
for bit in range(8):
crc = (crc >> 1) ^ ((crc & 1) * 0xedb88320)
crcTable.append(crc)
crc = 0xffffffff
n = len(s)
crc = crcTable[(crc ^ n) & 0xff] ^ (crc >> 8)
crc = crcTable[(crc ^ (n >> 8)) & 0xff] ^ (crc >> 8)
for i in s:
crc = crcTable[(crc ^ i) & 0xff] ^ (crc >> 8)
for i in range(4):
s.append((crc >> (8 * i)) & 0xff)
s = ''.join(map(chr, s))
s = s.replace('\\', '\\\\').replace("'", "\\'")
s = s.replace('\n', '\\n').replace('\r', '\\r')
def escape(x): return '\\x{:02x}'.format(ord(x.group()))
s = re.sub('[\x80-\xff]', escape, s)
return "memoryInitializer = '%s';" % s
open(memfile, 'wb').write(''.join(map(chr, membytes)))
if DEBUG:
# Copy into temp dir as well, so can be run there too
shared.safe_copy(memfile, os.path.join(shared.get_emscripten_temp_dir(), os.path.basename(memfile)))
return 'var memoryInitializer = "%s";' % os.path.basename(memfile)
return 'memoryInitializer = "%s";' % os.path.basename(memfile)
src = re.sub(shared.JS.memory_initializer_pattern, repl, open(final).read(), count=1)
open(final + '.mem.js', 'w').write(src)
final += '.mem.js'
Expand Down
31 changes: 31 additions & 0 deletions src/postamble.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,35 @@

// === Auto-generated postamble setup entry stuff ===

#if MEM_INIT_METHOD == 2
#if USE_PTHREADS
if (memoryInitializer && !ENVIRONMENT_IS_PTHREAD) (function(s) {
#else
if (memoryInitializer) (function(s) {
#endif
var i, n = s.length;
#if ASSERTIONS
n -= 4;
var crc, bit, table = new Int32Array(256);
for (i = 0; i < 256; ++i) {
for (crc = i, bit = 0; bit < 8; ++bit)
crc = (crc >>> 1) ^ ((crc & 1) * 0xedb88320);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add curly braces around this loop body.

table[i] = crc >>> 0;
}
crc = -1;
crc = table[(crc ^ n) & 0xff] ^ (crc >>> 8);
crc = table[(crc ^ (n >>> 8)) & 0xff] ^ (crc >>> 8);
for (i = 0; i < s.length; ++i) {
crc = table[(crc ^ s.charCodeAt(i)) & 0xff] ^ (crc >>> 8);
}
assert(crc === 0, "memory initializer checksum");
#endif
for (i = 0; i < n; ++i) {
HEAPU8[STATIC_BASE + i] = s.charCodeAt(i);
}
})(memoryInitializer);
#else
#if MEM_INIT_METHOD == 1
#if USE_PTHREADS
if (memoryInitializer && !ENVIRONMENT_IS_PTHREAD) {
#else
Expand Down Expand Up @@ -52,6 +81,8 @@ if (memoryInitializer) {
}
}
}
#endif
#endif

function ExitStatus(status) {
this.name = "ExitStatus";
Expand Down
5 changes: 5 additions & 0 deletions src/settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ var INVOKE_RUN = 1; // Whether we will run the main() function. Disable if you e
// can do with Module.callMain(), with an optional parameter of commandline args).
var NO_EXIT_RUNTIME = 0; // If set, the runtime is not quit when main() completes (allowing code to
// run afterwards, for example from the browser main event loop).
var MEM_INIT_METHOD = 0; // How to represent the initial memory content.
// 0: keep the array literal representing the initial memory data
// 1: create a *.mem file containing the binary data of the initial memory;
//    use the --memory-init-file command line switch to select this method
//    (emcc sets this value to 1 itself when that switch is enabled, and
//    asserts that the value is not 1 when the switch is disabled)
// 2: embed a string literal representing the initial memory data; when
//    ASSERTIONS is on, a CRC checksum over the length and content is appended
//    to the string and checked again before the data is copied into the heap
var TOTAL_STACK = 5*1024*1024; // The total stack size. There is no way to enlarge the stack, so this
// value must be large enough for the program's requirements. If
// assertions are on, we will assert on not exceeding this, otherwise,
Expand Down
18 changes: 18 additions & 0 deletions tests/meminit_pairs.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/*
 * Byte values under test, stored XORed with 32. After undoing the XOR they
 * are 0x00, 0x5c ('\\'), 0x22 ('"'), 0x27 ('\''), 0x7f, 0x80 and 0xff.
 * NOTE(review): presumably chosen because they need escaping or special care
 * inside a JS string literal -- confirm against the memory initializer writer.
 */
unsigned char problematic[] = { 0x20, 0x7c, 0x02, 0x07, 0x5f, 0xa0, 0xdf };

/*
 * Checks every ordered pair (a, b) of the problematic bytes against the
 * `data` table, which the test harness prepends to this file. The table is
 * expected to hold a at data[a][2*b] and b at data[a][2*b + 1]. Any mismatch
 * is printed and sets result to 1; the outcome is handed to REPORT_RESULT().
 */
int main() {
  unsigned char a, b;
  int result = 0, i, j;
  for (i = 0; i < sizeof(problematic); ++i) {
    a = problematic[i] ^ 32;
    for (j = 0; j < sizeof(problematic); ++j) {
      b = problematic[j] ^ 32;
      if (((const unsigned char)data[a][2*b]) != a ||
          ((const unsigned char)data[a][2*b + 1]) != b) {
        result = 1;
        /* "%02x" (not "%x02x") zero-pads the byte; the cast keeps values
           >= 0x80 from being sign-extended when char is signed. */
        printf("data[0x%02x][0x%03x]=%02x\n", a, 2*b, (unsigned char)data[a][2*b]);
        printf("data[0x%02x][0x%03x]=%02x\n", a, 2*b + 1, (unsigned char)data[a][2*b + 1]);
      }
    }
  }
  REPORT_RESULT()
}
2 changes: 1 addition & 1 deletion tests/parallel_test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
assert not os.environ.get('EM_SAVE_DIR'), 'Need separate directories to avoid the parallel tests clashing'

# run slower ones first, to optimize total time
optimal_order = ['asm3i', 'asm1i', 'asm2nn', 'asm3', 'asm2', 'asm2g', 'asm2f', 'asm1', 'default']
optimal_order = ['asm3i', 'asm1i', 'asm2nn', 'asm3', 'asm2', 'asm2m', 'asm2g', 'asm2f', 'asm1', 'default']
assert set(optimal_order) == set(test_modes), 'need to update the list of slowest modes'

# set up a background thread to report progress
Expand Down
18 changes: 10 additions & 8 deletions tests/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def path_from_root(*pathelems):

# Core test runner class, shared between normal tests and benchmarks
checked_sanity = False
test_modes = ['default', 'asm1', 'asm2', 'asm3', 'asm2f', 'asm2g', 'asm1i', 'asm3i', 'asm2nn']
test_modes = ['default', 'asm1', 'asm2', 'asm3', 'asm2f', 'asm2g', 'asm1i', 'asm3i', 'asm2m', 'asm2nn']
test_index = 0

use_all_engines = os.environ.get('EM_ALL_ENGINES') # generally js engines are equivalent, testing 1 is enough. set this
Expand All @@ -62,6 +62,14 @@ def skipme(self): # used by tests we ask on the commandline to be skipped, see r
def is_emterpreter(self):
return False

def uses_memory_init_file(self):
  """Report whether the current emcc_args lead to a separate memory init file.

  Returns None when no emcc_args are set, the integer given after an explicit
  '--memory-init-file' switch when present, and otherwise a bool that is true
  only for -O2/-O3/-Oz builds that are not side modules.
  """
  args = self.emcc_args
  if args is None:
    return None

  switch = '--memory-init-file'
  if switch in args:
    # The value of the switch is the argument right after it.
    return int(args[args.index(switch) + 1])

  optimized = any(level in args for level in ('-O2', '-O3', '-Oz'))
  return optimized and not Settings.SIDE_MODULE

def setUp(self):
Settings.reset()
self.banned_js_engines = []
Expand Down Expand Up @@ -252,16 +260,10 @@ def build(self, src, dirname, filename, output_processor=None, main_file=None, a
output_processor(open(filename + '.o.js').read())

if self.emcc_args is not None:
if '--memory-init-file' in self.emcc_args:
memory_init_file = int(self.emcc_args[self.emcc_args.index('--memory-init-file')+1])
else:
memory_init_file = ('-O2' in self.emcc_args or '-O3' in self.emcc_args or '-Oz' in self.emcc_args) and not Settings.SIDE_MODULE
src = open(filename + '.o.js').read()
if memory_init_file:
if self.uses_memory_init_file():
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kripken: Judging from your lack of comment on this line, I take it that dropping this particular check is OK?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes.

# side memory init file, or an empty one in the js
assert ('/* memory initializer */' not in src) or ('/* memory initializer */ allocate([]' in src)
else:
assert 'memory initializer */' in src or '/*' not in src # memory initializer comment, or cleaned-up source with no comments

def validate_asmjs(self, err):
if 'uccessfully compiled asm.js code' in err and 'asm.js link error' not in err:
Expand Down
23 changes: 23 additions & 0 deletions tests/test_browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2637,3 +2637,26 @@ def test_pthread_file_io(self):
# Test that it is possible to send a signal via calling alarm(timeout), which in turn calls to the signal handler set by signal(SIGALRM, func);
def test_sigalrm(self):
self.btest(path_from_root('tests', 'sigalrm.cpp'), expected='0', args=['-O3'])

def test_meminit_pairs(self):
  # Generate a 256-entry string table in which entry i holds the byte pairs
  # (i, j) for every j, append the checker from tests/meminit_pairs.c, and
  # run it with the string-literal memory initializer (MEM_INIT_METHOD=2)
  # under three closure/debug combinations.
  rows = []
  for i in range(256):
    rows.append(''.join('\\x{:02x}\\x{:02x}'.format(i, j) for j in range(256)))
  source = 'const char *data[] = {\n "' + '",\n "'.join(rows)
  with open(path_from_root('tests', 'meminit_pairs.c')) as checker:
    source += '"\n};\n' + checker.read()
  base_args = ["-O2", "--memory-init-file", "0", "-s", "MEM_INIT_METHOD=2", "-s", "ASSERTIONS=1"]
  variants = (["--closure", "0"], ["--closure", "0", "-g"], ["--closure", "1"])
  for extra in variants:
    self.btest(source, expected='0', args=base_args + extra)

def test_meminit_big(self):
  # Same table as test_meminit_pairs, but repeated 256 times so that the
  # memory initializer becomes very large.
  rows = []
  for i in range(256):
    rows.append(''.join('\\x{:02x}\\x{:02x}'.format(i, j) for j in range(256)))
  source = 'const char *data[] = {\n "' + '",\n "'.join(rows * 256)
  with open(path_from_root('tests', 'meminit_pairs.c')) as checker:
    source += '"\n};\n' + checker.read()
  # Every data byte takes four source characters ("\xNN"), so more than
  # 2**27 source characters means more than 32M of memory initializer.
  assert len(source) > (1 << 27)
  base_args = ["-O2", "--memory-init-file", "0", "-s", "MEM_INIT_METHOD=2", "-s", "ASSERTIONS=1"]
  variants = (["--closure", "0"], ["--closure", "0", "-g"], ["--closure", "1"])
  for extra in variants:
    self.btest(source, expected='0', args=base_args + extra)
10 changes: 9 additions & 1 deletion tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -4374,6 +4374,13 @@ def test_strstr(self):
self.do_run_from_file(src, output)

def test_fnmatch(self):
  # The asm2m mode normally builds with ASSERTIONS=1; switch it off for this
  # one test so that the mode is also covered without assertions.
  assert 'asm2m' in test_modes
  if self.run_name == 'asm2m':
    args = self.emcc_args
    pos = args.index('ASSERTIONS=1')
    # The setting must be the value of a preceding '-s' switch.
    assert pos > 0 and args[pos - 1] == '-s'
    args[pos] = 'ASSERTIONS=0'

  base = path_from_root('tests', 'core', 'fnmatch')
  self.do_run_from_file(base + '.c', base + '.out')
Expand Down Expand Up @@ -4549,7 +4556,7 @@ def process(filename):
try_delete(mem_file)
self.do_run(src, ('size: 7\ndata: 100,-56,50,25,10,77,123\nloop: 100 -56 50 25 10 77 123 \ninput:hi there!\ntexto\n$\n5 : 10,30,20,11,88\nother=some data.\nseeked=me da.\nseeked=ata.\nseeked=ta.\nfscanfed: 10 - hello\n5 bytes to dev/null: 5\nok.\n \ntexte\n', 'size: 7\ndata: 100,-56,50,25,10,77,123\nloop: 100 -56 50 25 10 77 123 \ninput:hi there!\ntexto\ntexte\n$\n5 : 10,30,20,11,88\nother=some data.\nseeked=me da.\nseeked=ata.\nseeked=ta.\nfscanfed: 10 - hello\n5 bytes to dev/null: 5\nok.\n'),
post_build=post, extra_emscripten_args=['-H', 'libc/fcntl.h'])
if '-O2' in self.emcc_args:
if self.uses_memory_init_file():
assert os.path.exists(mem_file)

def test_files_m(self):
Expand Down Expand Up @@ -7387,6 +7394,7 @@ def setUp(self):
asm2g = make_run("asm2g", compiler=CLANG, emcc_args=["-O2", "-g", "-s", "ASSERTIONS=1", "-s", "SAFE_HEAP=1"])
asm1i = make_run("asm1i", compiler=CLANG, emcc_args=["-O1", '-s', 'EMTERPRETIFY=1'])
asm3i = make_run("asm3i", compiler=CLANG, emcc_args=["-O3", '-s', 'EMTERPRETIFY=1'])
asm2m = make_run("asm2m", compiler=CLANG, emcc_args=["-O2", "--memory-init-file", "0", "-s", "MEM_INIT_METHOD=2", "-s", "ASSERTIONS=1"])

# Legacy test modes -
asm2nn = make_run("asm2nn", compiler=CLANG, emcc_args=["-O2"], env={"EMCC_NATIVE_OPTIMIZER": "0"})
Expand Down
14 changes: 14 additions & 0 deletions tests/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -4879,3 +4879,17 @@ def test_debug_asmLastOpts(self):
out, err = Popen([PYTHON, EMCC, 'src.c', '-s', 'EXPORTED_FUNCTIONS=["_main", "_treecount"]', '--minify', '0', '-g4', '-Oz']).communicate()
self.assertContained('hello, world!', run_js('a.out.js'))

def test_meminit_crc(self):
  # Build a program with the string-literal memory initializer and
  # assertions on, corrupt the embedded data in the generated JS, and check
  # that running it reports the checksum assertion.
  with open('src.c', 'w') as src_file:
    src_file.write(r'''
#include<stdio.h>
int main() { printf("Mary had a little lamb.\n"); }
''')
  build_cmd = [PYTHON, EMCC, 'src.c', '-O2', '--memory-init-file', '0',
               '-s', 'MEM_INIT_METHOD=2', '-s', 'ASSERTIONS=1']
  Popen(build_cmd).communicate()

  with open('a.out.js', 'r') as js_file:
    original = js_file.read()
  # Changing the text alters the initializer content but not its checksum.
  tampered = original.replace('Mary had', 'Paul had')
  with open('a.out.js', 'w') as js_file:
    js_file.write(tampered)

  output = run_js('a.out.js', assert_returncode=None, stderr=subprocess.STDOUT)
  self.assertContained('Assertion failed: memory initializer checksum', output)