Skip to content

Commit 4fc1036

Browse files
authored
Optimize binaryEncode() string quoting (#25613)
One of the optimizations of the Closure compiler is to choose the string quote character that results in fewer escapes in the string itself. I.e. the Closure compiler will turn

```js
var x = '\'\'\'\'\'';
var y = "\"\"\"\"\"";
```

into

```js
var x = "'''''";
var y = '"""""';
```

by selecting the string quote character that allows the contents to have fewer escapes.

However, in the SINGLE_FILE mode, we emit the Wasm code inside the .js file after Closure has run. So all Closure sees is

```js
return binaryDecode("<<< WASM_BINARY_DATA >>>");
```

when optimizing.

This PR implements the same smart string quote selection optimization directly in the `binaryEncode()` function, by checking whether there are fewer `'`s or `"`s in the binary content that is to be encoded.
1 parent 2e4fdb0 commit 4fc1036

File tree

2 files changed

+30
-23
lines changed

2 files changed

+30
-23
lines changed

test/codesize/test_codesize_hello_single_file.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
2-
"a.out.js": 5404,
3-
"a.out.js.gz": 2989,
2+
"a.out.js": 5366,
3+
"a.out.js.gz": 2982,
44
"sent": [
55
"a (fd_write)"
66
]

tools/link.py

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -163,10 +163,6 @@ def base64_encode(filename):
163163
return b64.decode('ascii')
164164

165165

166-
def base64_or_binary_encode(b):
167-
return binary_encode(b) if settings.SINGLE_FILE_BINARY_ENCODE else base64_encode(b)
168-
169-
170166
def align_to_wasm_page_boundary(address):
171167
page_size = webassembly.WASM_PAGE_SIZE
172168
return ((address + (page_size - 1)) // page_size) * page_size
@@ -2439,7 +2435,10 @@ def phase_binaryen(target, options, wasm_target):
24392435
if final_js and settings.SINGLE_FILE and not settings.WASM2JS:
24402436
js = read_file(final_js)
24412437

2442-
js = do_replace(js, '<<< WASM_BINARY_DATA >>>', base64_or_binary_encode(wasm_target))
2438+
if settings.SINGLE_FILE_BINARY_ENCODE:
2439+
js = do_replace(js, '"<<< WASM_BINARY_DATA >>>"', binary_encode(wasm_target))
2440+
else:
2441+
js = do_replace(js, '<<< WASM_BINARY_DATA >>>', base64_encode(wasm_target))
24432442
delete_file(wasm_target)
24442443
write_file(final_js, js)
24452444

@@ -2502,7 +2501,7 @@ def generate_traditional_runtime_html(target, options, js_target, target_basenam
25022501
base_js_target = os.path.basename(js_target)
25032502

25042503
if settings.PROXY_TO_WORKER:
2505-
proxy_worker_filename = (settings.PROXY_TO_WORKER_FILENAME or target_basename) + '.js'
2504+
proxy_worker_filename = f'"{settings.PROXY_TO_WORKER_FILENAME or target_basename}.js"'
25062505
script.inline = worker_js_script(proxy_worker_filename)
25072506
else:
25082507
# Normal code generation path
@@ -2521,7 +2520,7 @@ def generate_traditional_runtime_html(target, options, js_target, target_basenam
25212520
# has be synchronously ready.
25222521
script.un_src()
25232522
script.inline = '''
2524-
fetch('%s').then((result) => result.arrayBuffer())
2523+
fetch(%s).then((result) => result.arrayBuffer())
25252524
.then((buf) => {
25262525
Module.wasmBinary = buf;
25272526
%s;
@@ -2540,7 +2539,7 @@ def generate_traditional_runtime_html(target, options, js_target, target_basenam
25402539
// Current browser does not support WebAssembly, load the .wasm.js JavaScript fallback
25412540
// before the main JS runtime.
25422541
var wasm2js = document.createElement('script');
2543-
wasm2js.src = '%s';
2542+
wasm2js.src = %s;
25442543
wasm2js.onload = loadMainJs;
25452544
document.body.appendChild(wasm2js);
25462545
} else {
@@ -2661,18 +2660,18 @@ def generate_worker_js(target, options, js_target, target_basename):
26612660
# compiler output goes in .worker.js file
26622661
move_file(js_target, utils.replace_suffix(js_target, get_worker_js_suffix()))
26632662
worker_target_basename = target_basename + '.worker'
2664-
proxy_worker_filename = (settings.PROXY_TO_WORKER_FILENAME or worker_target_basename) + '.js'
2663+
proxy_worker_filename = f'"{settings.PROXY_TO_WORKER_FILENAME or worker_target_basename}.js"'
26652664

26662665
target_contents = worker_js_script(proxy_worker_filename)
26672666
utils.write_file(target, target_contents, options.output_eol)
26682667

26692668

2670-
def worker_js_script(proxy_worker_filename):
2669+
def worker_js_script(proxy_worker_filename_enclosed_in_quotes):
26712670
web_gl_client_src = read_file(utils.path_from_root('src/webGLClient.js'))
26722671
proxy_client_src = building.read_and_preprocess(utils.path_from_root('src/proxyClient.js'), expand_macros=True)
2673-
if not settings.SINGLE_FILE and not os.path.dirname(proxy_worker_filename):
2674-
proxy_worker_filename = './' + proxy_worker_filename
2675-
proxy_client_src = do_replace(proxy_client_src, '<<< filename >>>', proxy_worker_filename)
2672+
if not settings.SINGLE_FILE and not os.path.dirname(proxy_worker_filename_enclosed_in_quotes[1:-1]):
2673+
proxy_worker_filename_enclosed_in_quotes = f'"./{proxy_worker_filename_enclosed_in_quotes[1:-1]}"'
2674+
proxy_client_src = do_replace(proxy_client_src, '"<<< filename >>>"', proxy_worker_filename_enclosed_in_quotes)
26762675
return web_gl_client_src + '\n' + proxy_client_src
26772676

26782677

@@ -2992,13 +2991,19 @@ def binary_encode(filename):
29922991

29932992
data = utils.read_binary(filename)
29942993

2995-
out = bytearray(len(data) * 2) # Size output buffer conservatively
2996-
i = 0
2994+
# Decide whether to enclose the generated binary data in single-quotes '' or
2995+
# double-quotes "" by looking at which character ends up requiring fewer
2996+
# escapes of that string character.
2997+
num_single_quotes = data.count(ord("'"))
2998+
num_double_quotes = data.count(ord('"'))
2999+
quote_char = ord("'") if num_single_quotes < num_double_quotes else ord('"')
3000+
3001+
out = bytearray(len(data) * 2 + 2) # Size output buffer conservatively
3002+
out[0] = quote_char # Emit string start quote
3003+
i = 1
29973004
for d in data:
2998-
if d == ord('"'):
2999-
# Escape double quote " character with a backspace since we are writing the binary string inside double quotes.
3000-
# Also closure optimizer will turn the string into being delimited with double quotes, even if it were single quotes to start with. (" -> 2 bytes)
3001-
buf = [ord('\\'), d]
3005+
if d == quote_char:
3006+
buf = [ord('\\'), d] # Escape the string quote character with a backslash since we are writing the binary data inside a string.
30023007
elif d == ord('\r'):
30033008
buf = [ord('\\'), ord('r')] # Escape carriage return 0x0D as \r -> 2 bytes
30043009
elif d == ord('\n'):
@@ -3010,6 +3015,8 @@ def binary_encode(filename):
30103015
for b in buf: # Write the bytes to output buffer
30113016
out[i] = b
30123017
i += 1
3018+
out[i] = quote_char # Emit string end quote
3019+
i += 1
30133020
return out[0:i].decode('utf-8') # Crop output buffer to the actual used size
30143021

30153022

@@ -3018,9 +3025,9 @@ def get_subresource_location(path, mimetype='application/octet-stream'):
30183025
if settings.SINGLE_FILE:
30193026
if settings.SINGLE_FILE_BINARY_ENCODE:
30203027
return binary_encode(path)
3021-
return f'data:{mimetype};base64,{base64_encode(path)}'
3028+
return f'"data:{mimetype};base64,{base64_encode(path)}"'
30223029
else:
3023-
return os.path.basename(path)
3030+
return f'"{os.path.basename(path)}"'
30243031

30253032

30263033
def get_subresource_location_js(path):

0 commit comments

Comments
 (0)