diff --git a/encoding.bs b/encoding.bs
index d348d76..758331d 100644
--- a/encoding.bs
+++ b/encoding.bs
@@ -1294,16 +1294,20 @@ attribute's getter, when invoked, must return "utf-8".
+dictionary TextEncoderEncodeIntoResult { // Progress report returned by TextEncoder's encodeInto().
+ unsigned long long read; // Number of code units of the source string that were converted.
+ unsigned long long written; // Number of bytes modified in the destination array.
+};
+
[Constructor,
Exposed=(Window,Worker)]
interface TextEncoder {
[NewObject] Uint8Array encode(optional USVString input = ""); // Runs the UTF-8 encoder on input and returns the bytes in a new Uint8Array.
+ TextEncoderEncodeIntoResult encodeInto(USVString source, Uint8Array destination); // Runs the UTF-8 encoder on source, writing into destination; returns the progress made (read/written).
};
TextEncoder includes TextEncoderCommon;
-A {{TextEncoder}} object has an associated encoder. -
A {{TextEncoder}} object offers no label argument as it only
supports UTF-8. It also offers no stream option as no encoder
requires buffering of scalar values.
@@ -1319,18 +1323,17 @@ requires buffering of scalar values.
encoder . encode([input = ""])
encoder . encodeInto(source, destination)
+ Runs the UTF-8 encoder on source, stores the result of that operation into + destination, and returns the progress made as a dictionary in which + {{TextEncoderEncodeIntoResult/read}} is the number of code units of + source that were converted and {{TextEncoderEncodeIntoResult/written}} is the number of bytes modified in + destination.
The TextEncoder()
-constructor, when invoked, must run these steps:
-
-
Let enc be a new {{TextEncoder}} object. - -
Return enc. -
The encode(input) method, when invoked,
must run these steps:
@@ -1347,20 +1350,108 @@ must run these steps:
Let token be the result of reading from input. -
Let result be the result of - processing token for - encoder, input, output. +
Let result be the result of processing token for the + UTF-8 encoder, input, output. + +
Assert: result is not error. + +
The UTF-8 encoder cannot return error. + +
If result is finished, convert output into a byte sequence, + and then return a {{Uint8Array}} object wrapping an {{ArrayBuffer}} containing output. + + + + +
The
+encodeInto(source, destination)
+method, when invoked, must run these steps:
+
+
Let read be 0. + +
Let written be 0. + +
Let destinationBytes be the result of + getting a reference to the bytes held by + destination. + +
Let unused be a new stream. + +
The handler algorithm invoked below requires this argument, but it is not + used by the UTF-8 encoder. + +
Convert source to a stream. + +
While true: + +
Let token be the result of reading from source. + +
Let result be the result of running the UTF-8 encoder's handler + on unused and token. + +
If result is finished, convert output into a - byte sequence, and then return a {{Uint8Array}} object wrapping an - {{ArrayBuffer}} containing output. - +
Otherwise: -
If destinationBytes's length − + written is greater than or equal to the number of bytes in result, then: + +
If token is greater than U+FFFF, then increment read by 2. + +
Otherwise, increment read by 1. + +
Write the bytes in result into destinationBytes, from byte + offset written. + +
Increment written by the number of bytes in result. +
Otherwise, break. +
Return a new {{TextEncoderEncodeIntoResult}} dictionary whose + {{TextEncoderEncodeIntoResult/read}} member is read and + {{TextEncoderEncodeIntoResult/written}} member is written.
The encodeInto() method can + be used to encode a string into an existing {{ArrayBuffer}} object. Various details below (such as the malloc(), realloc(), and free() helpers and the cachedEncoder object) are left + as an exercise for the reader, but this demonstrates an approach one could take to use this method: + +
+function convertString(buffer, input, callback) {
+ let bufferSize = 256,
+ bufferStart = malloc(buffer, bufferSize),
+ writeOffset = 0,
+ readOffset = 0;
+ while (true) {
+ const view = new Uint8Array(buffer, bufferStart + writeOffset, bufferSize - writeOffset),
+ {read, written} = cachedEncoder.encodeInto(input.substring(readOffset), view);
+ readOffset += read;
+ writeOffset += written;
+ if (readOffset === input.length) {
+ callback(bufferStart, writeOffset);
+ free(buffer, bufferStart);
+ return;
+ }
+ bufferSize *= 2;
+ bufferStart = realloc(buffer, bufferStart, bufferSize);
+ }
+}
+
+