From 4de51a90f75af853f9a4b50b559e204836697029 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Bargull?= Date: Wed, 23 Oct 2013 11:42:25 -0500 Subject: [PATCH] Bug 928508 - Change String.prototype.@@iterator to iterate over code points. r=jorendorff. --- js/public/Class.h | 2 +- js/src/builtin/String.js | 45 +++++++++++ js/src/builtin/Utilities.js | 1 + js/src/jit-test/lib/string.js | 24 ++++++ .../tests/for-of/string-iterator-generic.js | 20 +++-- .../tests/for-of/string-iterator-surfaces.js | 76 +++++++++++++++++++ js/src/jit-test/tests/for-of/strings.js | 47 ++++++++---- js/src/jsiter.cpp | 47 ++++++++++++ js/src/jsiter.h | 6 ++ js/src/jsstr.cpp | 2 +- js/src/vm/GlobalObject.h | 7 +- js/src/vm/SelfHosting.cpp | 32 ++++++++ 12 files changed, 288 insertions(+), 21 deletions(-) create mode 100644 js/src/jit-test/lib/string.js create mode 100644 js/src/jit-test/tests/for-of/string-iterator-surfaces.js diff --git a/js/public/Class.h b/js/public/Class.h index ab82ace9f15c..3c0851707371 100644 --- a/js/public/Class.h +++ b/js/public/Class.h @@ -560,7 +560,7 @@ struct JSClass { // with the following flags. Failure to use JSCLASS_GLOBAL_FLAGS was // previously allowed, but is now an ES5 violation and thus unsupported. // -#define JSCLASS_GLOBAL_SLOT_COUNT (3 + JSProto_LIMIT * 3 + 27) +#define JSCLASS_GLOBAL_SLOT_COUNT (3 + JSProto_LIMIT * 3 + 28) #define JSCLASS_GLOBAL_FLAGS_WITH_SLOTS(n) \ (JSCLASS_IS_GLOBAL | JSCLASS_HAS_RESERVED_SLOTS(JSCLASS_GLOBAL_SLOT_COUNT + (n))) #define JSCLASS_GLOBAL_FLAGS \ diff --git a/js/src/builtin/String.js b/js/src/builtin/String.js index fd4c41cae64e..5957532e68bb 100644 --- a/js/src/builtin/String.js +++ b/js/src/builtin/String.js @@ -40,6 +40,51 @@ function String_repeat(count) { return T; } +#define STRING_ITERATOR_SLOT_ITERATED_OBJECT 0 +#define STRING_ITERATOR_SLOT_NEXT_INDEX 1 + +// ES6 draft specification, section 21.1.3.27, version 2013-09-27. +function String_iterator() { + CheckObjectCoercible(this); + var S = ToString(this); + var iterator = NewStringIterator(); + UnsafeSetReservedSlot(iterator, STRING_ITERATOR_SLOT_ITERATED_OBJECT, S); + UnsafeSetReservedSlot(iterator, STRING_ITERATOR_SLOT_NEXT_INDEX, 0); + return iterator; +} + +function StringIteratorIdentity() { + return this; +} + +function StringIteratorNext() { + // FIXME: Cross-compartment wrapper StringIterator objects should pass this test. Bug 924059. + if (!IsObject(this) || !IsStringIterator(this)) + ThrowError(JSMSG_INCOMPATIBLE_METHOD, "StringIterator", "next", ToString(this)); + + var S = UnsafeGetReservedSlot(this, STRING_ITERATOR_SLOT_ITERATED_OBJECT); + var index = UnsafeGetReservedSlot(this, STRING_ITERATOR_SLOT_NEXT_INDEX); + var size = S.length; + + if (index >= size) { + return { value: undefined, done: true }; + } + + var charCount = 1; + var first = callFunction(std_String_charCodeAt, S, index); + if (first >= 0xD800 && first <= 0xDBFF && index + 1 < size) { + var second = callFunction(std_String_charCodeAt, S, index + 1); + if (second >= 0xDC00 && second <= 0xDFFF) { + charCount = 2; + } + } + + UnsafeSetReservedSlot(this, STRING_ITERATOR_SLOT_NEXT_INDEX, index + charCount); + var value = callFunction(std_String_substring, S, index, index + charCount); + + return { value: value, done: false }; +} + /** * Compare this String against that String, using the locale and collation * options provided. diff --git a/js/src/builtin/Utilities.js b/js/src/builtin/Utilities.js index abc76dc96cef..77873cf58acf 100644 --- a/js/src/builtin/Utilities.js +++ b/js/src/builtin/Utilities.js @@ -64,6 +64,7 @@ var std_Object_getOwnPropertyNames = Object.getOwnPropertyNames; var std_Object_hasOwnProperty = Object.prototype.hasOwnProperty; var std_RegExp_test = RegExp.prototype.test; var Std_String = String; +var std_String_charCodeAt = String.prototype.charCodeAt; var std_String_indexOf = String.prototype.indexOf; var std_String_lastIndexOf = String.prototype.lastIndexOf; var std_String_match = String.prototype.match; diff --git a/js/src/jit-test/lib/string.js b/js/src/jit-test/lib/string.js new file mode 100644 index 000000000000..15431ae2c1fb --- /dev/null +++ b/js/src/jit-test/lib/string.js @@ -0,0 +1,24 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + + +if (typeof isHighSurrogate === 'undefined') { + var isHighSurrogate = function isHighSurrogate(s) { + var c = s.charCodeAt(0); + return c >= 0xD800 && c <= 0xDBFF; + } +} + +if (typeof isLowSurrogate === 'undefined') { + var isLowSurrogate = function isLowSurrogate(s) { + var c = s.charCodeAt(0); + return c >= 0xDC00 && c <= 0xDFFF; + } +} + +if (typeof isSurrogatePair === 'undefined') { + var isSurrogatePair = function isSurrogatePair(s) { + return s.length == 2 && isHighSurrogate(s[0]) && isLowSurrogate(s[1]); + } +} diff --git a/js/src/jit-test/tests/for-of/string-iterator-generic.js b/js/src/jit-test/tests/for-of/string-iterator-generic.js index c0658f617bee..50b60d1c53c2 100644 --- a/js/src/jit-test/tests/for-of/string-iterator-generic.js +++ b/js/src/jit-test/tests/for-of/string-iterator-generic.js @@ -2,14 +2,24 @@ load(libdir + "asserts.js"); load(libdir + "iteration.js"); +load(libdir + "string.js"); function test(obj) { var it = String.prototype[std_iterator].call(obj); - for (var i = 0; i < (obj.length >>> 0); i++) - assertIteratorNext(it, obj[i]); + var s = String(obj); + for (var i = 0, length = s.length; i < length;) { + var r = s[i++]; + if (isHighSurrogate(r) && i < length && isLowSurrogate(s[i])) { + r += s[i++]; + } + assertIteratorNext(it, r); + } assertIteratorDone(it, undefined); } -test({length: 0}); -test(Object.create(['x', 'y', 'z'])); -test(Object.create({length: 2, 0: 'x', 1: 'y'})); +test({toString: () => ""}); +test({toString: () => "xyz"}); +test({toString: () => "\ud808\udf45"}); +test({valueOf: () => ""}); +test({valueOf: () => "xyz"}); +test({valueOf: () => "\ud808\udf45"}); diff --git a/js/src/jit-test/tests/for-of/string-iterator-surfaces.js b/js/src/jit-test/tests/for-of/string-iterator-surfaces.js new file mode 100644 index 000000000000..81e5fd841f1c --- /dev/null +++ b/js/src/jit-test/tests/for-of/string-iterator-surfaces.js @@ -0,0 +1,76 @@ +// String.prototype[@@iterator] and StringIterator.prototype surface tests + +load(libdir + "array-compare.js"); +load(libdir + "asserts.js"); +load(libdir + "iteration.js"); + +function assertDataDescriptor(actual, expected) { + assertEq(actual.value, expected.value); + assertEq(actual.writable, expected.writable); + assertEq(actual.enumerable, expected.enumerable); + assertEq(actual.configurable, expected.configurable); +} + +function isConstructor(o) { + try { + new (new Proxy(o, {construct: () => ({})})); + return true; + } catch(e) { + return false; + } +} + +function assertBuiltinFunction(o, name, arity) { + var fn = o[name]; + assertDataDescriptor(Object.getOwnPropertyDescriptor(o, name), { + value: fn, + writable: true, + enumerable: false, + configurable: true, + }); + + assertEq(typeof fn, "function"); + assertEq(Object.getPrototypeOf(fn), Function.prototype); + // FIXME: Proxy should only have [[Construct]] if target has [[Construct]] (bug 929467) + // assertEq(isConstructor(fn), false); + + arraysEqual(Object.getOwnPropertyNames(fn).sort(), ["length", "name", "arguments", "caller"].sort()); + + // Also test "name", "arguments" and "caller" in addition to "length"? + assertDataDescriptor(Object.getOwnPropertyDescriptor(fn, "length"), { + value: arity, + writable: false, + enumerable: false, + configurable: false, + }); +} + + +// String.prototype[@@iterator] is a built-in function +assertBuiltinFunction(String.prototype, std_iterator, 0); + +// Test StringIterator.prototype surface +var iter = ""[std_iterator](); +var iterProto = Object.getPrototypeOf(iter); + +// StringIterator.prototype inherits from Object.prototype +assertEq(Object.getPrototypeOf(iterProto), Object.prototype); + +// Own properties for StringIterator.prototype: "next" and @@iterator +arraysEqual(Object.getOwnPropertyNames(iterProto).sort(), ["next", std_iterator].sort()); + +// StringIterator.prototype[@@iterator] is a built-in function +assertBuiltinFunction(iterProto, std_iterator, 0); + +// StringIterator.prototype.next is a built-in function +assertBuiltinFunction(iterProto, "next", 0); + +// StringIterator.prototype[@@iterator] is generic and returns |this| +for (var v of [void 0, null, true, false, "", 0, 1, {}, [], iter, iterProto]) { + assertEq(iterProto[std_iterator].call(v), v); +} + +// StringIterator.prototype.next is not generic +for (var v of [void 0, null, true, false, "", 0, 1, {}, [], iterProto]) { + assertThrowsInstanceOf(() => iterProto.next.call(v), TypeError); +} diff --git a/js/src/jit-test/tests/for-of/strings.js b/js/src/jit-test/tests/for-of/strings.js index 34553a448434..1dab295b01f0 100644 --- a/js/src/jit-test/tests/for-of/strings.js +++ b/js/src/jit-test/tests/for-of/strings.js @@ -1,26 +1,47 @@ // for-of works on strings and String objects. -function test(s) { +load(libdir + "string.js"); + +function test(s, expectedCodePoints) { var copy = ''; + var codepoints = 0; + var singleHighSurrogate = false; for (var v of s) { assertEq(typeof v, 'string'); - assertEq(v.length, 1); + assertEq(v.length, isSurrogatePair(v) ? 2 : 1); + assertEq(false, singleHighSurrogate && isLowSurrogate(v)); copy += v; + codepoints += 1; + singleHighSurrogate = !isSurrogatePair(v) && isHighSurrogate(v); } assertEq(copy, String(s)); + assertEq(codepoints, expectedCodePoints); } -test(''); -test('abc'); -test('a \0 \ufffe \ufeff'); +test('', 0); +test('abc', 3); +test('a \0 \ufffe \ufeff', 7); // Non-BMP characters are generally passed to JS in UTF-16, as surrogate pairs. -// ES requires that such pairs be treated as two 16-bit "characters" in pretty -// much every circumstance, including string indexing. We anticipate the same -// requirement will be imposed here, though it's not a sure thing. -test('\ud808\udf45'); +// ES6 requires that such pairs be treated as a single code point in for-of. +test('\ud808\udf45', 1); -test(new String('')); -test(new String('abc')); -test(new String('a \0 \ufffe \ufeff')); -test(new String('\ud808\udf45')); +// Also test invalid surrogate pairs: +// (1) High surrogate not followed by low surrogate +test('\ud808', 1); +test('\ud808\u0000', 2); +// (2) Low surrogate not preceded by high surrogate +test('\udf45', 1); +test('\u0000\udf45', 2); +// (3) Low surrogate followed by high surrogate +test('\udf45\ud808', 2); + +test(new String(''), 0); +test(new String('abc'), 3); +test(new String('a \0 \ufffe \ufeff'), 7); +test(new String('\ud808\udf45'), 1); +test(new String('\ud808'), 1); +test(new String('\ud808\u0000'), 2); +test(new String('\udf45'), 1); +test(new String('\u0000\udf45'), 2); +test(new String('\udf45\ud808'), 2); diff --git a/js/src/jsiter.cpp b/js/src/jsiter.cpp index c3daa512393d..2814109c66c7 100644 --- a/js/src/jsiter.cpp +++ b/js/src/jsiter.cpp @@ -868,6 +868,45 @@ static const JSFunctionSpec array_iterator_methods[] = { JS_FS_END }; +static const Class StringIteratorPrototypeClass = { + "String Iterator", + JSCLASS_IMPLEMENTS_BARRIERS, + JS_PropertyStub, /* addProperty */ + JS_DeletePropertyStub, /* delProperty */ + JS_PropertyStub, /* getProperty */ + JS_StrictPropertyStub, /* setProperty */ + JS_EnumerateStub, + JS_ResolveStub, + JS_ConvertStub, + nullptr /* finalize */ +}; + +enum { + StringIteratorSlotIteratedObject, + StringIteratorSlotNextIndex, + StringIteratorSlotCount +}; + +const Class StringIteratorObject::class_ = { + "String Iterator", + JSCLASS_IMPLEMENTS_BARRIERS | + JSCLASS_HAS_RESERVED_SLOTS(StringIteratorSlotCount), + JS_PropertyStub, /* addProperty */ + JS_DeletePropertyStub, /* delProperty */ + JS_PropertyStub, /* getProperty */ + JS_StrictPropertyStub, /* setProperty */ + JS_EnumerateStub, + JS_ResolveStub, + JS_ConvertStub, + nullptr /* finalize */ +}; + +static const JSFunctionSpec string_iterator_methods[] = { + JS_SELF_HOSTED_FN("@@iterator", "StringIteratorIdentity", 0, 0), + JS_SELF_HOSTED_FN("next", "StringIteratorNext", 0, 0), + JS_FS_END +}; + static bool CloseLegacyGenerator(JSContext *cx, HandleObject genobj); @@ -1853,6 +1892,14 @@ GlobalObject::initIteratorClasses(JSContext *cx, Handle global) global->setReservedSlot(ARRAY_ITERATOR_PROTO, ObjectValue(*proto)); } + if (global->getSlot(STRING_ITERATOR_PROTO).isUndefined()) { + const Class *cls = &StringIteratorPrototypeClass; + proto = global->createBlankPrototype(cx, cls); + if (!proto || !DefinePropertiesAndBrand(cx, proto, nullptr, string_iterator_methods)) + return false; + global->setReservedSlot(STRING_ITERATOR_PROTO, ObjectValue(*proto)); + } + if (global->getSlot(LEGACY_GENERATOR_OBJECT_PROTO).isUndefined()) { proto = NewObjectWithObjectPrototype(cx, global); if (!proto || !DefinePropertiesAndBrand(cx, proto, nullptr, legacy_generator_methods)) diff --git a/js/src/jsiter.h b/js/src/jsiter.h index 4fd11c671775..4651c1560358 100644 --- a/js/src/jsiter.h +++ b/js/src/jsiter.h @@ -139,6 +139,12 @@ class ArrayIteratorObject : public JSObject static const Class class_; }; +class StringIteratorObject : public JSObject +{ + public: + static const Class class_; +}; + bool VectorToIdArray(JSContext *cx, AutoIdVector &props, JSIdArray **idap); diff --git a/js/src/jsstr.cpp b/js/src/jsstr.cpp index 9df6c573f3c1..ccc86152a5a7 100644 --- a/js/src/jsstr.cpp +++ b/js/src/jsstr.cpp @@ -3658,7 +3658,7 @@ static const JSFunctionSpec string_methods[] = { JS_FN("sup", str_sup, 0,0), JS_FN("sub", str_sub, 0,0), #endif - JS_SELF_HOSTED_FN("@@iterator", "ArrayValues", 0,0), + JS_SELF_HOSTED_FN("@@iterator", "String_iterator", 0,0), JS_FS_END }; diff --git a/js/src/vm/GlobalObject.h b/js/src/vm/GlobalObject.h index 883a16159d57..564760af4c77 100644 --- a/js/src/vm/GlobalObject.h +++ b/js/src/vm/GlobalObject.h @@ -96,7 +96,8 @@ class GlobalObject : public JSObject /* One-off properties stored after slots for built-ins. */ static const unsigned ARRAY_ITERATOR_PROTO = FROM_BUFFER_UINT8CLAMPED + 1; - static const unsigned LEGACY_GENERATOR_OBJECT_PROTO = ARRAY_ITERATOR_PROTO + 1; + static const unsigned STRING_ITERATOR_PROTO = ARRAY_ITERATOR_PROTO + 1; + static const unsigned LEGACY_GENERATOR_OBJECT_PROTO = STRING_ITERATOR_PROTO + 1; static const unsigned STAR_GENERATOR_OBJECT_PROTO = LEGACY_GENERATOR_OBJECT_PROTO + 1; static const unsigned MAP_ITERATOR_PROTO = STAR_GENERATOR_OBJECT_PROTO + 1; static const unsigned SET_ITERATOR_PROTO = MAP_ITERATOR_PROTO + 1; @@ -466,6 +467,10 @@ class GlobalObject : public JSObject return getOrCreateObject(cx, ARRAY_ITERATOR_PROTO, initIteratorClasses); } + JSObject *getOrCreateStringIteratorPrototype(JSContext *cx) { + return getOrCreateObject(cx, STRING_ITERATOR_PROTO, initIteratorClasses); + } + JSObject *getOrCreateLegacyGeneratorObjectPrototype(JSContext *cx) { return getOrCreateObject(cx, LEGACY_GENERATOR_OBJECT_PROTO, initIteratorClasses); } diff --git a/js/src/vm/SelfHosting.cpp b/js/src/vm/SelfHosting.cpp index 6024256ecdfd..7e19741e6a80 100644 --- a/js/src/vm/SelfHosting.cpp +++ b/js/src/vm/SelfHosting.cpp @@ -505,6 +505,35 @@ intrinsic_IsArrayIterator(JSContext *cx, unsigned argc, Value *vp) return true; } +static bool +intrinsic_NewStringIterator(JSContext *cx, unsigned argc, Value *vp) +{ + CallArgs args = CallArgsFromVp(argc, vp); + JS_ASSERT(args.length() == 0); + + RootedObject proto(cx, cx->global()->getOrCreateStringIteratorPrototype(cx)); + if (!proto) + return false; + + JSObject *obj = NewObjectWithGivenProto(cx, &StringIteratorObject::class_, proto, cx->global()); + if (!obj) + return false; + + args.rval().setObject(*obj); + return true; +} + +static bool +intrinsic_IsStringIterator(JSContext *cx, unsigned argc, Value *vp) +{ + CallArgs args = CallArgsFromVp(argc, vp); + JS_ASSERT(args.length() == 1); + JS_ASSERT(args[0].isObject()); + + args.rval().setBoolean(args[0].toObject().is()); + return true; +} + /* * ParallelTestsShouldPass(): Returns false if we are running in a * mode (such as --ion-eager) that is known to cause additional @@ -588,6 +617,9 @@ static const JSFunctionSpec intrinsic_functions[] = { JS_FN("NewArrayIterator", intrinsic_NewArrayIterator, 0,0), JS_FN("IsArrayIterator", intrinsic_IsArrayIterator, 1,0), + JS_FN("NewStringIterator", intrinsic_NewStringIterator, 0,0), + JS_FN("IsStringIterator", intrinsic_IsStringIterator, 1,0), + JS_FN("ForkJoin", intrinsic_ForkJoin, 2,0), JS_FN("ForkJoinSlices", intrinsic_ForkJoinSlices, 0,0), JS_FN("NewParallelArray", intrinsic_NewParallelArray, 3,0),