// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include 'src/builtins/builtins-string-gen.h'

// Abstract base class of every string representation declared in this file.
@abstract
@reserveBitsInInstanceType(7)
extern class String extends Name {
  // Views the map's instance type as the StringInstanceType bitfield.
  macro StringInstanceType(): StringInstanceType {
    return %RawDownCast<StringInstanceType>(
        Convert<uint16>(this.map.instance_type));
  }

  // Reads the is_not_internalized bit of this string's instance type.
  macro IsNotInternalized(): bool {
    return this.StringInstanceType().is_not_internalized;
  }

  // Keep this in sync with the C++ String::IsOneByteRepresentation.
  // A ThinString is looked through once (its `actual` string is inspected);
  // every other string is inspected directly.
  macro IsOneByteRepresentation(): bool {
    let s: String;
    try {
      const thin = Cast<ThinString>(this) otherwise NotThin;
      s = thin.actual;
    } label NotThin {
      s = this;
    }
    return s.StringInstanceType().is_one_byte;
  }

  // Keep this in sync with the C++ String::IsOneByteRepresentationUnderneath.
  // Follows cons/thin/sliced indirections until a string of another kind is
  // reached and reports that string's is_one_byte bit.
  macro IsOneByteRepresentationUnderneath(): bool {
    let string = this;
    while (true) {
      typeswitch (string) {
        case (cons: ConsString): {
          dcheck(cons.IsFlat());
          string = cons.first;
        }
        case (thin: ThinString): {
          string = thin.actual;
        }
        case (slice: SlicedString): {
          string = slice.parent;
        }
        case (String): {
          return string.StringInstanceType().is_one_byte;
        }
      }
    }
    // The loop above only exits through the return in the last case.
    VerifiedUnreachable();
  }

  const length: int32;
}

extern enum StringRepresentationTag extends uint32 {
  kSeqStringTag,
  kConsStringTag,
  kExternalStringTag,
  kSlicedStringTag,
  kThinStringTag
}

// Bit layout used by String::StringInstanceType() above.
bitfield struct StringInstanceType extends uint16 {
  representation: StringRepresentationTag: 3 bit;
  is_one_byte: bool: 1 bit;
  is_uncached: bool: 1 bit;
  is_not_internalized: bool: 1 bit;
  is_shared: bool: 1 bit;
}

@generateBodyDescriptor
@doNotGenerateCast
extern class ConsString extends String {
  // Corresponds to String::IsFlat() in the C++ runtime.
  // A cons string is flat when its second part has length zero.
  macro IsFlat(): bool {
    return this.second.length == 0;
  }

  // Unlike String::IsOneByteRepresentation, this reads the cons string's own
  // instance type without the ThinString check.
  macro IsOneByteRepresentation(): bool {
    return this.StringInstanceType().is_one_byte;
  }

  first: String;
  second: String;
}

@abstract
@doNotGenerateCast
extern class ExternalString extends String {
  resource: ExternalPointer;
  // WARNING: This field is missing for uncached external strings.
  resource_data: ExternalPointer;
}

// NOTE(review): the element types on the typed RawPtr results below were
// reconstructed from the one-byte/two-byte class names -- confirm.
extern operator '.resource_ptr' macro LoadExternalStringResourcePtr(
    ExternalString): RawPtr;
extern operator '.resource_data_ptr' macro LoadExternalStringResourceDataPtr(
    ExternalString): RawPtr;
extern operator '.resource_data_ptr' macro LoadExternalStringResourceDataPtr(
    ExternalOneByteString): RawPtr<char8>;
extern operator '.resource_data_ptr' macro LoadExternalStringResourceDataPtr(
    ExternalTwoByteString): RawPtr<char16>;

extern macro ExternalOneByteStringGetChars(ExternalOneByteString):
    RawPtr<char8>;
extern macro ExternalTwoByteStringGetChars(ExternalTwoByteString):
    RawPtr<char16>;

@doNotGenerateCast
extern class ExternalOneByteString extends ExternalString {
  // Returns a pointer to the characters. Uncached strings go through the
  // extern helper because they lack the resource_data field (see the warning
  // on ExternalString); cached strings read resource_data directly.
  macro GetChars(): RawPtr<char8> {
    if (this.StringInstanceType().is_uncached) {
      return ExternalOneByteStringGetChars(this);
    } else {
      return this.resource_data_ptr;
    }
  }
}

@doNotGenerateCast
extern class ExternalTwoByteString extends ExternalString {
  // Two-byte counterpart of ExternalOneByteString::GetChars.
  macro GetChars(): RawPtr<char16> {
    if (this.StringInstanceType().is_uncached) {
      return ExternalTwoByteStringGetChars(this);
    } else {
      return this.resource_data_ptr;
    }
  }
}

@doNotGenerateCast
extern class InternalizedString extends String {
}

@abstract
@doNotGenerateCast
extern class SeqString extends String {
}

@generateBodyDescriptor
@doNotGenerateCast
extern class SeqOneByteString extends SeqString {
  const chars[length]: char8;
}

@generateBodyDescriptor
@doNotGenerateCast
extern class SeqTwoByteString extends SeqString {
  const chars[length]: char16;
}

@generateBodyDescriptor
@doNotGenerateCast
extern class SlicedString extends String {
  parent: String;
  offset: Smi;
}

@generateBodyDescriptor
@doNotGenerateCast
extern class ThinString extends String {
  actual: String;
}

// A direct string can be accessed directly through CSA without going into the
// C++ runtime. See also: ToDirectStringAssembler.
type DirectString extends String;

// Allocates a one-byte sequential string of `length` characters whose chars
// are initialized from `content`. `length` must be non-zero and at most
// kStringMaxLength (checked with dcheck).
macro AllocateNonEmptySeqOneByteString<Iterator: type>(
    length: uint32, content: Iterator): SeqOneByteString {
  dcheck(length != 0 && length <= kStringMaxLength);
  return new (ClearPadding) SeqOneByteString{
    map: kOneByteStringMap,
    raw_hash_field: kNameEmptyHashField,
    length: Signed(length),
    chars: ...content
  };
}

// Two-byte counterpart of AllocateNonEmptySeqOneByteString.
macro AllocateNonEmptySeqTwoByteString<Iterator: type>(
    length: uint32, content: Iterator): SeqTwoByteString {
  dcheck(length > 0 && length <= kStringMaxLength);
  return new (ClearPadding) SeqTwoByteString{
    map: kStringMap,
    raw_hash_field: kNameEmptyHashField,
    length: Signed(length),
    chars: ...content
  };
}

// Convenience overloads that leave the characters uninitialized.
macro AllocateNonEmptySeqOneByteString(length: uint32): SeqOneByteString {
  return AllocateNonEmptySeqOneByteString(length, UninitializedIterator{});
}

macro AllocateNonEmptySeqTwoByteString(length: uint32): SeqTwoByteString {
  return AllocateNonEmptySeqTwoByteString(length, UninitializedIterator{});
}

// Like the NonEmpty variants, but a zero length yields kEmptyString instead
// of allocating.
macro AllocateSeqOneByteString<Iterator: type>(
    length: uint32, content: Iterator): SeqOneByteString|EmptyString {
  if (length == 0) return kEmptyString;
  return AllocateNonEmptySeqOneByteString(length, content);
}

macro AllocateSeqTwoByteString<Iterator: type>(
    length: uint32, content: Iterator): SeqTwoByteString|EmptyString {
  if (length == 0) return kEmptyString;
  return AllocateNonEmptySeqTwoByteString(length, content);
}

@export
macro AllocateSeqOneByteString(length: uint32): SeqOneByteString|EmptyString {
  return AllocateSeqOneByteString(length, UninitializedIterator{});
}

@export
macro AllocateSeqTwoByteString(length: uint32): SeqTwoByteString|EmptyString {
  return AllocateSeqTwoByteString(length, UninitializedIterator{});
}

extern macro StringWriteToFlatOneByte(
    String, RawPtr<char8>, int32, int32): void;
extern macro StringWriteToFlatTwoByte(
    String, RawPtr<char16>, int32, int32): void;

// Corresponds to String::SlowFlatten in the C++
// runtime.
// Flattens `cons` into a newly allocated sequential string, stores the result
// in cons.first, resets cons.second to the empty string and returns the flat
// string.
builtin StringSlowFlatten(cons: ConsString): String {
  // TurboFan can create cons strings with empty first parts.
  let cons = cons;
  while (cons.first.length == 0) {
    // We do not want to call this function recursively. Therefore we call
    // String::Flatten only in those cases where String::SlowFlatten is not
    // called again.
    try {
      const second = Cast<ConsString>(cons.second)
          otherwise FoundFlatString;
      if (second.IsFlat()) goto FoundFlatString;
      // `second` is an unflattened cons string: keep descending.
      cons = second;
    } label FoundFlatString {
      return Flatten(cons.second);
    }
  }

  let flat: String;
  if (cons.IsOneByteRepresentation()) {
    const allocated = AllocateNonEmptySeqOneByteString(Unsigned(cons.length));
    StringWriteToFlatOneByte(
        cons, (&allocated.chars).GCUnsafeStartPointer(), 0, cons.length);
    flat = allocated;
  } else {
    const allocated = UnsafeCast<SeqTwoByteString>(
        AllocateNonEmptySeqTwoByteString(Unsigned(cons.length)));
    StringWriteToFlatTwoByte(
        cons, (&allocated.chars).GCUnsafeStartPointer(), 0, cons.length);
    flat = allocated;
  }
  // Turn the cons string into a flat one (second part empty, see
  // ConsString::IsFlat) that points at the freshly written copy.
  cons.first = flat;
  cons.second = kEmptyString;
  return flat;
}

// Corresponds to String::Flatten in the C++ runtime.
macro Flatten(string: String): String {
  typeswitch (string) {
    case (cons: ConsString): {
      return Flatten(cons);
    }
    case (thin: ThinString): {
      // NOTE(review): the type argument of Is<> was reconstructed as
      // ConsString -- confirm.
      dcheck(!Is<ConsString>(thin.actual));
      return thin.actual;
    }
    case (other: String): {
      return other;
    }
  }
}

// Fast path for cons strings: an already flat cons string yields its first
// part; anything else is handed to StringSlowFlatten.
macro Flatten(cons: ConsString): String {
  if (cons.IsFlat()) return cons.first;
  return StringSlowFlatten(cons);
}

// Get a slice to the string data, flatten only if unavoidable for this.
// Never returns normally: control leaves through OneByte or TwoByte, carrying
// a slice over the characters of `string`.
macro StringToSlice(string: String): never labels OneByte(ConstSlice<char8>),
    TwoByte(ConstSlice<char16>) {
  let string = string;
  // Start position inside the underlying character storage; grows each time
  // a SlicedString is unwrapped.
  let offset: intptr = 0;
  // The length is taken from the string passed in, before any unwrapping.
  const length = Convert<intptr>(string.length);
  while (true) {
    typeswitch (string) {
      case (s: SeqOneByteString): {
        goto OneByte(Subslice(&s.chars, offset, length) otherwise unreachable);
      }
      case (s: SeqTwoByteString): {
        goto TwoByte(Subslice(&s.chars, offset, length) otherwise unreachable);
      }
      case (s: ThinString): {
        string = s.actual;
      }
      case (s: ConsString): {
        string = Flatten(s);
      }
      case (s: SlicedString): {
        offset += Convert<intptr>(s.offset);
        string = s.parent;
      }
      case (s: ExternalOneByteString): {
        const data = torque_internal::unsafe::NewOffHeapConstSlice(
            s.GetChars(), Convert<intptr>(s.length));
        goto OneByte(Subslice(data, offset, length) otherwise unreachable);
      }
      case (s: ExternalTwoByteString): {
        const data = torque_internal::unsafe::NewOffHeapConstSlice(
            s.GetChars(), Convert<intptr>(s.length));
        goto TwoByte(Subslice(data, offset, length) otherwise unreachable);
      }
      case (String): {
        unreachable;
      }
    }
  }
  // The loop above only exits through one of the labels.
  VerifiedUnreachable();
}

// Dispatch on the slice type of two different strings.
// Converts both strings to slices via StringToSlice and invokes
// Call(f, s1Slice, s2Slice) for whichever of the four one-byte/two-byte
// combinations applies.
macro TwoStringsToSlices<Result: type, Functor: type>(
    s1: String, s2: String, f: Functor): Result {
  try {
    StringToSlice(s1) otherwise FirstOneByte, FirstTwoByte;
  } label FirstOneByte(s1Slice: ConstSlice<char8>) {
    try {
      StringToSlice(s2) otherwise SecondOneByte, SecondTwoByte;
    } label SecondOneByte(s2Slice: ConstSlice<char8>) {
      return Call(f, s1Slice, s2Slice);
    } label SecondTwoByte(s2Slice: ConstSlice<char16>) {
      return Call(f, s1Slice, s2Slice);
    }
  } label FirstTwoByte(s1Slice: ConstSlice<char16>) {
    try {
      StringToSlice(s2) otherwise SecondOneByte, SecondTwoByte;
    } label SecondOneByte(s2Slice: ConstSlice<char8>) {
      return Call(f, s1Slice, s2Slice);
    } label SecondTwoByte(s2Slice: ConstSlice<char16>) {
      return Call(f, s1Slice, s2Slice);
    }
  }
}

// Compile-time check that kStringMaxLengthUintptr is below kSmiMaxValue.
macro StaticAssertStringLengthFitsSmi(): void {
  const kMaxStringLengthFitsSmi: constexpr bool =
      kStringMaxLengthUintptr < kSmiMaxValue;
  static_assert(kMaxStringLengthFitsSmi);
}

// NOTE(review): pointer element types below were reconstructed from the macro
// names, read as Search<search width>In<subject width>String with the subject
// pointer passed first -- confirm against the C++ declarations.
extern macro StringBuiltinsAssembler::SearchOneByteStringInTwoByteString(
    RawPtr<char16>, intptr, RawPtr<char8>, intptr, intptr): intptr;
extern macro StringBuiltinsAssembler::SearchOneByteStringInOneByteString(
    RawPtr<char8>, intptr, RawPtr<char8>, intptr, intptr): intptr;
extern macro StringBuiltinsAssembler::SearchTwoByteStringInTwoByteString(
    RawPtr<char16>, intptr, RawPtr<char16>, intptr, intptr): intptr;
extern macro StringBuiltinsAssembler::SearchTwoByteStringInOneByteString(
    RawPtr<char8>, intptr, RawPtr<char16>, intptr, intptr): intptr;
extern macro StringBuiltinsAssembler::SearchOneByteInOneByteString(
    RawPtr<char8>, intptr, RawPtr<char8>, intptr): intptr;

// Overload set selecting the extern search routine that matches the
// character widths of the subject and search pointers.

// Two-byte subject, one-byte search string.
macro AbstractStringIndexOf(
    subject: RawPtr<char16>, subjectLen: intptr, search: RawPtr<char8>,
    searchLen: intptr, fromIndex: intptr): intptr {
  return SearchOneByteStringInTwoByteString(
      subject, subjectLen, search, searchLen, fromIndex);
}

// One-byte subject, one-byte search string. A search string of length one
// uses the dedicated single-character routine.
macro AbstractStringIndexOf(
    subject: RawPtr<char8>, subjectLen: intptr, search: RawPtr<char8>,
    searchLen: intptr, fromIndex: intptr): intptr {
  if (searchLen == 1) {
    return SearchOneByteInOneByteString(subject, subjectLen, search, fromIndex);
  }
  return SearchOneByteStringInOneByteString(
      subject, subjectLen, search, searchLen, fromIndex);
}

// Two-byte subject, two-byte search string.
macro AbstractStringIndexOf(
    subject: RawPtr<char16>, subjectLen: intptr, search: RawPtr<char16>,
    searchLen: intptr, fromIndex: intptr): intptr {
  return SearchTwoByteStringInTwoByteString(
      subject, subjectLen, search, searchLen, fromIndex);
}

// One-byte subject, two-byte search string.
macro AbstractStringIndexOf(
    subject: RawPtr<char8>, subjectLen: intptr, search: RawPtr<char16>,
    searchLen: intptr, fromIndex: intptr): intptr {
  return SearchTwoByteStringInOneByteString(
      subject, subjectLen, search, searchLen, fromIndex);
}

// Functor handed to TwoStringsToSlices; carries the start index of the search.
struct AbstractStringIndexOfFunctor {
  fromIndex: Smi;
}

// Ideally, this would be a method of AbstractStringIndexOfFunctor, but
// currently methods don't support templates.
macro Call<A: type, B: type>(
    self: AbstractStringIndexOfFunctor, string: ConstSlice<A>,
    searchStr: ConstSlice<B>): Smi {
  return Convert<Smi>(AbstractStringIndexOf(
      string.GCUnsafeStartPointer(), string.length,
      searchStr.GCUnsafeStartPointer(), searchStr.length,
      Convert<intptr>(self.fromIndex)));
}

// Searches `string` for `searchString` starting at `fromIndex`. Returns
// `fromIndex` for an empty search string that starts within bounds, and -1
// when the search string cannot fit at or after `fromIndex`.
macro AbstractStringIndexOf(implicit context: Context)(
    string: String, searchString: String, fromIndex: Smi): Smi {
  // Special case the empty string.
  const searchStringLength = searchString.length_intptr;
  const stringLength = string.length_intptr;
  if (searchStringLength == 0 && SmiUntag(fromIndex) <= stringLength) {
    return fromIndex;
  }

  // Don't bother to search if the searchString would go past the end
  // of the string. This is actually necessary because of runtime
  // checks.
  if (SmiUntag(fromIndex) + searchStringLength > stringLength) {
    return -1;
  }

  return TwoStringsToSlices<Smi>(
      string, searchString, AbstractStringIndexOfFunctor{fromIndex: fromIndex});
}

// Builtin entry point; a negative `start` is clamped to 0 via SmiMax.
builtin StringIndexOf(s: String, searchString: String, start: Smi): Smi {
  return AbstractStringIndexOf(s, searchString, SmiMax(start, 0));
}