Browse Source

Implement get for string

refactor/interner
Haled Odat 1 year ago
parent
commit
92f641b0d2
  1. 6
      boa_engine/src/builtins/string/mod.rs
  2. 15
      boa_engine/src/builtins/uri/mod.rs
  3. 36
      boa_engine/src/string/mod.rs
  4. 60
      boa_engine/src/string/str.rs

6
boa_engine/src/builtins/string/mod.rs

@ -633,9 +633,11 @@ impl String {
match position { match position {
// 4. Let size be the length of S. // 4. Let size be the length of S.
IntegerOrInfinity::Integer(i) if i >= 0 && i < string.len() as i64 => { IntegerOrInfinity::Integer(i) if i >= 0 => {
// 6. Return the Number value for the numeric value of the code unit at index position within the String S. // 6. Return the Number value for the numeric value of the code unit at index position within the String S.
Ok(u32::from(string.to_vec()[i as usize]).into()) Ok(string
.get(i as usize)
.map_or_else(JsValue::nan, JsValue::from))
} }
// 5. If position < 0 or position ≥ size, return NaN. // 5. If position < 0 or position ≥ size, return NaN.
_ => Ok(JsValue::nan()), _ => Ok(JsValue::nan()),

15
boa_engine/src/builtins/uri/mod.rs

@ -309,7 +309,7 @@ where
} }
// b. Let C be the code unit at index k within string. // b. Let C be the code unit at index k within string.
let c = string.to_vec()[k]; let c = string.get_expect(k);
// c. If C is in unescapedSet, then // c. If C is in unescapedSet, then
if unescaped_set(c) { if unescaped_set(c) {
@ -384,7 +384,7 @@ where
} }
// b. Let C be the code unit at index k within string. // b. Let C be the code unit at index k within string.
let c = string.to_vec()[k]; let c = string.get_expect(k);
// c. If C is not the code unit 0x0025 (PERCENT SIGN), then // c. If C is not the code unit 0x0025 (PERCENT SIGN), then
#[allow(clippy::if_not_else)] #[allow(clippy::if_not_else)]
@ -406,9 +406,10 @@ where
// iii. If the code units at index (k + 1) and (k + 2) within string do not represent // iii. If the code units at index (k + 1) and (k + 2) within string do not represent
// hexadecimal digits, throw a URIError exception. // hexadecimal digits, throw a URIError exception.
// iv. Let B be the 8-bit value represented by the two hexadecimal digits at index (k + 1) and (k + 2). // iv. Let B be the 8-bit value represented by the two hexadecimal digits at index (k + 1) and (k + 2).
let b = decode_hex_byte(string.to_vec()[k + 1], string.to_vec()[k + 2]).ok_or_else( let b = decode_hex_byte(string.get_expect(k + 1), string.get_expect(k + 2))
|| JsNativeError::uri().with_message("invalid hexadecimal digit found"), .ok_or_else(|| {
)?; JsNativeError::uri().with_message("invalid hexadecimal digit found")
})?;
// v. Set k to k + 2. // v. Set k to k + 2.
k += 2; k += 2;
@ -456,7 +457,7 @@ where
k += 1; k += 1;
// b. If the code unit at index k within string is not the code unit 0x0025 (PERCENT SIGN), throw a URIError exception. // b. If the code unit at index k within string is not the code unit 0x0025 (PERCENT SIGN), throw a URIError exception.
if string.to_vec()[k] != 0x0025 { if string.get_expect(k) != 0x0025 {
return Err(JsNativeError::uri() return Err(JsNativeError::uri()
.with_message("escape characters must be preceded with a % sign") .with_message("escape characters must be preceded with a % sign")
.into()); .into());
@ -464,7 +465,7 @@ where
// c. If the code units at index (k + 1) and (k + 2) within string do not represent hexadecimal digits, throw a URIError exception. // c. If the code units at index (k + 1) and (k + 2) within string do not represent hexadecimal digits, throw a URIError exception.
// d. Let B be the 8-bit value represented by the two hexadecimal digits at index (k + 1) and (k + 2). // d. Let B be the 8-bit value represented by the two hexadecimal digits at index (k + 1) and (k + 2).
let b = decode_hex_byte(string.to_vec()[k + 1], string.to_vec()[k + 2]) let b = decode_hex_byte(string.get_expect(k + 1), string.get_expect(k + 2))
.ok_or_else(|| { .ok_or_else(|| {
JsNativeError::uri().with_message("invalid hexadecimal digit found") JsNativeError::uri().with_message("invalid hexadecimal digit found")
})?; })?;

36
boa_engine/src/string/mod.rs

@ -46,7 +46,7 @@ use std::{
str::FromStr, str::FromStr,
}; };
use self::common::StaticJsStrings; use self::{common::StaticJsStrings, str::JsSliceIndex};
fn alloc_overflow() -> ! { fn alloc_overflow() -> ! {
panic!("detected overflow during string allocation") panic!("detected overflow during string allocation")
@ -883,17 +883,6 @@ impl JsString {
self.len() == 0 self.len() == 0
} }
// pub fn get<I>(&self, index: I) -> Option<<I::Output as ToOwned>::Owned>
// where
// I: SliceIndex<[u16]>,
// <I as SliceIndex<[u16]>>::Output: ToOwned,
// {
// match self.as_slice() {
// StringSlice::Ascii(v) => v.get(index).map(ToOwned::to_owned),
// StringSlice::U16(v) => v.get(index).map(ToOwned::to_owned),
// }
// }
pub fn to_vec(&self) -> Vec<u16> { pub fn to_vec(&self) -> Vec<u16> {
match self.as_str().variant() { match self.as_str().variant() {
JsStrVariant::Ascii(v) => v.bytes().map(u16::from).collect(), JsStrVariant::Ascii(v) => v.bytes().map(u16::from).collect(),
@ -924,6 +913,20 @@ impl JsString {
pub fn is_static(&self) -> bool { pub fn is_static(&self) -> bool {
self.ptr.is_tagged() self.ptr.is_tagged()
} }
/// Looks up `index` in this string.
///
/// The lookup is forwarded to the [`JsSliceIndex`] implementation of the index
/// type, applied to the borrowed view returned by `as_str`. The index type
/// therefore decides what a successful lookup yields (`I::Value`) and when the
/// result is `None`.
pub fn get<'a, I>(&'a self, index: I) -> Option<I::Value>
where
    I: JsSliceIndex<'a>,
{
    let view = self.as_str();
    // Name the trait explicitly so the call cannot be confused with the
    // `get` methods of the `SliceIndex` supertraits.
    <I as JsSliceIndex<'a>>::get(view, index)
}
/// Looks up `index` in this string like `get`, but unwraps the result.
///
/// Use this only where the index is already known to be valid; use `get`
/// when the index may be rejected.
///
/// # Panics
///
/// Panics with the message `"Index out of bounds"` if `get` returns `None`
/// for `index`. Because of `#[track_caller]`, the reported panic location is
/// the call site rather than this helper.
#[must_use]
#[track_caller]
pub fn get_expect<'a, I>(&'a self, index: I) -> I::Value
where
    I: JsSliceIndex<'a>,
{
    self.get(index).expect("Index out of bounds")
}
} }
impl Clone for JsString { impl Clone for JsString {
@ -1089,15 +1092,6 @@ impl Hash for JsString {
} }
} }
// impl<I: SliceIndex<[u16]>> Index<I> for JsString {
// type Output = I::Output;
// #[inline]
// fn index(&self, index: I) -> &Self::Output {
// Index::index(&**self, index)
// }
// }
impl Ord for JsString { impl Ord for JsString {
fn cmp(&self, other: &Self) -> std::cmp::Ordering { fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.to_vec().cmp(&other.to_vec()) self.to_vec().cmp(&other.to_vec())

60
boa_engine/src/string/str.rs

@ -1,3 +1,5 @@
use std::slice::SliceIndex;
use crate::{builtins::string::is_trimmable_whitespace, string::Iter}; use crate::{builtins::string::is_trimmable_whitespace, string::Iter};
use boa_interner::JStrRef; use boa_interner::JStrRef;
@ -150,4 +152,62 @@ impl<'a> JsStr<'a> {
} }
} }
} }
pub fn get<I>(&'a self, index: I) -> Option<I::Value>
where
I: JsSliceIndex<'a>,
{
I::get(*self, index)
}
}
/// Index types that can be used to look into a [`JsStr`].
///
/// Every implementor must also be a valid index for both `[u8]` and `[u16]`
/// slices (the supertrait bounds) — presumably mirroring the two storage
/// variants a `JsStr` can have.
pub trait JsSliceIndex<'a>: SliceIndex<[u8]> + SliceIndex<[u16]> {
    /// What a successful lookup with this index type yields.
    type Value;

    /// Looks up `index` in `value`, yielding `None` when the lookup fails.
    fn get(value: JsStr<'a>, index: Self) -> Option<Self::Value>;
}
/// Single-position lookup: yields the code unit stored at `index`.
impl<'a> JsSliceIndex<'a> for usize {
    type Value = u16;

    /// Returns the code unit at `index`, or `None` if `index` is past the end
    /// of the underlying storage.
    fn get(value: JsStr<'a>, index: Self) -> Option<Self::Value> {
        match value.variant() {
            JsStrVariant::Ascii(ascii) => {
                // Stored as bytes; widen the byte to a `u16` code unit.
                let byte = *ascii.as_bytes().get(index)?;
                Some(u16::from(byte))
            }
            JsStrVariant::U16(units) => units.get(index).copied(),
        }
    }
}
/// Range lookup: yields the sub-string covering `index.start..index.end`.
impl<'a> JsSliceIndex<'a> for std::ops::Range<usize> {
    type Value = JsStr<'a>;

    /// Returns the sub-string selected by `index`, or `None` if the range is
    /// rejected by the underlying slice lookup (e.g. it runs past the end).
    fn get(value: JsStr<'a>, index: Self) -> Option<Self::Value> {
        match value.variant() {
            JsStrVariant::Ascii(ascii) => {
                let bytes = ascii.as_bytes().get(index)?;

                // SAFETY: Any sub-slice of an ASCII byte array is itself ASCII,
                // and ASCII is always valid UTF-8.
                let text = unsafe { std::str::from_utf8_unchecked(bytes) };

                // SAFETY: `from_utf8_unchecked` does not alter the bytes, so
                // `text` still holds only the ASCII bytes sliced above.
                Some(unsafe { JsStr::ascii_unchecked(text) })
            }
            JsStrVariant::U16(units) => units.get(index).map(|sub| {
                // TODO: A sub-slice of a UTF-16 array may contain only ASCII
                // code units; that case still needs to be accounted for.
                //
                // SAFETY: NOTE(review): not yet justified. The precondition of
                // `JsStr::u16_unchecked` is not visible from here; if it
                // requires a non-ASCII code unit to be present, the case in
                // the TODO above violates it. Confirm before relying on this.
                unsafe { JsStr::u16_unchecked(sub) }
            }),
        }
    }
}
impl<'a> JsSliceIndex<'a> for std::ops::RangeFull {
type Value = JsStr<'a>;
fn get(value: JsStr<'a>, _index: Self) -> Option<Self::Value> {
Some(value)
}
} }

Loading…
Cancel
Save