Browse Source

Implement `is_identifier_(start/part)` using `icu_properties` (#2865)

As mentioned in https://github.com/boa-dev/boa/pull/2848#issuecomment-1518909512, this uses our new default ICU4X data to replace `char::is_id_start` and `char::is_id_continue` from the `boa_unicode` crate with the [`icu_properties`](https://crates.io/crates/icu_properties) crate.

Note that this doesn't deprecate `boa_unicode` yet, since that will require some discussion about how to proceed with a now-unused sub-crate.
pull/2871/head
José Julián Espina 1 year ago
parent
commit
c330005912
  1. 6
      Cargo.lock
  2. 14
      boa_icu_provider/Cargo.toml
  3. BIN
      boa_icu_provider/data/icudata.postcard
  4. 66
      boa_icu_provider/data/min/fallback/likelysubtags_v1/und.rs.data
  5. 4
      boa_icu_provider/data/min/fallback/mod.rs
  6. 13
      boa_icu_provider/data/min/fallback/parents_v1/und.rs.data
  7. 6
      boa_icu_provider/data/min/fallback/supplement/co_v1/mod.rs
  8. 22
      boa_icu_provider/data/min/fallback/supplement/co_v1/und.rs.data
  9. 2
      boa_icu_provider/data/min/fallback/supplement/mod.rs
  10. 67
      boa_icu_provider/data/min/mod.rs
  11. 3
      boa_icu_provider/data/min/props/idc_v1/mod.rs
  12. 9
      boa_icu_provider/data/min/props/idc_v1/und.rs.data
  13. 3
      boa_icu_provider/data/min/props/ids_v1/mod.rs
  14. 9
      boa_icu_provider/data/min/props/ids_v1/und.rs.data
  15. 3
      boa_icu_provider/data/min/props/mod.rs
  16. 30
      boa_icu_provider/src/bin/datagen.rs
  17. 4
      boa_parser/Cargo.toml
  18. 33
      boa_parser/src/lexer/identifier.rs

6
Cargo.lock generated

@ -473,9 +473,11 @@ dependencies = [
name = "boa_icu_provider" name = "boa_icu_provider"
version = "0.16.0" version = "0.16.0"
dependencies = [ dependencies = [
"icu_casemapping",
"icu_collections", "icu_collections",
"icu_datagen", "icu_datagen",
"icu_normalizer", "icu_normalizer",
"icu_properties",
"icu_provider", "icu_provider",
"icu_provider_adapters", "icu_provider_adapters",
"icu_provider_blob", "icu_provider_blob",
@ -525,13 +527,15 @@ version = "0.16.0"
dependencies = [ dependencies = [
"bitflags 2.2.1", "bitflags 2.2.1",
"boa_ast", "boa_ast",
"boa_icu_provider",
"boa_interner", "boa_interner",
"boa_macros", "boa_macros",
"boa_profiler", "boa_profiler",
"boa_unicode",
"fast-float", "fast-float",
"icu_properties",
"num-bigint", "num-bigint",
"num-traits", "num-traits",
"once_cell",
"regress", "regress",
"rustc-hash", "rustc-hash",
] ]

14
boa_icu_provider/Cargo.toml

@ -14,22 +14,26 @@ rust-version.workspace = true
[dependencies] [dependencies]
icu_provider = { version = "1.2.0", features = ["serde", "sync"] } icu_provider = { version = "1.2.0", features = ["serde", "sync"] }
icu_provider_blob = "1.2.0"
icu_provider_adapters = { version = "1.2.0", features = ["serde"] }
icu_collections = "1.2.0" icu_collections = "1.2.0"
icu_normalizer = "1.2.0" icu_normalizer = "1.2.0"
icu_properties = "1.2.0"
zerovec = "0.9.4" zerovec = "0.9.4"
icu_datagen = { version = "1.2.3", optional = true } icu_datagen = { version = "1.2.3", optional = true }
once_cell = {version = "1.17.1", default-features = false, features = ["critical-section"], optional = true } icu_provider_blob = { version = "1.2.0", optional = true }
icu_provider_adapters = { version = "1.2.0", features = ["serde"], optional = true }
once_cell = { version = "1.17.1", default-features = false, features = [
"critical-section",
], optional = true }
log = { version = "0.4.17", optional = true } log = { version = "0.4.17", optional = true }
simple_logger = { version = "4.1.0", optional = true } simple_logger = { version = "4.1.0", optional = true }
icu_casemapping = { version = "0.7.2", optional = true }
[features] [features]
default = ["std"] default = ["std"]
full = ["dep:once_cell"] full = ["dep:once_cell", "dep:icu_provider_blob", "dep:icu_provider_adapters"]
std = ["once_cell?/std"] std = ["once_cell?/std"]
bin = ["dep:icu_datagen", "dep:simple_logger", "dep:log"] bin = ["dep:icu_datagen", "dep:simple_logger", "dep:log", "dep:icu_casemapping"]
[[bin]] [[bin]]
name = "boa-datagen" name = "boa-datagen"

BIN
boa_icu_provider/data/icudata.postcard

Binary file not shown.

66
boa_icu_provider/data/min/fallback/likelysubtags_v1/und.rs.data

@ -1,66 +0,0 @@
::icu_provider_adapters::fallback::provider::LocaleFallbackLikelySubtagsV1 {
l2s: unsafe {
#[allow(unused_unsafe)]
::zerovec::ZeroMap::from_parts_unchecked(
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"am\0ar\0as\0be\0bg\0bgcbhobn\0brxchrcv\0doiel\0fa\0gu\0he\0hi\0hy\0ja\0ka\0kk\0km\0kn\0ko\0kokks\0ky\0lo\0maimk\0ml\0mn\0mnimr\0my\0ne\0or\0pa\0ps\0rajru\0sa\0satsd\0si\0sr\0ta\0te\0tg\0th\0ti\0tt\0uk\0ur\0yuezh\0")
},
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"EthiArabBengCyrlCyrlDevaDevaBengDevaCherCyrlDevaGrekArabGujrHebrDevaArmnJpanGeorCyrlKhmrKndaKoreDevaArabCyrlLaooDevaCyrlMlymCyrlBengDevaMymrDevaOryaGuruArabDevaCyrlDevaOlckArabSinhCyrlTamlTeluCyrlThaiEthiCyrlCyrlArabHantHans")
},
)
},
lr2s: unsafe {
#[allow(unused_unsafe)]
::zerovec::ZeroMap2d::from_parts_unchecked(
unsafe {
::zerovec::ZeroVec::from_bytes_unchecked(
b"az\0ha\0kk\0ky\0mn\0ms\0pa\0sd\0sr\0tg\0uz\0yuezh\0",
)
},
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"\x03\0\0\0\x05\0\0\0\t\0\0\0\x0B\0\0\0\x0C\0\0\0\r\0\0\0\x0E\0\0\0\x0F\0\0\0\x13\0\0\0\x14\0\0\0\x16\0\0\0\x17\0\0\0&\0\0\0")
},
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"IQ\0IR\0RU\0CM\0SD\0AF\0CN\0IR\0MN\0CN\0TR\0CN\0CC\0PK\0IN\0ME\0RO\0RU\0TR\0PK\0AF\0CN\0CN\0AU\0BN\0GB\0GF\0HK\0ID\0MO\0PA\0PF\0PH\0SR\0TH\0TW\0US\0VN\0")
},
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"ArabArabCyrlArabArabArabArabArabArabArabLatnMongArabArabDevaLatnLatnLatnLatnArabArabCyrlHansHantHantHantHantHantHantHantHantHantHantHantHantHantHantHant")
},
)
},
l2r: unsafe {
#[allow(unused_unsafe)]
::zerovec::ZeroMap::from_parts_unchecked(
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"af\0am\0ar\0as\0astaz\0be\0bg\0bgcbhobn\0br\0brxbs\0ca\0cebchrcs\0cv\0cy\0da\0de\0doidsbel\0en\0es\0et\0eu\0fa\0ff\0fi\0filfo\0fr\0ga\0gd\0gl\0gu\0ha\0he\0hi\0hr\0hsbhu\0hy\0ia\0id\0ig\0is\0it\0ja\0jv\0ka\0keakgpkk\0km\0kn\0ko\0kokks\0ky\0lo\0lt\0lv\0maimi\0mk\0ml\0mn\0mnimr\0ms\0my\0ne\0nl\0nn\0no\0or\0pa\0pcmpl\0ps\0pt\0qu\0rajrm\0ro\0ru\0sa\0satsc\0sd\0si\0sk\0sl\0so\0sq\0sr\0su\0sv\0sw\0ta\0te\0tg\0th\0ti\0tk\0to\0tr\0tt\0uk\0ur\0uz\0vi\0wo\0xh\0yo\0yrlyuezh\0zu\0")
},
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"ZA\0ET\0EG\0IN\0ES\0AZ\0BY\0BG\0IN\0IN\0BD\0FR\0IN\0BA\0ES\0PH\0US\0CZ\0RU\0GB\0DK\0DE\0IN\0DE\0GR\0US\0ES\0EE\0ES\0IR\0SN\0FI\0PH\0FO\0FR\0IE\0GB\0ES\0IN\0NG\0IL\0IN\0HR\0DE\0HU\0AM\x00001ID\0NG\0IS\0IT\0JP\0ID\0GE\0CV\0BR\0KZ\0KH\0IN\0KR\0IN\0IN\0KG\0LA\0LT\0LV\0IN\0NZ\0MK\0IN\0MN\0IN\0IN\0MY\0MM\0NP\0NL\0NO\0NO\0IN\0IN\0NG\0PL\0AF\0BR\0PE\0IN\0CH\0RO\0RU\0IN\0IN\0IT\0PK\0LK\0SK\0SI\0SO\0AL\0RS\0ID\0SE\0TZ\0IN\0IN\0TJ\0TH\0ET\0TM\0TO\0TR\0RU\0UA\0PK\0UZ\0VN\0SN\0ZA\0NG\0BR\0HK\0CN\0ZA\0")
},
)
},
ls2r: unsafe {
#[allow(unused_unsafe)]
::zerovec::ZeroMap2d::from_parts_unchecked(
unsafe {
::zerovec::ZeroVec::from_bytes_unchecked(
b"az\0en\0ff\0kk\0ky\0mn\0pa\0sd\0tg\0uz\0yuezh\0",
)
},
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"\x01\0\0\0\x02\0\0\0\x03\0\0\0\x04\0\0\0\x06\0\0\0\x07\0\0\0\x08\0\0\0\x0B\0\0\0\x0C\0\0\0\r\0\0\0\x0E\0\0\0\x11\0\0\0")
},
unsafe {
::zerovec::ZeroVec::from_bytes_unchecked(
b"ArabShawAdlmArabArabLatnMongArabDevaKhojSindArabArabHansBopoHanbHant",
)
},
unsafe {
::zerovec::ZeroVec::from_bytes_unchecked(
b"IR\0GB\0GN\0CN\0CN\0TR\0CN\0PK\0IN\0IN\0IN\0PK\0AF\0CN\0TW\0TW\0TW\0",
)
},
)
},
}

4
boa_icu_provider/data/min/fallback/mod.rs

@ -1,4 +0,0 @@
// @generated
pub mod likelysubtags_v1;
pub mod parents_v1;
pub mod supplement;

13
boa_icu_provider/data/min/fallback/parents_v1/und.rs.data

@ -1,13 +0,0 @@
::icu_provider_adapters::fallback::provider::LocaleFallbackParentsV1 {
parents: unsafe {
#[allow(unused_unsafe)]
::zerovec::ZeroMap::from_parts_unchecked(
unsafe {
:: zerovec :: VarZeroVec :: from_bytes_unchecked (b"\x84\0\0\0\0\0\x06\0\x0B\0\x10\0\x15\0\x1A\0\x1F\0$\0)\0.\x003\08\0=\0B\0G\0L\0Q\0V\0[\0`\0e\0j\0o\0t\0y\0~\0\x83\0\x88\0\x8D\0\x92\0\x97\0\x9C\0\xA1\0\xA6\0\xAB\0\xB0\0\xB5\0\xBA\0\xBF\0\xC4\0\xC9\0\xCE\0\xD3\0\xD8\0\xDD\0\xE2\0\xE7\0\xEC\0\xF1\0\xF6\0\xFB\0\0\x01\x05\x01\n\x01\x0F\x01\x14\x01\x19\x01\x1E\x01#\x01(\x01-\x012\x017\x01<\x01A\x01F\x01K\x01P\x01U\x01Z\x01_\x01d\x01i\x01n\x01s\x01x\x01}\x01\x82\x01\x87\x01\x8C\x01\x91\x01\x96\x01\x9B\x01\xA0\x01\xA5\x01\xAA\x01\xAF\x01\xB4\x01\xB9\x01\xBE\x01\xC3\x01\xC8\x01\xCD\x01\xD2\x01\xD7\x01\xDC\x01\xE1\x01\xE6\x01\xEB\x01\xF0\x01\xF5\x01\xFA\x01\xFF\x01\x04\x02\t\x02\x0E\x02\x13\x02\x18\x02\x1D\x02\"\x02'\x02,\x021\x026\x02;\x02@\x02G\x02I\x02K\x02M\x02R\x02W\x02\\\x02a\x02f\x02k\x02p\x02u\x02z\x02\x7F\x02\x84\x02\x89\x02en-150en-AGen-AIen-ATen-AUen-BBen-BEen-BMen-BSen-BWen-BZen-CCen-CHen-CKen-CMen-CXen-CYen-DEen-DGen-DKen-DMen-ERen-FIen-FJen-FKen-FMen-GBen-GDen-GGen-GHen-GIen-GMen-GYen-HKen-IEen-ILen-IMen-INen-IOen-JEen-JMen-KEen-KIen-KNen-KYen-LCen-LRen-LSen-MGen-MOen-MSen-MTen-MUen-MVen-MWen-MYen-NAen-NFen-NGen-NLen-NRen-NUen-NZen-PGen-PKen-PNen-PWen-RWen-SBen-SCen-SDen-SEen-SGen-SHen-SIen-SLen-SSen-SXen-SZen-TCen-TKen-TOen-TTen-TVen-TZen-UGen-VCen-VGen-VUen-WSen-ZAen-ZMen-ZWes-ARes-BOes-BRes-BZes-CLes-COes-CRes-CUes-DOes-ECes-GTes-HNes-MXes-NIes-PAes-PEes-PRes-PYes-SVes-USes-UYes-VEhi-Latnhtnbnnno-NOpt-AOpt-CHpt-CVpt-FRpt-GQpt-GWpt-LUpt-MOpt-MZpt-STpt-TLzh-Hant-MO")
},
unsafe {
:: zerovec :: ZeroVec :: from_bytes_unchecked (b"en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x010
01es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419en\0\0\0\0\0\0\x01IN\0fr\0\0\0\0\0\0\x01HT\0no\0\0\0\0\0\0\0\0\0\0no\0\0\0\0\0\0\0\0\0\0no\0\0\0\0\0\0\0\0\0\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0zh\0\x01Hant\x01HK\0")
},
)
},
}

6
boa_icu_provider/data/min/fallback/supplement/co_v1/mod.rs

@ -1,6 +0,0 @@
// @generated
type DataStruct = < :: icu_provider_adapters :: fallback :: provider :: CollationFallbackSupplementV1Marker as :: icu_provider :: DataMarker > :: Yokeable ;
pub fn lookup(locale: &icu_provider::DataLocale) -> Option<&'static DataStruct> {
locale.is_empty().then(|| &UND)
}
static UND: DataStruct = include!("und.rs.data");

22
boa_icu_provider/data/min/fallback/supplement/co_v1/und.rs.data

@ -1,22 +0,0 @@
::icu_provider_adapters::fallback::provider::LocaleFallbackSupplementV1 {
parents: unsafe {
#[allow(unused_unsafe)]
::zerovec::ZeroMap::from_parts_unchecked(
unsafe { ::zerovec::VarZeroVec::from_bytes_unchecked(b"\x01\0\0\0\0\0yue") },
unsafe { ::zerovec::ZeroVec::from_bytes_unchecked(b"zh\0\x01Hant\0\0\0\0") },
)
},
unicode_extension_defaults: unsafe {
#[allow(unused_unsafe)]
::zerovec::ZeroMap2d::from_parts_unchecked(
unsafe { ::zerovec::ZeroVec::from_bytes_unchecked(b"co") },
unsafe { ::zerovec::ZeroVec::from_bytes_unchecked(b"\x02\0\0\0") },
unsafe {
::zerovec::VarZeroVec::from_bytes_unchecked(b"\x02\0\0\0\0\0\x02\0zhzh-Hant")
},
unsafe {
::zerovec::VarZeroVec::from_bytes_unchecked(b"\x02\0\0\0\0\0\x06\0pinyinstroke")
},
)
},
}

2
boa_icu_provider/data/min/fallback/supplement/mod.rs

@ -1,2 +0,0 @@
// @generated
pub mod co_v1;

67
boa_icu_provider/data/min/mod.rs

@ -1,9 +1,9 @@
// @generated // @generated
#[clippy::msrv = "1.61"] #[clippy::msrv = "1.61"]
mod fallback;
#[clippy::msrv = "1.61"]
mod normalizer; mod normalizer;
#[clippy::msrv = "1.61"] #[clippy::msrv = "1.61"]
mod props;
#[clippy::msrv = "1.61"]
use icu_provider::prelude::*; use icu_provider::prelude::*;
/// Implement [`DataProvider<M>`] on the given struct using the data /// Implement [`DataProvider<M>`] on the given struct using the data
/// hardcoded in this module. This allows the struct to be used with /// hardcoded in this module. This allows the struct to be used with
@ -96,63 +96,29 @@ macro_rules! impl_data_provider {
} }
} }
#[clippy::msrv = "1.61"] #[clippy::msrv = "1.61"]
impl DataProvider<::icu_provider_adapters::fallback::provider::CollationFallbackSupplementV1Marker> for $provider { impl DataProvider<::icu_properties::provider::IdContinueV1Marker> for $provider {
fn load( fn load(&self, req: DataRequest) -> Result<DataResponse<::icu_properties::provider::IdContinueV1Marker>, DataError> {
&self, props::idc_v1::lookup(&req.locale)
req: DataRequest,
) -> Result<DataResponse<::icu_provider_adapters::fallback::provider::CollationFallbackSupplementV1Marker>, DataError> {
fallback::supplement::co_v1::lookup(&req.locale)
.map(zerofrom::ZeroFrom::zero_from)
.map(DataPayload::from_owned)
.map(|payload| DataResponse {
metadata: Default::default(),
payload: Some(payload),
})
.ok_or_else(|| {
DataErrorKind::MissingLocale.with_req(
::icu_provider_adapters::fallback::provider::CollationFallbackSupplementV1Marker::KEY,
req,
)
})
}
}
#[clippy::msrv = "1.61"]
impl DataProvider<::icu_provider_adapters::fallback::provider::LocaleFallbackLikelySubtagsV1Marker> for $provider {
fn load(
&self,
req: DataRequest,
) -> Result<DataResponse<::icu_provider_adapters::fallback::provider::LocaleFallbackLikelySubtagsV1Marker>, DataError> {
fallback::likelysubtags_v1::lookup(&req.locale)
.map(zerofrom::ZeroFrom::zero_from) .map(zerofrom::ZeroFrom::zero_from)
.map(DataPayload::from_owned) .map(DataPayload::from_owned)
.map(|payload| DataResponse { .map(|payload| DataResponse {
metadata: Default::default(), metadata: Default::default(),
payload: Some(payload), payload: Some(payload),
}) })
.ok_or_else(|| { .ok_or_else(|| DataErrorKind::MissingLocale.with_req(::icu_properties::provider::IdContinueV1Marker::KEY, req))
DataErrorKind::MissingLocale.with_req(
::icu_provider_adapters::fallback::provider::LocaleFallbackLikelySubtagsV1Marker::KEY,
req,
)
})
} }
} }
#[clippy::msrv = "1.61"] #[clippy::msrv = "1.61"]
impl DataProvider<::icu_provider_adapters::fallback::provider::LocaleFallbackParentsV1Marker> for $provider { impl DataProvider<::icu_properties::provider::IdStartV1Marker> for $provider {
fn load( fn load(&self, req: DataRequest) -> Result<DataResponse<::icu_properties::provider::IdStartV1Marker>, DataError> {
&self, props::ids_v1::lookup(&req.locale)
req: DataRequest,
) -> Result<DataResponse<::icu_provider_adapters::fallback::provider::LocaleFallbackParentsV1Marker>, DataError> {
fallback::parents_v1::lookup(&req.locale)
.map(zerofrom::ZeroFrom::zero_from) .map(zerofrom::ZeroFrom::zero_from)
.map(DataPayload::from_owned) .map(DataPayload::from_owned)
.map(|payload| DataResponse { .map(|payload| DataResponse {
metadata: Default::default(), metadata: Default::default(),
payload: Some(payload), payload: Some(payload),
}) })
.ok_or_else(|| { .ok_or_else(|| DataErrorKind::MissingLocale.with_req(::icu_properties::provider::IdStartV1Marker::KEY, req))
DataErrorKind::MissingLocale.with_req(::icu_provider_adapters::fallback::provider::LocaleFallbackParentsV1Marker::KEY, req)
})
} }
} }
}; };
@ -185,21 +151,16 @@ macro_rules! impl_any_provider {
::icu_normalizer::provider::CompatibilityDecompositionSupplementV1Marker::KEY.hashed(); ::icu_normalizer::provider::CompatibilityDecompositionSupplementV1Marker::KEY.hashed();
const COMPATIBILITYDECOMPOSITIONTABLESV1MARKER: ::icu_provider::DataKeyHash = const COMPATIBILITYDECOMPOSITIONTABLESV1MARKER: ::icu_provider::DataKeyHash =
::icu_normalizer::provider::CompatibilityDecompositionTablesV1Marker::KEY.hashed(); ::icu_normalizer::provider::CompatibilityDecompositionTablesV1Marker::KEY.hashed();
const COLLATIONFALLBACKSUPPLEMENTV1MARKER: ::icu_provider::DataKeyHash = const IDCONTINUEV1MARKER: ::icu_provider::DataKeyHash = ::icu_properties::provider::IdContinueV1Marker::KEY.hashed();
::icu_provider_adapters::fallback::provider::CollationFallbackSupplementV1Marker::KEY.hashed(); const IDSTARTV1MARKER: ::icu_provider::DataKeyHash = ::icu_properties::provider::IdStartV1Marker::KEY.hashed();
const LOCALEFALLBACKLIKELYSUBTAGSV1MARKER: ::icu_provider::DataKeyHash =
::icu_provider_adapters::fallback::provider::LocaleFallbackLikelySubtagsV1Marker::KEY.hashed();
const LOCALEFALLBACKPARENTSV1MARKER: ::icu_provider::DataKeyHash =
::icu_provider_adapters::fallback::provider::LocaleFallbackParentsV1Marker::KEY.hashed();
match key.hashed() { match key.hashed() {
CANONICALCOMPOSITIONSV1MARKER => normalizer::comp_v1::lookup(&req.locale).map(AnyPayload::from_static_ref), CANONICALCOMPOSITIONSV1MARKER => normalizer::comp_v1::lookup(&req.locale).map(AnyPayload::from_static_ref),
CANONICALDECOMPOSITIONDATAV1MARKER => normalizer::nfd_v1::lookup(&req.locale).map(AnyPayload::from_static_ref), CANONICALDECOMPOSITIONDATAV1MARKER => normalizer::nfd_v1::lookup(&req.locale).map(AnyPayload::from_static_ref),
CANONICALDECOMPOSITIONTABLESV1MARKER => normalizer::nfdex_v1::lookup(&req.locale).map(AnyPayload::from_static_ref), CANONICALDECOMPOSITIONTABLESV1MARKER => normalizer::nfdex_v1::lookup(&req.locale).map(AnyPayload::from_static_ref),
COMPATIBILITYDECOMPOSITIONSUPPLEMENTV1MARKER => normalizer::nfkd_v1::lookup(&req.locale).map(AnyPayload::from_static_ref), COMPATIBILITYDECOMPOSITIONSUPPLEMENTV1MARKER => normalizer::nfkd_v1::lookup(&req.locale).map(AnyPayload::from_static_ref),
COMPATIBILITYDECOMPOSITIONTABLESV1MARKER => normalizer::nfkdex_v1::lookup(&req.locale).map(AnyPayload::from_static_ref), COMPATIBILITYDECOMPOSITIONTABLESV1MARKER => normalizer::nfkdex_v1::lookup(&req.locale).map(AnyPayload::from_static_ref),
COLLATIONFALLBACKSUPPLEMENTV1MARKER => fallback::supplement::co_v1::lookup(&req.locale).map(AnyPayload::from_static_ref), IDCONTINUEV1MARKER => props::idc_v1::lookup(&req.locale).map(AnyPayload::from_static_ref),
LOCALEFALLBACKLIKELYSUBTAGSV1MARKER => fallback::likelysubtags_v1::lookup(&req.locale).map(AnyPayload::from_static_ref), IDSTARTV1MARKER => props::ids_v1::lookup(&req.locale).map(AnyPayload::from_static_ref),
LOCALEFALLBACKPARENTSV1MARKER => fallback::parents_v1::lookup(&req.locale).map(AnyPayload::from_static_ref),
_ => return Err(DataErrorKind::MissingDataKey.with_req(key, req)), _ => return Err(DataErrorKind::MissingDataKey.with_req(key, req)),
} }
.map(|payload| AnyResponse { .map(|payload| AnyResponse {

3
boa_icu_provider/data/min/fallback/parents_v1/mod.rs → boa_icu_provider/data/min/props/idc_v1/mod.rs

@ -1,5 +1,6 @@
// @generated // @generated
type DataStruct = < :: icu_provider_adapters :: fallback :: provider :: LocaleFallbackParentsV1Marker as :: icu_provider :: DataMarker > :: Yokeable ; type DataStruct =
<::icu_properties::provider::IdContinueV1Marker as ::icu_provider::DataMarker>::Yokeable;
pub fn lookup(locale: &icu_provider::DataLocale) -> Option<&'static DataStruct> { pub fn lookup(locale: &icu_provider::DataLocale) -> Option<&'static DataStruct> {
locale.is_empty().then(|| &UND) locale.is_empty().then(|| &UND)
} }

9
boa_icu_provider/data/min/props/idc_v1/und.rs.data

File diff suppressed because one or more lines are too long

3
boa_icu_provider/data/min/fallback/likelysubtags_v1/mod.rs → boa_icu_provider/data/min/props/ids_v1/mod.rs

@ -1,5 +1,6 @@
// @generated // @generated
type DataStruct = < :: icu_provider_adapters :: fallback :: provider :: LocaleFallbackLikelySubtagsV1Marker as :: icu_provider :: DataMarker > :: Yokeable ; type DataStruct =
<::icu_properties::provider::IdStartV1Marker as ::icu_provider::DataMarker>::Yokeable;
pub fn lookup(locale: &icu_provider::DataLocale) -> Option<&'static DataStruct> { pub fn lookup(locale: &icu_provider::DataLocale) -> Option<&'static DataStruct> {
locale.is_empty().then(|| &UND) locale.is_empty().then(|| &UND)
} }

9
boa_icu_provider/data/min/props/ids_v1/und.rs.data

File diff suppressed because one or more lines are too long

3
boa_icu_provider/data/min/props/mod.rs

@ -0,0 +1,3 @@
// @generated
pub mod idc_v1;
pub mod ids_v1;

30
boa_icu_provider/src/bin/datagen.rs

@ -6,19 +6,15 @@
use std::{error::Error, fs::File}; use std::{error::Error, fs::File};
use boa_icu_provider::data_root; use boa_icu_provider::data_root;
use icu_datagen::{ use icu_casemapping::provider::CaseMappingV1Marker;
all_keys_with_experimental, datagen, BakedOptions, CldrLocaleSubset, Out, SourceData, use icu_datagen::{all_keys, datagen, BakedOptions, CoverageLevel, Out, SourceData};
};
use icu_normalizer::provider::{ use icu_normalizer::provider::{
CanonicalCompositionsV1Marker, CanonicalDecompositionDataV1Marker, CanonicalCompositionsV1Marker, CanonicalDecompositionDataV1Marker,
CanonicalDecompositionTablesV1Marker, CompatibilityDecompositionSupplementV1Marker, CanonicalDecompositionTablesV1Marker, CompatibilityDecompositionSupplementV1Marker,
CompatibilityDecompositionTablesV1Marker, CompatibilityDecompositionTablesV1Marker,
}; };
use icu_properties::provider::{IdContinueV1Marker, IdStartV1Marker};
use icu_provider::KeyedDataMarker; use icu_provider::KeyedDataMarker;
use icu_provider_adapters::fallback::provider::{
CollationFallbackSupplementV1Marker, LocaleFallbackLikelySubtagsV1Marker,
LocaleFallbackParentsV1Marker,
};
fn main() -> Result<(), Box<dyn Error>> { fn main() -> Result<(), Box<dyn Error>> {
simple_logger::SimpleLogger::new() simple_logger::SimpleLogger::new()
@ -27,7 +23,7 @@ fn main() -> Result<(), Box<dyn Error>> {
.init()?; .init()?;
let source_data = SourceData::default() let source_data = SourceData::default()
.with_cldr_for_tag(SourceData::LATEST_TESTED_CLDR_TAG, CldrLocaleSubset::Modern)? .with_cldr_for_tag(SourceData::LATEST_TESTED_CLDR_TAG, Default::default())?
.with_icuexport_for_tag(SourceData::LATEST_TESTED_ICUEXPORT_TAG)? .with_icuexport_for_tag(SourceData::LATEST_TESTED_ICUEXPORT_TAG)?
.with_collations(vec![String::from("search*")]); .with_collations(vec![String::from("search*")]);
@ -46,28 +42,26 @@ fn main() -> Result<(), Box<dyn Error>> {
}, },
}; };
let locales = source_data.locales(&[CoverageLevel::Modern])?;
datagen( datagen(
None, Some(&locales),
&[ &[
CanonicalDecompositionDataV1Marker::KEY, CanonicalDecompositionDataV1Marker::KEY,
CanonicalDecompositionTablesV1Marker::KEY, CanonicalDecompositionTablesV1Marker::KEY,
CanonicalCompositionsV1Marker::KEY, CanonicalCompositionsV1Marker::KEY,
CompatibilityDecompositionSupplementV1Marker::KEY, CompatibilityDecompositionSupplementV1Marker::KEY,
CompatibilityDecompositionTablesV1Marker::KEY, CompatibilityDecompositionTablesV1Marker::KEY,
LocaleFallbackLikelySubtagsV1Marker::KEY, IdContinueV1Marker::KEY,
LocaleFallbackParentsV1Marker::KEY, IdStartV1Marker::KEY,
CollationFallbackSupplementV1Marker::KEY,
], ],
&source_data, &source_data,
[normalization_out].into(), [normalization_out].into(),
)?; )?;
datagen( let keys = &mut all_keys();
None, keys.push(CaseMappingV1Marker::KEY);
&all_keys_with_experimental(), datagen(Some(&locales), keys, &source_data, [full_blob_out].into())?;
&source_data,
[full_blob_out].into(),
)?;
Ok(()) Ok(())
} }

4
boa_parser/Cargo.toml

@ -15,13 +15,15 @@ boa_interner.workspace = true
boa_macros.workspace = true boa_macros.workspace = true
boa_ast.workspace = true boa_ast.workspace = true
boa_profiler.workspace = true boa_profiler.workspace = true
boa_unicode.workspace = true boa_icu_provider.workspace = true
rustc-hash = "1.1.0" rustc-hash = "1.1.0"
fast-float = "0.2.0" fast-float = "0.2.0"
num-traits = "0.2.15" num-traits = "0.2.15"
bitflags = "2.2.1" bitflags = "2.2.1"
num-bigint = "0.4.3" num-bigint = "0.4.3"
regress = "0.5.0" regress = "0.5.0"
icu_properties = "1.2.0"
once_cell = "1.17.1"
[features] [features]
annex-b = [] annex-b = []

33
boa_parser/src/lexer/identifier.rs

@ -6,9 +6,35 @@ use crate::lexer::{
use boa_ast::{Keyword, Position, Span}; use boa_ast::{Keyword, Position, Span};
use boa_interner::Interner; use boa_interner::Interner;
use boa_profiler::Profiler; use boa_profiler::Profiler;
use boa_unicode::UnicodeProperties; use icu_properties::sets::{CodePointSetData, CodePointSetDataBorrowed};
use once_cell::sync::Lazy;
use std::io::Read; use std::io::Read;
/// List of codepoint sets that correspond to a specific [Unicode character property].
///
/// [Unicode character property]: https://unicode.org/reports/tr23/
struct PropertySets {
id_start: CodePointSetDataBorrowed<'static>,
id_continue: CodePointSetDataBorrowed<'static>,
}
/// Static `PropertySets` derived from Boa's default ICU4X data.
static PROPERTY_SETS: Lazy<PropertySets> = Lazy::new(|| {
static ID_START: Lazy<CodePointSetData> = Lazy::new(|| {
icu_properties::sets::load_id_start(&boa_icu_provider::minimal())
.expect("data should be valid")
});
static ID_CONTINUE: Lazy<CodePointSetData> = Lazy::new(|| {
icu_properties::sets::load_id_continue(&boa_icu_provider::minimal())
.expect("data should be valid")
});
PropertySets {
id_start: ID_START.as_borrowed(),
id_continue: ID_CONTINUE.as_borrowed(),
}
});
/// Identifier lexing. /// Identifier lexing.
/// ///
/// More information: /// More information:
@ -35,8 +61,7 @@ impl Identifier {
/// ///
/// [spec]: https://tc39.es/ecma262/#sec-names-and-keywords /// [spec]: https://tc39.es/ecma262/#sec-names-and-keywords
pub(super) fn is_identifier_start(ch: u32) -> bool { pub(super) fn is_identifier_start(ch: u32) -> bool {
matches!(ch, 0x0024 /* $ */ | 0x005F /* _ */) matches!(ch, 0x0024 /* $ */ | 0x005F /* _ */) || PROPERTY_SETS.id_start.contains32(ch)
|| char::try_from(ch).map_or(false, char::is_id_start)
} }
/// Checks if a character is `IdentifierPart` as per ECMAScript standards. /// Checks if a character is `IdentifierPart` as per ECMAScript standards.
@ -49,7 +74,7 @@ impl Identifier {
matches!( matches!(
ch, ch,
0x0024 /* $ */ | 0x005F /* _ */ | 0x200C /* <ZWNJ> */ | 0x200D /* <ZWJ> */ 0x0024 /* $ */ | 0x005F /* _ */ | 0x200C /* <ZWNJ> */ | 0x200D /* <ZWJ> */
) || char::try_from(ch).map_or(false, char::is_id_continue) ) || PROPERTY_SETS.id_continue.contains32(ch)
} }
} }

Loading…
Cancel
Save