From 94d08fe4e68791ceca3c4b7d94ccc5f3588feeb3 Mon Sep 17 00:00:00 2001 From: Hans Larsen Date: Wed, 9 Oct 2024 10:54:23 -0700 Subject: [PATCH] Add a URL class to boa_runtime (#4004) * Add a URL class (with caveats) Some methods are NOT currently supported (some don' make sense outside of a browser context). They are still implemented but will throw a JavaScript Error. Supported methods should follow the specification perfectly. * Adding tests and using url::quirks for simpler getters/setters * clippies * Address comments --- Cargo.lock | 1 + cli/src/main.rs | 11 +- core/runtime/Cargo.toml | 5 + core/runtime/src/console/mod.rs | 4 +- core/runtime/src/lib.rs | 53 +++++++ core/runtime/src/url.rs | 236 ++++++++++++++++++++++++++++++++ core/runtime/src/url/tests.rs | 113 +++++++++++++++ core/string/src/lib.rs | 13 ++ 8 files changed, 426 insertions(+), 10 deletions(-) create mode 100644 core/runtime/src/url.rs create mode 100644 core/runtime/src/url/tests.rs diff --git a/Cargo.lock b/Cargo.lock index 68500d8c19..12f26aaccd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -540,6 +540,7 @@ dependencies = [ "indoc", "rustc-hash 2.0.0", "textwrap", + "url", ] [[package]] diff --git a/cli/src/main.rs b/cli/src/main.rs index d43335df52..899b73f313 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -14,15 +14,12 @@ use boa_engine::{ builtins::promise::PromiseState, context::ContextBuilder, job::{FutureJob, JobQueue, NativeJob}, - js_string, module::{Module, SimpleModuleLoader}, optimizer::OptimizerOptions, - property::Attribute, script::Script, vm::flowgraph::{Direction, Graph}, Context, JsError, JsNativeError, JsResult, Source, }; -use boa_runtime::Console; use clap::{Parser, ValueEnum, ValueHint}; use colored::Colorize; use debug::init_boa_debug_object; @@ -442,12 +439,10 @@ fn main() -> Result<(), io::Error> { Ok(()) } -/// Adds the CLI runtime to the context. +/// Adds the CLI runtime to the context with default options. fn add_runtime(context: &mut Context) { - let console = Console::init(context); - context - .register_global_property(js_string!(Console::NAME), console, Attribute::all()) - .expect("the console object shouldn't exist"); + boa_runtime::register(context, boa_runtime::RegisterOptions::new()) + .expect("should not fail while registering the runtime"); } #[derive(Default)] diff --git a/core/runtime/Cargo.toml b/core/runtime/Cargo.toml index 62a0a4907f..95c743b984 100644 --- a/core/runtime/Cargo.toml +++ b/core/runtime/Cargo.toml @@ -15,6 +15,7 @@ boa_engine.workspace = true boa_gc.workspace = true boa_interop.workspace = true rustc-hash = { workspace = true, features = ["std"] } +url = { version = "2.5.2", optional = true } [dev-dependencies] indoc.workspace = true @@ -25,3 +26,7 @@ workspace = true [package.metadata.docs.rs] all-features = true + +[features] +default = ["all"] +all = ["url"] diff --git a/core/runtime/src/console/mod.rs b/core/runtime/src/console/mod.rs index bc2efa8bc8..c9fde6b79d 100644 --- a/core/runtime/src/console/mod.rs +++ b/core/runtime/src/console/mod.rs @@ -58,8 +58,8 @@ pub trait Logger: Trace + Sized { /// Implements the [`Logger`] trait and output errors to stderr and all /// the others to stdout. Will add indentation based on the number of /// groups. -#[derive(Trace, Finalize)] -struct DefaultLogger; +#[derive(Debug, Trace, Finalize)] +pub struct DefaultLogger; impl Logger for DefaultLogger { #[inline] diff --git a/core/runtime/src/lib.rs b/core/runtime/src/lib.rs index 38dea056c2..f5f501dc13 100644 --- a/core/runtime/src/lib.rs +++ b/core/runtime/src/lib.rs @@ -61,8 +61,60 @@ mod text; #[doc(inline)] pub use text::{TextDecoder, TextEncoder}; +pub mod url; + +/// Options used when registering all built-in objects and functions of the `WebAPI` runtime. +#[derive(Debug)] +pub struct RegisterOptions { + console_logger: L, +} + +impl Default for RegisterOptions { + fn default() -> Self { + Self { + console_logger: console::DefaultLogger, + } + } +} + +impl RegisterOptions { + /// Create a new `RegisterOptions` with the default options. + #[must_use] + pub fn new() -> Self { + Self::default() + } +} + +impl RegisterOptions { + /// Set the logger for the console object. + pub fn with_console_logger(self, logger: L2) -> RegisterOptions { + RegisterOptions:: { + console_logger: logger, + } + } +} + +/// Register all the built-in objects and functions of the `WebAPI` runtime. +/// +/// # Errors +/// This will error is any of the built-in objects or functions cannot be registered. +pub fn register( + ctx: &mut boa_engine::Context, + options: RegisterOptions, +) -> boa_engine::JsResult<()> { + Console::register_with_logger(ctx, options.console_logger)?; + TextDecoder::register(ctx)?; + TextEncoder::register(ctx)?; + + #[cfg(feature = "url")] + url::Url::register(ctx)?; + + Ok(()) +} + #[cfg(test)] pub(crate) mod test { + use crate::{register, RegisterOptions}; use boa_engine::{builtins, Context, JsResult, JsValue, Source}; use std::borrow::Cow; @@ -126,6 +178,7 @@ pub(crate) mod test { #[track_caller] pub(crate) fn run_test_actions(actions: impl IntoIterator) { let context = &mut Context::default(); + register(context, RegisterOptions::default()).expect("failed to register WebAPI objects"); run_test_actions_with(actions, context); } diff --git a/core/runtime/src/url.rs b/core/runtime/src/url.rs new file mode 100644 index 0000000000..f634ef96f8 --- /dev/null +++ b/core/runtime/src/url.rs @@ -0,0 +1,236 @@ +//! Boa's implementation of JavaScript's `URL` Web API class. +//! +//! The `URL` class can be instantiated from any global object. +//! This relies on the `url` feature. +//! +//! More information: +//! - [MDN documentation][mdn] +//! - [WHATWG `URL` specification][spec] +//! +//! [spec]: https://url.spec.whatwg.org/ +//! [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/URL +#![cfg(feature = "url")] + +#[cfg(test)] +mod tests; + +use boa_engine::value::Convert; +use boa_engine::{ + js_error, js_string, Context, Finalize, JsData, JsResult, JsString, JsValue, Trace, +}; +use boa_interop::{js_class, IntoJsFunctionCopied, JsClass}; +use std::fmt::Display; + +/// The `URL` class represents a (properly parsed) Uniform Resource Locator. +#[derive(Debug, Clone, JsData, Trace, Finalize)] +#[boa_gc(unsafe_no_drop)] +pub struct Url(#[unsafe_ignore_trace] url::Url); + +impl Url { + /// Register the `URL` class into the realm. + /// + /// # Errors + /// This will error if the context or realm cannot register the class. + pub fn register(context: &mut Context) -> JsResult<()> { + context.register_global_class::()?; + Ok(()) + } + + /// Create a new `URL` object. Meant to be called from the JavaScript constructor. + /// + /// # Errors + /// Any errors that might occur during URL parsing. + fn js_new(Convert(ref url): Convert, base: &Option>) -> JsResult { + if let Some(Convert(base)) = base { + let base_url = url::Url::parse(base) + .map_err(|e| js_error!(TypeError: "Failed to parse base URL: {}", e))?; + if base_url.cannot_be_a_base() { + return Err(js_error!(TypeError: "Base URL {} cannot be a base", base)); + } + + let url = base_url + .join(url) + .map_err(|e| js_error!(TypeError: "Failed to parse URL: {}", e))?; + Ok(Self(url)) + } else { + let url = url::Url::parse(url) + .map_err(|e| js_error!(TypeError: "Failed to parse URL: {}", e))?; + Ok(Self(url)) + } + } +} + +impl Display for Url { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl From for Url { + fn from(url: url::Url) -> Self { + Self(url) + } +} + +impl From for url::Url { + fn from(url: Url) -> url::Url { + url.0 + } +} + +js_class! { + class Url as "URL" { + property hash { + fn get(this: JsClass) -> JsString { + JsString::from(url::quirks::hash(&this.borrow().0)) + } + + fn set(this: JsClass, value: Convert) { + url::quirks::set_hash(&mut this.borrow_mut().0, &value.0); + } + } + + property hostname { + fn get(this: JsClass) -> JsString { + JsString::from(url::quirks::hostname(&this.borrow().0)) + } + + fn set(this: JsClass, value: Convert) { + let _ = url::quirks::set_hostname(&mut this.borrow_mut().0, &value.0); + } + } + + property host { + fn get(this: JsClass) -> JsString { + JsString::from(url::quirks::host(&this.borrow().0)) + } + + fn set(this: JsClass, value: Convert) { + let _ = url::quirks::set_host(&mut this.borrow_mut().0, &value.0); + } + } + + property href { + fn get(this: JsClass) -> JsString { + JsString::from(url::quirks::href(&this.borrow().0)) + } + + fn set(this: JsClass, value: Convert) -> JsResult<()> { + url::quirks::set_href(&mut this.borrow_mut().0, &value.0) + .map_err(|e| js_error!(TypeError: "Failed to set href: {}", e)) + } + } + + property origin { + fn get(this: JsClass) -> JsString { + JsString::from(url::quirks::origin(&this.borrow().0)) + } + } + + property password { + fn get(this: JsClass) -> JsString { + JsString::from(url::quirks::password(&this.borrow().0)) + } + + fn set(this: JsClass, value: Convert) { + let _ = url::quirks::set_password(&mut this.borrow_mut().0, &value.0); + } + } + + property pathname { + fn get(this: JsClass) -> JsString { + JsString::from(url::quirks::pathname(&this.borrow().0)) + } + + fn set(this: JsClass, value: Convert) { + let () = url::quirks::set_pathname(&mut this.borrow_mut().0, &value.0); + } + } + + property port { + fn get(this: JsClass) -> JsString { + JsString::from(url::quirks::port(&this.borrow().0)) + } + + fn set(this: JsClass, value: Convert) { + let _ = url::quirks::set_port(&mut this.borrow_mut().0, &value.0.to_std_string_lossy()); + } + } + + property protocol { + fn get(this: JsClass) -> JsString { + JsString::from(url::quirks::protocol(&this.borrow().0)) + } + + fn set(this: JsClass, value: Convert) { + let _ = url::quirks::set_protocol(&mut this.borrow_mut().0, &value.0); + } + } + + property search { + fn get(this: JsClass) -> JsString { + JsString::from(url::quirks::search(&this.borrow().0)) + } + + fn set(this: JsClass, value: Convert) { + url::quirks::set_search(&mut this.borrow_mut().0, &value.0); + } + } + + property search_params as "searchParams" { + fn get() -> JsResult<()> { + Err(js_error!(Error: "URL.searchParams is not implemented")) + } + } + + property username { + fn get(this: JsClass) -> JsString { + JsString::from(this.borrow().0.username()) + } + + fn set(this: JsClass, value: Convert) { + let _ = this.borrow_mut().0.set_username(&value.0); + } + } + + constructor(url: Convert, base: Option>) { + Self::js_new(url, &base) + } + + init(class: &mut ClassBuilder) -> JsResult<()> { + let create_object_url = (|| -> JsResult<()> { + Err(js_error!(Error: "URL.createObjectURL is not implemented")) + }) + .into_js_function_copied(class.context()); + let can_parse = (|url: Convert, base: Option>| { + Url::js_new(url, &base).is_ok() + }) + .into_js_function_copied(class.context()); + let parse = (|url: Convert, base: Option>, context: &mut Context| { + Url::js_new(url, &base) + .map_or(Ok(JsValue::null()), |u| Url::from_data(u, context).map(JsValue::from)) + }) + .into_js_function_copied(class.context()); + let revoke_object_url = (|| -> JsResult<()> { + Err(js_error!(Error: "URL.revokeObjectURL is not implemented")) + }) + .into_js_function_copied(class.context()); + + class + .static_method(js_string!("createObjectURL"), 1, create_object_url) + .static_method(js_string!("canParse"), 2, can_parse) + .static_method(js_string!("parse"), 2, parse) + .static_method(js_string!("revokeObjectUrl"), 1, revoke_object_url); + + Ok(()) + } + + fn to_string as "toString"(this: JsClass) -> JsString { + JsString::from(format!("{}", this.borrow().0)) + } + + fn to_json as "toJSON"(this: JsClass) -> JsString { + JsString::from(format!("{}", this.borrow().0)) + } + } +} diff --git a/core/runtime/src/url/tests.rs b/core/runtime/src/url/tests.rs new file mode 100644 index 0000000000..685efc6df1 --- /dev/null +++ b/core/runtime/src/url/tests.rs @@ -0,0 +1,113 @@ +use crate::test::{run_test_actions, TestAction}; + +const TEST_HARNESS: &str = r#" +function assert(condition, message) { + if (!condition) { + if (!message) { + message = "Assertion failed"; + } + throw new Error(message); + } +} + +function assert_eq(a, b, message) { + if (a !== b) { + throw new Error(`${message} (${JSON.stringify(a)} !== ${JSON.stringify(b)})`); + } +} +"#; + +#[test] +fn url_basic() { + run_test_actions([ + TestAction::run(TEST_HARNESS), + TestAction::run( + r##" + url = new URL("https://example.com:8080/path/to/resource?query#fragment"); + assert(url instanceof URL); + assert_eq(url.href, "https://example.com:8080/path/to/resource?query#fragment"); + assert_eq(url.protocol, "https:"); + assert_eq(url.host, "example.com:8080"); + assert_eq(url.hostname, "example.com"); + assert_eq(url.port, "8080"); + assert_eq(url.pathname, "/path/to/resource"); + assert_eq(url.search, "?query"); + assert_eq(url.hash, "#fragment"); + "##, + ), + ]); +} + +#[test] +fn url_base() { + run_test_actions([ + TestAction::run(TEST_HARNESS), + TestAction::run( + r##" + url = new URL("https://example.com:8080/path/to/resource?query#fragment", "http://example.org/"); + assert_eq(url.href, "https://example.com:8080/path/to/resource?query#fragment"); + assert_eq(url.protocol, "https:"); + assert_eq(url.host, "example.com:8080"); + assert_eq(url.hostname, "example.com"); + assert_eq(url.port, "8080"); + assert_eq(url.pathname, "/path/to/resource"); + assert_eq(url.search, "?query"); + assert_eq(url.hash, "#fragment"); + "##, + ), + TestAction::run( + r##" + url = new URL("/path/to/resource?query#fragment", "http://example.org/"); + assert_eq(url.href, "http://example.org/path/to/resource?query#fragment"); + assert_eq(url.protocol, "http:"); + assert_eq(url.host, "example.org"); + assert_eq(url.hostname, "example.org"); + assert_eq(url.port, ""); + assert_eq(url.pathname, "/path/to/resource"); + assert_eq(url.search, "?query"); + assert_eq(url.hash, "#fragment"); + "##, + ), + ]); +} + +#[test] +fn url_setters() { + // These were double checked against Firefox. + run_test_actions([ + TestAction::run(TEST_HARNESS), + TestAction::run( + r##" + url = new URL("https://example.com:8080/path/to/resource?query#fragment"); + url.protocol = "http:"; + url.host = "example.org:80"; // Since protocol is http, port is removed. + url.pathname = "/new/path"; + url.search = "?new-query"; + url.hash = "#new-fragment"; + assert_eq(url.href, "http://example.org/new/path?new-query#new-fragment"); + assert_eq(url.protocol, "http:"); + assert_eq(url.host, "example.org"); + assert_eq(url.hostname, "example.org"); + assert_eq(url.port, ""); + assert_eq(url.pathname, "/new/path"); + assert_eq(url.search, "?new-query"); + assert_eq(url.hash, "#new-fragment"); + "##, + ), + ]); +} + +#[test] +fn url_static_methods() { + run_test_actions([ + TestAction::run(TEST_HARNESS), + TestAction::run( + r##" + assert(URL.canParse("http://example.org/new/path?new-query#new-fragment")); + assert(!URL.canParse("http//:example.org/new/path?new-query#new-fragment")); + assert(!URL.canParse("http://example.org/new/path?new-query#new-fragment", "http:")); + assert(URL.canParse("/new/path?new-query#new-fragment", "http://example.org/")); + "##, + ), + ]); +} diff --git a/core/string/src/lib.rs b/core/string/src/lib.rs index ecacaa1d56..26922832ae 100644 --- a/core/string/src/lib.rs +++ b/core/string/src/lib.rs @@ -454,6 +454,19 @@ impl JsString { self.to_string_escaped() } + /// Decodes a [`JsString`] into a [`String`], replacing invalid data with the + /// replacement character U+FFFD. + #[inline] + #[must_use] + pub fn to_std_string_lossy(&self) -> String { + self.code_points() + .map(|cp| match cp { + CodePoint::Unicode(c) => c, + CodePoint::UnpairedSurrogate(_) => '\u{FFFD}', + }) + .collect() + } + /// Decodes a [`JsString`] into a [`String`], returning /// /// # Errors