From 5a85c595d4dff8fffd3d7881e4e9bca188691074 Mon Sep 17 00:00:00 2001 From: HalidOdat Date: Wed, 25 Mar 2020 01:12:16 +0100 Subject: [PATCH] Added the ability to dump the token stream or ast in bin. (#278) * Added the ability to dump the token stream or ast in bin. The dump functionality works both for files and REPL. With --dump-tokens (or -t for short) it dumps the token stream to stdout, and with --dump-ast (or -a for short) it dumps the ast to stdout. The dumping of tokens and ast is mutually exclusive, and when dumping it won't run the code. * Fixed some issues with rustfmt. * Added serde serialization and deserialization to token and the ast. * Added a dynamic multi-format dumping of token stream and ast in bin. - Changed --dump-tokens and --dump-ast to be optional arguments that optionally take a value of format type ([--opt=[val]]). - The default format for --dump-tokens and --dump-ast is Debug format which calls std::fmt::Debug. - Added Json and JsonMinified formats for both dumps, using serde_json internally. - It is easy to support other format types, such as Toml with toml-rs for example. * Made serde an optional dependency. - Serde serialization and deserialization can be switched on by using the feature flag "serde-ast". * Changed the JSON dumping format. - Now Json dumping format prints the data in minified JSON form by default. - Removed JsonMinified. - Added JsonPretty as a way to dump the data in pretty printed JSON format. * Updated the docs. 
--- Cargo.lock | 4 + README.md | 8 +- boa/Cargo.toml | 2 + boa/src/lib.rs | 3 + boa/src/syntax/ast/constant.rs | 4 + boa/src/syntax/ast/expr.rs | 5 ++ boa/src/syntax/ast/keyword.rs | 4 + boa/src/syntax/ast/op.rs | 10 +++ boa/src/syntax/ast/pos.rs | 4 + boa/src/syntax/ast/punc.rs | 4 + boa/src/syntax/ast/token.rs | 9 +- boa_cli/Cargo.toml | 2 +- boa_cli/src/main.rs | 153 +++++++++++++++++++++++++++++++-- docs/debugging.md | 39 +++++++-- 14 files changed, 231 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b7806729d9..cf9ff81a82 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9,6 +9,7 @@ dependencies = [ "gc_derive", "rand", "regex", + "serde", "serde_json", "wasm-bindgen", ] @@ -610,6 +611,9 @@ name = "serde" version = "1.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "414115f25f818d7dfccec8ee535d76949ae78584fc4f79a6f45a904bf8ab4449" +dependencies = [ + "serde_derive", +] [[package]] name = "serde_derive" diff --git a/README.md b/README.md index 036569874f..d4619aab7c 100644 --- a/README.md +++ b/README.md @@ -86,12 +86,18 @@ see [CHANGELOG](./CHANGELOG.md) ``` USAGE: - boa_cli [FILE]... + boa_cli [OPTIONS] [FILE]... FLAGS: -h, --help Prints help information -V, --version Prints version information +OPTIONS: + -a, --dump-ast Dump the ast to stdout with the given format [possible values: Debug, Json, + JsonPretty] + -t, --dump-tokens Dump the token stream to stdout with the given format [possible values: Debug, Json, + JsonPretty] + ARGS: ... 
The JavaScript file(s) to be evaluated ``` diff --git a/boa/Cargo.toml b/boa/Cargo.toml index e7212cd62d..c30d4caa45 100644 --- a/boa/Cargo.toml +++ b/boa/Cargo.toml @@ -11,6 +11,7 @@ exclude = ["../.vscode/*", "../Dockerfile", "../Makefile", "../.editorConfig"] edition = "2018" [features] +serde-ast = ["serde"] default = ["wasm-bindgen"] [dependencies] @@ -22,6 +23,7 @@ regex = "1.3.4" # Optional Dependencies wasm-bindgen = { version = "0.2.58", optional = true } +serde = { version = "1.0", features = ["derive"], optional = true } [dev-dependencies] criterion = "0.3.1" diff --git a/boa/src/lib.rs b/boa/src/lib.rs index 7cd1e20cea..b5fae18ff6 100644 --- a/boa/src/lib.rs +++ b/boa/src/lib.rs @@ -19,6 +19,9 @@ use crate::{ syntax::{ast::expr::Expr, lexer::Lexer, parser::Parser}, }; +#[cfg(feature = "serde-ast")] +pub use serde_json; + fn parser_expr(src: &str) -> Result { let mut lexer = Lexer::new(src); lexer.lex().map_err(|e| format!("SyntaxError: {}", e))?; diff --git a/boa/src/syntax/ast/constant.rs b/boa/src/syntax/ast/constant.rs index 222821ac54..9ec126f9ce 100644 --- a/boa/src/syntax/ast/constant.rs +++ b/boa/src/syntax/ast/constant.rs @@ -1,6 +1,10 @@ use gc_derive::{Finalize, Trace}; use std::fmt::{Display, Formatter, Result}; +#[cfg(feature = "serde-ast")] +use serde::{Deserialize, Serialize}; + +#[cfg_attr(feature = "serde-ast", derive(Serialize, Deserialize))] #[derive(Clone, Debug, Trace, Finalize, PartialEq)] /// A Javascript Constant pub enum Const { diff --git a/boa/src/syntax/ast/expr.rs b/boa/src/syntax/ast/expr.rs index 77ef8e6a53..6198a46d56 100644 --- a/boa/src/syntax/ast/expr.rs +++ b/boa/src/syntax/ast/expr.rs @@ -8,6 +8,10 @@ use std::{ fmt::{Display, Formatter, Result}, }; +#[cfg(feature = "serde-ast")] +use serde::{Deserialize, Serialize}; + +#[cfg_attr(feature = "serde-ast", derive(Serialize, Deserialize))] #[derive(Clone, Trace, Finalize, Debug, PartialEq)] pub struct Expr { /// The expression definition @@ -27,6 +31,7 @@ impl Display for 
Expr { } } +#[cfg_attr(feature = "serde-ast", derive(Serialize, Deserialize))] #[derive(Clone, Debug, Trace, Finalize, PartialEq)] /// A Javascript Expression pub enum ExprDef { diff --git a/boa/src/syntax/ast/keyword.rs b/boa/src/syntax/ast/keyword.rs index 5d3b8f4459..05dc45fd58 100644 --- a/boa/src/syntax/ast/keyword.rs +++ b/boa/src/syntax/ast/keyword.rs @@ -4,6 +4,10 @@ use std::{ str::FromStr, }; +#[cfg(feature = "serde-ast")] +use serde::{Deserialize, Serialize}; + +#[cfg_attr(feature = "serde-ast", derive(Serialize, Deserialize))] #[derive(Clone, Copy, PartialEq, Debug)] /// A Javascript Keyword /// As specificed by diff --git a/boa/src/syntax/ast/op.rs b/boa/src/syntax/ast/op.rs index 29dc422357..59e3d7ac9a 100644 --- a/boa/src/syntax/ast/op.rs +++ b/boa/src/syntax/ast/op.rs @@ -1,6 +1,9 @@ use gc_derive::{Finalize, Trace}; use std::fmt::{Display, Formatter, Result}; +#[cfg(feature = "serde-ast")] +use serde::{Deserialize, Serialize}; + /// Represents an operator pub trait Operator { /// Get the associativity as a boolean that is true if it goes rightwards @@ -13,6 +16,7 @@ pub trait Operator { } } +#[cfg_attr(feature = "serde-ast", derive(Serialize, Deserialize))] #[derive(Clone, Debug, Trace, Finalize, PartialEq)] /// A numeric operation between 2 values pub enum NumOp { @@ -47,6 +51,7 @@ impl Display for NumOp { } } +#[cfg_attr(feature = "serde-ast", derive(Serialize, Deserialize))] #[derive(Clone, Debug, Trace, Finalize, PartialEq)] /// A unary operation on a single value pub enum UnaryOp { @@ -88,6 +93,7 @@ impl Display for UnaryOp { } } +#[cfg_attr(feature = "serde-ast", derive(Serialize, Deserialize))] #[derive(Clone, Debug, Trace, Finalize, PartialEq)] /// A bitwise operation between 2 values pub enum BitOp { @@ -119,6 +125,7 @@ impl Display for BitOp { } } +#[cfg_attr(feature = "serde-ast", derive(Serialize, Deserialize))] #[derive(Clone, Debug, Trace, Finalize, PartialEq)] /// A comparitive operation between 2 values pub enum CompOp { @@ -159,6 
+166,7 @@ impl Display for CompOp { } } +#[cfg_attr(feature = "serde-ast", derive(Serialize, Deserialize))] #[derive(Clone, Debug, Trace, Finalize, PartialEq)] /// A logical operation between 2 boolean values pub enum LogOp { @@ -181,6 +189,7 @@ impl Display for LogOp { } } +#[cfg_attr(feature = "serde-ast", derive(Serialize, Deserialize))] #[derive(Clone, Debug, Trace, Finalize, PartialEq)] /// A binary operation between 2 values pub enum BinOp { @@ -240,6 +249,7 @@ impl Display for BinOp { } } +#[cfg_attr(feature = "serde-ast", derive(Serialize, Deserialize))] #[derive(Clone, Debug, Trace, Finalize, PartialEq)] /// A binary operation between 2 values pub enum AssignOp { diff --git a/boa/src/syntax/ast/pos.rs b/boa/src/syntax/ast/pos.rs index 47f7047276..9c99a1d998 100644 --- a/boa/src/syntax/ast/pos.rs +++ b/boa/src/syntax/ast/pos.rs @@ -1,3 +1,7 @@ +#[cfg(feature = "serde-ast")] +use serde::{Deserialize, Serialize}; + +#[cfg_attr(feature = "serde-ast", derive(Serialize, Deserialize))] #[derive(Clone, Copy, PartialEq, Debug)] /// A position in the Javascript source code /// Stores both the column number and the line number diff --git a/boa/src/syntax/ast/punc.rs b/boa/src/syntax/ast/punc.rs index c471c98a39..a6df171dc2 100644 --- a/boa/src/syntax/ast/punc.rs +++ b/boa/src/syntax/ast/punc.rs @@ -1,5 +1,9 @@ use std::fmt::{Display, Error, Formatter}; +#[cfg(feature = "serde-ast")] +use serde::{Deserialize, Serialize}; + +#[cfg_attr(feature = "serde-ast", derive(Serialize, Deserialize))] #[derive(PartialEq, Clone, Copy, Debug)] /// Punctuation pub enum Punctuator { diff --git a/boa/src/syntax/ast/token.rs b/boa/src/syntax/ast/token.rs index 76137914ac..844b488c51 100644 --- a/boa/src/syntax/ast/token.rs +++ b/boa/src/syntax/ast/token.rs @@ -1,9 +1,12 @@ use crate::syntax::ast::{keyword::Keyword, pos::Position, punc::Punctuator}; use std::fmt::{Debug, Display, Formatter, Result}; -#[derive(Clone, PartialEq)] +#[cfg(feature = "serde-ast")] +use serde::{Deserialize, 
Serialize}; + /// Represents a token -#[derive(Debug)] +#[cfg_attr(feature = "serde-ast", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq)] pub struct Token { /// The token Data pub data: TokenData, @@ -38,7 +41,7 @@ impl Debug for VecToken { write!(f, "{}", buffer) } } - +#[cfg_attr(feature = "serde-ast", derive(Serialize, Deserialize))] #[derive(Clone, PartialEq, Debug)] /// Represents the type of Token pub enum TokenData { diff --git a/boa_cli/Cargo.toml b/boa_cli/Cargo.toml index 7c43dc11f6..07edb56865 100644 --- a/boa_cli/Cargo.toml +++ b/boa_cli/Cargo.toml @@ -11,5 +11,5 @@ exclude = ["../.vscode/*", "../Dockerfile", "../Makefile", "../.editorConfig"] edition = "2018" [dependencies] -Boa = { path = "../boa", default-features = false } +Boa = { path = "../boa", features = ["serde-ast"], default-features = false } structopt = "0.3.9" diff --git a/boa_cli/src/main.rs b/boa_cli/src/main.rs index 32a4614d51..1be9d78397 100644 --- a/boa_cli/src/main.rs +++ b/boa_cli/src/main.rs @@ -3,18 +3,140 @@ #![allow(clippy::cognitive_complexity)] use boa::builtins::console::log; +use boa::serde_json; +use boa::syntax::ast::{expr::Expr, token::Token}; use boa::{exec::Executor, forward_val, realm::Realm}; -use std::io; +use std::io::{self, Write}; use std::{fs::read_to_string, path::PathBuf}; +use structopt::clap::arg_enum; use structopt::StructOpt; + /// CLI configuration for Boa. +// +// Added #[allow(clippy::option_option)] because to StructOpt an Option<Option<T>> +// is an optional argument that optionally takes a value ([--opt=[val]]). +// https://docs.rs/structopt/0.3.11/structopt/#type-magic +#[allow(clippy::option_option)] #[derive(Debug, StructOpt)] #[structopt(author, about)] struct Opt { /// The JavaScript file(s) to be evaluated. #[structopt(name = "FILE", parse(from_os_str))] files: Vec<PathBuf>, + + /// Dump the token stream to stdout with the given format. 
+ #[structopt( + long, + short = "-t", + value_name = "FORMAT", + possible_values = &DumpFormat::variants(), + case_insensitive = true, + conflicts_with = "dump-ast" + )] + dump_tokens: Option<Option<DumpFormat>>, + + /// Dump the ast to stdout with the given format. + #[structopt( + long, + short = "-a", + value_name = "FORMAT", + possible_values = &DumpFormat::variants(), + case_insensitive = true + )] + dump_ast: Option<Option<DumpFormat>>, +} + +impl Opt { + /// Returns whether a dump flag has been used. + fn has_dump_flag(&self) -> bool { + self.dump_tokens.is_some() || self.dump_ast.is_some() + } +} + +arg_enum! { + /// The different types of format available for dumping. + /// + // NOTE: This can easily support other formats just by + // adding a variant to this enum and adding the necessary + // implementation. Example: Toml, Html, etc. + // + // NOTE: The variants of this enum do not have doc comments because + // the arg_enum! macro does not support them. + #[derive(Debug)] + enum DumpFormat { + // This is the default format that you get from std::fmt::Debug. + Debug, + + // This is a minified json format. + Json, + + // This is a pretty printed json format. + JsonPretty, + } +} + +/// Lexes the given source code into a stream of tokens and returns it. +/// +/// Returns an error of type String with a message, +/// if the source has a syntax error. +fn lex_source(src: &str) -> Result<Vec<Token>, String> { + use boa::syntax::lexer::Lexer; + + let mut lexer = Lexer::new(src); + lexer.lex().map_err(|e| format!("SyntaxError: {}", e))?; + Ok(lexer.tokens) +} + +/// Parses the token stream into an ast and returns it. +/// +/// Returns an error of type String with a message, +/// if the token stream has a parsing error. +fn parse_tokens(tokens: Vec<Token>) -> Result<Expr, String> { + use boa::syntax::parser::Parser; + + Parser::new(tokens) + .parse_all() + .map_err(|e| format!("ParsingError: {}", e)) } + +/// Dumps the token stream or ast to stdout depending on the given arguments. 
+/// +/// Returns an error of type String with an error message, +/// if the source has a syntax or parsing error. +fn dump(src: &str, args: &Opt) -> Result<(), String> { + let tokens = lex_source(src)?; + + if let Some(ref arg) = args.dump_tokens { + match arg { + Some(format) => match format { + DumpFormat::Debug => println!("{:#?}", tokens), + DumpFormat::Json => println!("{}", serde_json::to_string(&tokens).unwrap()), + DumpFormat::JsonPretty => { + println!("{}", serde_json::to_string_pretty(&tokens).unwrap()) + } + }, + // Default token stream dumping format. + None => println!("{:#?}", tokens), + } + } else if let Some(ref arg) = args.dump_ast { + let ast = parse_tokens(tokens)?; + + match arg { + Some(format) => match format { + DumpFormat::Debug => println!("{:#?}", ast), + DumpFormat::Json => println!("{}", serde_json::to_string(&ast).unwrap()), + DumpFormat::JsonPretty => { + println!("{}", serde_json::to_string_pretty(&ast).unwrap()) + } + }, + // Default ast dumping format. + None => println!("{:#?}", ast), + } + } + + Ok(()) +} + pub fn main() -> Result<(), std::io::Error> { let args = Opt::from_args(); @@ -25,9 +147,16 @@ pub fn main() -> Result<(), std::io::Error> { for file in &args.files { let buffer = read_to_string(file)?; - match forward_val(&mut engine, &buffer) { - Ok(v) => print!("{}", v.to_string()), - Err(v) => eprint!("{}", v.to_string()), + if args.has_dump_flag() { + match dump(&buffer, &args) { + Ok(_) => {} + Err(e) => eprintln!("{}", e), + } + } else { + match forward_val(&mut engine, &buffer) { + Ok(v) => print!("{}", v.to_string()), + Err(v) => eprint!("{}", v.to_string()), + } } } @@ -37,10 +166,20 @@ pub fn main() -> Result<(), std::io::Error> { io::stdin().read_line(&mut buffer)?; - match forward_val(&mut engine, buffer.trim_end()) { - Ok(v) => println!("{}", v.to_string()), - Err(v) => eprintln!("{}", v.to_string()), + if args.has_dump_flag() { + match dump(&buffer, &args) { + Ok(_) => {} + Err(e) => eprintln!("{}", e), + } + } 
else { + match forward_val(&mut engine, buffer.trim_end()) { + Ok(v) => println!("{}", v.to_string()), + Err(v) => eprintln!("{}", v.to_string()), + } } + + // The flush is needed because we're in a REPL and we do not want buffering. + std::io::stdout().flush().unwrap(); } } diff --git a/docs/debugging.md b/docs/debugging.md index 1bf9e43812..ad9886a812 100644 --- a/docs/debugging.md +++ b/docs/debugging.md @@ -10,10 +10,19 @@ These are added in order of how the code is read: ## Tokens -The first thing boa will do is generate tokens from source code. +The first thing boa will do is generate tokens from source code. If the token generation is wrong the rest of the operation will be wrong, this is usually a good starting place. -Navigate to `parser_expr` in [lib.rs](../src/lib/lib.rs#L48) and add `dbg!(&tokens);` just below tokens to see the array of token output. You code should look like this: +To print the tokens to stdout, you can use the `boa_cli` command-line flag `--dump-tokens`, which can optionally take a format type. Supports these formats: `Debug`, `Json`, `JsonPretty`. By default it is the `Debug` format. +```bash +cargo run -- test.js --dump-tokens # token dump format is Debug by default. +``` +or with interactive mode (REPL): +```bash +cargo run -- --dump-tokens # token dump format is Debug by default. +``` + +Or you can do it manually by navigating to `parser_expr` in [lib.rs](../boa/src/lib.rs#L25) and adding `dbg!(&tokens);` just below tokens to see the array of token output. Your code should look like this: ```rust let mut lexer = Lexer::new(src); @@ -22,18 +31,32 @@ Navigate to `parser_expr` in [lib.rs](../src/lib/lib.rs#L48) and add `dbg!(&toke dbg!(&tokens); ... ``` - Seeing the order of tokens can be a big help to understanding what the parser is working with. +**Note:** flags `--dump-tokens` and `--dump-ast` are mutually exclusive. When using the flag `--dump-tokens`, the code will not be executed. 
+ ## Expressions Assuming the tokens looks fine, the next step is to see the AST. -You can output the expressions in [forward](../src/lib/lib.rs#L57), add `dbg!(&expr);` -This will print out the entire parse tree. +You can use the `boa_cli` command-line flag `--dump-ast`, which can optionally take a format type. Supports these formats: `Debug`, `Json`, `JsonPretty`. By default it is the `Debug` format. + +Dumping the AST of a file: +```bash +cargo run -- test.js --dump-ast # AST dump format is Debug by default. +``` +or with interactive mode (REPL): +```bash +cargo run -- --dump-ast # AST dump format is Debug by default. +``` +Or manually, you can output the expressions in [forward](../boa/src/lib.rs#L36), add `dbg!(&expr);` + +These methods will print out the entire parse tree. + +**Note:** flags `--dump-tokens` and `--dump-ast` are mutually exclusive. When using the flag `--dump-ast`, the code will not be executed. ## Execution -Once the tree has been generated [exec](../src/lib/exec.rs#L66) will begin to run through each expression. If the tokens and tree looks fine, you can start looking here. +Once the tree has been generated [exec](../boa/src/lib.rs#L67) will begin to run through each expression. If the tokens and tree looks fine, you can start looking here. I usually just add `dbg!()` in the relevent places to see what the output is at the time. ## Debugger @@ -44,6 +67,6 @@ The quickest way to get debugging is to re-open the workspace in the container ( ### LLDB Manually -You can also use rust-lldb. -The `Dockerfile` already has this enabled, you should be able to use that environment to run your code. +You can also use rust-lldb. +The `Dockerfile` already has this enabled, you should be able to use that environment to run your code. `rust-lldb ./target/debug/boa [arguments]`