Browse Source

Refresh vm docs and fix bytecode trace output (#1921)

It changes the following:

- Refreshes the vm and debugging docs to represent the current state
- Fix some bytecode trace output
- Rename a field in the `CodeBlock`
pull/1927/head
raskad 3 years ago
parent
commit
23711a638b
  1. 4
      boa_engine/src/bytecompiler.rs
  2. 24
      boa_engine/src/vm/code_block.rs
  3. 12
      boa_engine/src/vm/mod.rs
  4. 56
      docs/debugging.md
  5. BIN
      docs/img/boa_architecture.drawio.png
  6. 3
      docs/img/boa_architecture.svg
  7. 2
      docs/profiling.md
  8. 78
      docs/vm.md

4
boa_engine/src/bytecompiler.rs

@ -115,8 +115,8 @@ impl<'b> ByteCompiler<'b> {
return *index; return *index;
} }
let index = self.code_block.variables.len() as u32; let index = self.code_block.names.len() as u32;
self.code_block.variables.push(name); self.code_block.names.push(name);
self.names_map.insert(name, index); self.names_map.insert(name, index);
index index
} }

24
boa_engine/src/vm/code_block.rs

@ -78,7 +78,7 @@ pub struct CodeBlock {
pub(crate) literals: Vec<JsValue>, pub(crate) literals: Vec<JsValue>,
/// Property field names. /// Property field names.
pub(crate) variables: Vec<Sym>, pub(crate) names: Vec<Sym>,
/// Locators for all bindings in the codeblock. /// Locators for all bindings in the codeblock.
#[unsafe_ignore_trace] #[unsafe_ignore_trace]
@ -104,7 +104,7 @@ impl CodeBlock {
Self { Self {
code: Vec::new(), code: Vec::new(),
literals: Vec::new(), literals: Vec::new(),
variables: Vec::new(), names: Vec::new(),
bindings: Vec::new(), bindings: Vec::new(),
num_bindings: 0, num_bindings: 0,
functions: Vec::new(), functions: Vec::new(),
@ -242,7 +242,7 @@ impl CodeBlock {
*pc += size_of::<u32>(); *pc += size_of::<u32>();
format!( format!(
"{operand:04}: '{}'", "{operand:04}: '{}'",
interner.resolve_expect(self.variables[operand as usize]), interner.resolve_expect(self.names[operand as usize]),
) )
} }
Opcode::Pop Opcode::Pop
@ -350,10 +350,10 @@ impl ToInternedString for CodeBlock {
let mut count = 0; let mut count = 0;
while pc < self.code.len() { while pc < self.code.len() {
let opcode: Opcode = self.code[pc].try_into().expect("invalid opcode"); let opcode: Opcode = self.code[pc].try_into().expect("invalid opcode");
let opcode = opcode.as_str();
let operands = self.instruction_operands(&mut pc, interner); let operands = self.instruction_operands(&mut pc, interner);
f.push_str(&format!( f.push_str(&format!(
" {pc:06} {count:04} {:<27}\n{operands}", "{pc:06} {count:04} {opcode:<27}{operands}\n",
opcode.as_str(),
)); ));
count += 1; count += 1;
} }
@ -361,7 +361,7 @@ impl ToInternedString for CodeBlock {
f.push_str("\nLiterals:\n"); f.push_str("\nLiterals:\n");
if self.literals.is_empty() { if self.literals.is_empty() {
f.push_str(" <empty>"); f.push_str(" <empty>\n");
} else { } else {
for (i, value) in self.literals.iter().enumerate() { for (i, value) in self.literals.iter().enumerate() {
f.push_str(&format!( f.push_str(&format!(
@ -372,21 +372,21 @@ impl ToInternedString for CodeBlock {
} }
} }
f.push_str("\nNames:\n"); f.push_str("\nBindings:\n");
if self.variables.is_empty() { if self.bindings.is_empty() {
f.push_str(" <empty>"); f.push_str(" <empty>\n");
} else { } else {
for (i, value) in self.variables.iter().enumerate() { for (i, binding_locator) in self.bindings.iter().enumerate() {
f.push_str(&format!( f.push_str(&format!(
" {i:04}: {}\n", " {i:04}: {}\n",
interner.resolve_expect(*value) interner.resolve_expect(binding_locator.name())
)); ));
} }
} }
f.push_str("\nFunctions:\n"); f.push_str("\nFunctions:\n");
if self.functions.is_empty() { if self.functions.is_empty() {
f.push_str(" <empty>"); f.push_str(" <empty>\n");
} else { } else {
for (i, code) in self.functions.iter().enumerate() { for (i, code) in self.functions.iter().enumerate() {
f.push_str(&format!( f.push_str(&format!(

12
boa_engine/src/vm/mod.rs

@ -593,7 +593,7 @@ impl Context {
value.to_object(self)? value.to_object(self)?
}; };
let name = self.vm.frame().code.variables[index as usize]; let name = self.vm.frame().code.names[index as usize];
let name: PropertyKey = self.interner().resolve_expect(name).into(); let name: PropertyKey = self.interner().resolve_expect(name).into();
let result = object.get(name, self)?; let result = object.get(name, self)?;
@ -624,7 +624,7 @@ impl Context {
object.to_object(self)? object.to_object(self)?
}; };
let name = self.vm.frame().code.variables[index as usize]; let name = self.vm.frame().code.names[index as usize];
let name: PropertyKey = self.interner().resolve_expect(name).into(); let name: PropertyKey = self.interner().resolve_expect(name).into();
object.set( object.set(
@ -645,7 +645,7 @@ impl Context {
object.to_object(self)? object.to_object(self)?
}; };
let name = self.vm.frame().code.variables[index as usize]; let name = self.vm.frame().code.names[index as usize];
let name = self.interner().resolve_expect(name); let name = self.interner().resolve_expect(name);
object.__define_own_property__( object.__define_own_property__(
@ -706,7 +706,7 @@ impl Context {
let value = self.vm.pop(); let value = self.vm.pop();
let object = object.to_object(self)?; let object = object.to_object(self)?;
let name = self.vm.frame().code.variables[index as usize]; let name = self.vm.frame().code.names[index as usize];
let name = self.interner().resolve_expect(name).into(); let name = self.interner().resolve_expect(name).into();
let set = object let set = object
.__get_own_property__(&name, self)? .__get_own_property__(&name, self)?
@ -751,7 +751,7 @@ impl Context {
let object = self.vm.pop(); let object = self.vm.pop();
let value = self.vm.pop(); let value = self.vm.pop();
let object = object.to_object(self)?; let object = object.to_object(self)?;
let name = self.vm.frame().code.variables[index as usize]; let name = self.vm.frame().code.names[index as usize];
let name = self.interner().resolve_expect(name).into(); let name = self.interner().resolve_expect(name).into();
let get = object let get = object
.__get_own_property__(&name, self)? .__get_own_property__(&name, self)?
@ -793,7 +793,7 @@ impl Context {
} }
Opcode::DeletePropertyByName => { Opcode::DeletePropertyByName => {
let index = self.vm.read::<u32>(); let index = self.vm.read::<u32>();
let key = self.vm.frame().code.variables[index as usize]; let key = self.vm.frame().code.names[index as usize];
let key = self.interner().resolve_expect(key).into(); let key = self.interner().resolve_expect(key).into();
let object = self.vm.pop(); let object = self.vm.pop();
let result = object.to_object(self)?.__delete__(&key, self)?; let result = object.to_object(self)?.__delete__(&key, self)?;

56
docs/debugging.md

@ -13,38 +13,14 @@ arguments to start a shell to execute JS.
These are added in order of how the code is read: These are added in order of how the code is read:
## Tokens ## Tokens and AST nodes
The first thing boa will do is generate tokens from source code. If the token The first thing boa will do is to generate tokens from the source code.
generation is wrong the rest of the operation will be wrong, this is usually These tokens are then parsed into an abstract syntax tree (AST).
a good starting place. Any syntax errors should be thrown while the AST is generated.
To print the tokens to stdout, you can use the `boa_cli` command-line flag You can use the `boa_cli` command-line flag `--dump-ast` to print the AST.
`--dump-tokens` or `-t`, which can optionally take a format type. Supports The flag supports these formats: `Debug`, `Json`, `JsonPretty`. By default
these formats: `Debug`, `Json`, `JsonPretty`. By default it is the `Debug`
format.
```bash
cargo run -- test.js --dump-tokens # token dump format is Debug by default.
```
or with interactive mode (REPL):
```bash
cargo run -- --dump-tokens # token dump format is Debug by default.
```
Seeing the order of tokens can be a big help to understanding what the parser
is working with.
**Note:** flags `--dump-tokens` and `--dump-ast` are mutually exclusive. When
using the flag `--dump-tokens`, the code will not be executed.
## AST nodes
Assuming the tokens looks fine, the next step is to see the AST. You can use
the `boa_cli` command-line flag `--dump-ast`, which can optionally take a
format type. Supports these formats: `Debug`, `Json`, `JsonPretty`. By default
it is the `Debug` format. it is the `Debug` format.
Dumping the AST of a file: Dumping the AST of a file:
@ -59,23 +35,19 @@ or with interactive mode (REPL):
cargo run -- --dump-ast # AST dump format is Debug by default. cargo run -- --dump-ast # AST dump format is Debug by default.
``` ```
These methods will print out the entire parse tree. ## Bytecode generation and Execution
Once the AST has been generated boa will compile it into bytecode.
The bytecode is then executed by the vm.
You can print the bytecode and the executed instructions with the command-line flag `--trace`.
**Note:** flags `--dump-tokens` and `--dump-ast` are mutually exclusive. When For more detailed information about the vm and the trace output look [here](./vm.md).
using the flag `--dump-ast`, the code will not be executed.
## Compiler panics ## Compiler panics
In the case of a compiler panic, to get a full backtrace you will need to set In the case of a compiler panic, to get a full backtrace you will need to set
the environment variable `RUST_BACKTRACE=1`. the environment variable `RUST_BACKTRACE=1`.
## Execution
Once the tree has been generated [exec](../boa/src/lib.rs#L92) will begin to
run through each node. If the tokens and tree looks fine, you can start looking
here. We usually just add `dbg!()` in the relevent places to see what the
output is at the time.
## Debugger ## Debugger
### VS Code Debugger ### VS Code Debugger
@ -94,7 +66,3 @@ rust-lldb ./target/debug/boa [arguments]
[remote_containers]: https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers [remote_containers]: https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers
[blog_debugging]: https://jason-williams.co.uk/debugging-rust-in-vscode [blog_debugging]: https://jason-williams.co.uk/debugging-rust-in-vscode
## VM
For debugging the new VM see [here](./vm.md)

BIN
docs/img/boa_architecture.drawio.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

3
docs/img/boa_architecture.svg

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 12 KiB

2
docs/profiling.md

@ -10,7 +10,7 @@ We use a crate called [measureme](https://github.com/rust-lang/measureme), which
When the "profiler" flag is enabled, you compile with the profiler and it is called throughout the interpreter. When the "profiler" flag is enabled, you compile with the profiler and it is called throughout the interpreter.
When the feature flag is not enabled, you have an empty dummy implementation that is just no ops. rustc should completely optimize that away. So there should be no performance downgrade from these changes When the feature flag is not enabled, you have an empty dummy implementation that is just no ops. rustc should completely optimize that away. So there should be no performance downgrade from these changes
## Prerequesites ## Prerequisites
- [Crox](https://github.com/rust-lang/measureme/blob/master/crox/README.md) - [Crox](https://github.com/rust-lang/measureme/blob/master/crox/README.md)
- [summarize (Optional)](https://github.com/rust-lang/measureme/blob/master/summarize/README.md) - [summarize (Optional)](https://github.com/rust-lang/measureme/blob/master/summarize/README.md)

78
docs/vm.md

@ -1,16 +1,8 @@
# VM (Beta) # VM
## State Of Play ## Architecture
By default Boa does not use the VM branch; execution is done via walking the AST. This allows us to work on the VM branch whilst not interrupting any progress made on AST execution. ![image](img/boa_architecture.drawio.png)
You can interpret bytecode by passing the "vm" flag (see below). The diagram below should illustrate how things work today (Jan 2021).
![image](img/boa_architecture.svg)
## Enabling ByteCode interpretation
You need to enable this via a feature flag. If using VSCode you can run `Cargo Run (VM)`. If using the command line you can pass `cargo run --features vm ../tests/js/test.js` from within the boa_cli folder. You can also pass the `--trace` optional flag to print the trace of the code.
## Understanding the trace output ## Understanding the trace output
@ -21,34 +13,35 @@ let a = 1;
let b = 2; let b = 2;
``` ```
Should output: Outputs:
```text ```text
Code: ----------------------Compiled Output: '<main>'-----------------------
Location Count Opcode Operands Location Count Opcode Operands
000000 0000 DefLet 0000: 'a'
000005 0001 PushOne 000001 0000 PushOne
000006 0002 InitLexical 0000: 'a' 000006 0001 DefInitLet 0000: 'a'
000011 0003 DefLet 0001: 'b' 000008 0002 PushInt8 2
000016 0004 PushInt8 2 000013 0003 DefInitLet 0001: 'b'
000018 0005 InitLexical 0001: 'b'
Literals: Literals:
<empty> <empty>
Names: Bindings:
0000: a 0000: a
0001: b 0001: b
Functions:
<empty>
-------------------------------------- Vm Start -------------------------------------- ------------------------------------------ VM Start ------------------------------------------
Time Opcode Operands Top Of Stack Time Opcode Operands Top Of Stack
64μs DefLet 0000: 'a' <empty>
3μs PushOne 1 386μs PushOne 1
21μs InitLexical 0000: 'a' <empty> 6μs DefInitLet 0000: 'a' <empty>
32μs DefLet 0001: 'b' <empty> 1μs PushInt8 2 2
2μs PushInt8 2 2 2μs DefInitLet 0001: 'b' <empty>
17μs InitLexical 0001: 'b' <empty>
Stack: Stack:
<empty> <empty>
@ -57,38 +50,25 @@ Stack:
undefined undefined
``` ```
The above will output three sections that are divided into subsections: The above output contains the following information:
- The code that will be executed - The bytecode and properties of the function that will be executed
- `Code`: The bytecode. - `Compiled Output`: The bytecode.
- `Location`: Location of the instruction (instructions are not the same size). - `Location`: Location of the instruction (instructions are not the same size).
- `Count`: Instruction count. - `Count`: Instruction count.
- `Opcode`: Opcode name.
- `Operands`: The operands of the opcode. - `Operands`: The operands of the opcode.
- `Literals`: The literals used by the opcode (like strings). - `Literals`: The literals used by the bytecode (like strings).
- `Names`: Contains variable names. - `Bindings`: Binding names used by the bytecode.
- The code being executed (marked by `"Vm Start"`). - `Functions`: Function names used by the bytecode.
- The code being executed (marked by `VM Start` or `Call Frame`).
- `Time`: The amount of time that instruction took to execute. - `Time`: The amount of time that instruction took to execute.
- `Opcode`: Opcode name. - `Opcode`: Opcode name.
- `Operands`: The operands this opcode took. - `Operands`: The operands of the opcode.
- `Top Of Stack`: The top element of the stack **after** execution of instruction. - `Top Of Stack`: The top element of the stack **after** execution of instruction.
- `Stack`: The trace of the stack after execution ends. - `Stack`: The trace of the stack after execution ends.
- The result of the execution (The top element of the stack, if the stack is empty then `undefined` is returned). - The result of the execution (The top element of the stack, if the stack is empty then `undefined` is returned).
### Instruction
This shows each instruction being executed and how long it took. This is useful for us to see if a particular instruction is taking too long.
Then you have the instruction itself and its operand. Last you have what is on the top of the stack **after** the instruction is executed, followed by the memory address of that same value. We show the memory address to identify if 2 values are the same or different.
### Literals
JSValues can live on the pool, which acts as our heap. Instructions often have an index of where on the pool it refers to a value.
You can use these values to match up with the instructions above. For e.g (using the above output) `DefLet 0` means take the value off the pool at index `0`, which is `a` and define it in the current scope.
### Stack
The stack view shows what the stack looks like for the JS executed.
Using the above output as an exmaple, after `PushOne` has been executed the next instruction (`InitLexical 0`) has a `1` on the top of the stack. This is because `PushOne` puts `1` on the stack.
### Comparing ByteCode output ### Comparing ByteCode output
If you wanted another engine's bytecode output for the same JS, SpiderMonkey's bytecode output is the best to use. You can follow the setup [here](https://developer.mozilla.org/en-US/docs/Mozilla/Projects/SpiderMonkey/Introduction_to_the_JavaScript_shell). You will need to build from source because the pre-built binaries don't include the debugging utilities which we need. If you wanted another engine's bytecode output for the same JS, SpiderMonkey's bytecode output is the best to use. You can follow the setup [here](https://developer.mozilla.org/en-US/docs/Mozilla/Projects/SpiderMonkey/Introduction_to_the_JavaScript_shell). You will need to build from source because the pre-built binaries don't include the debugging utilities which we need.

Loading…
Cancel
Save