|
|
|
@ -1,7 +1,42 @@
|
|
|
|
|
use std::char::from_u32; |
|
|
|
|
use std::error; |
|
|
|
|
use std::fmt; |
|
|
|
|
use std::iter::Peekable; |
|
|
|
|
use std::str::Chars; |
|
|
|
|
use syntax::ast::punc::Punctuator; |
|
|
|
|
use syntax::ast::token::{Token, TokenData}; |
|
|
|
|
|
|
|
|
|
/// Error type returned by the lexer's fallible operations.
///
/// It carries only a human-readable message; no underlying cause is tracked.
#[derive(Debug, Clone)]
pub struct LexerError {
    /// Human-readable description of what went wrong.
    details: String,
}

impl LexerError {
    /// Builds a `LexerError` whose message is an owned copy of `msg`.
    fn new(msg: &str) -> LexerError {
        LexerError {
            details: msg.to_string(),
        }
    }
}

impl fmt::Display for LexerError {
    /// Writes the stored message verbatim.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.details)
    }
}

// Implementing `Error` is what allows other error types to wrap this one.
impl error::Error for LexerError {
    /// Returns the stored message.
    fn description(&self) -> &str {
        &self.details
    }

    /// Always returns `None`: this is a generic error and the underlying
    /// cause isn't tracked.
    fn cause(&self) -> Option<&dyn error::Error> {
        None
    }
}
|
|
|
|
|
|
|
|
|
/// A JavaScript lexer
|
|
|
|
|
pub struct Lexer<'a> { |
|
|
|
|
// The list of tokens generated so far
|
|
|
|
@ -11,7 +46,7 @@ pub struct Lexer<'a> {
|
|
|
|
|
// the current column number in the script
|
|
|
|
|
column_number: u64, |
|
|
|
|
// The full string
|
|
|
|
|
buffer: Chars<'a>, |
|
|
|
|
buffer: Peekable<Chars<'a>>, |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
impl<'a> Lexer<'a> { |
|
|
|
@ -20,7 +55,7 @@ impl<'a> Lexer<'a> {
|
|
|
|
|
tokens: Vec::new(), |
|
|
|
|
line_number: 1, |
|
|
|
|
column_number: 0, |
|
|
|
|
buffer: buffer.chars(), |
|
|
|
|
buffer: buffer.chars().peekable(), |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
/// Push tokens onto the token queue
|
|
|
|
@ -34,20 +69,100 @@ impl<'a> Lexer<'a> {
|
|
|
|
|
self.push_token(TokenData::TPunctuator(punc)); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<char> { |
|
|
|
|
self.buffer.next() |
|
|
|
|
fn next(&mut self) -> Result<char, LexerError> { |
|
|
|
|
self.buffer.next().ok_or(LexerError::new("next failed")) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
fn preview_next(&mut self) -> Result<&char, ()> { |
|
|
|
|
// ok_or converts Option to a Result
|
|
|
|
|
self.buffer.peek().ok_or(()) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
pub fn lex(&mut self) { |
|
|
|
|
fn next_is(&mut self, peek: char) -> Result<bool, ()> { |
|
|
|
|
let result = try!(self.preview_next()) == &peek; |
|
|
|
|
if result { |
|
|
|
|
self.buffer.next(); |
|
|
|
|
} |
|
|
|
|
Ok(result) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
pub fn lex(&mut self) -> Result<(), LexerError> { |
|
|
|
|
loop { |
|
|
|
|
let ch = self.next(); |
|
|
|
|
let ch = match self.next() { |
|
|
|
|
Ok(ch) => ch, |
|
|
|
|
Err(LexerError) => return Err(LexerError), |
|
|
|
|
}; |
|
|
|
|
self.column_number += 1; |
|
|
|
|
match ch { |
|
|
|
|
Some(c) => { |
|
|
|
|
println!("{}", c); |
|
|
|
|
'"' | '\'' => { |
|
|
|
|
let mut buf = String::new(); |
|
|
|
|
loop { |
|
|
|
|
match self.next()? { |
|
|
|
|
'\'' if ch == '\'' => { |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
None => { |
|
|
|
|
'"' if ch == '"' => { |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
'\\' => { |
|
|
|
|
let escape = self.next()?; |
|
|
|
|
if escape != '\n' { |
|
|
|
|
let escaped_ch = match escape { |
|
|
|
|
'n' => '\n', |
|
|
|
|
'r' => '\r', |
|
|
|
|
't' => '\t', |
|
|
|
|
'b' => '\x08', |
|
|
|
|
'f' => '\x0c', |
|
|
|
|
'0' => '\0', |
|
|
|
|
'x' => { |
|
|
|
|
let mut nums = String::with_capacity(2); |
|
|
|
|
for _ in 0u8..2 { |
|
|
|
|
nums.push(self.next()?); |
|
|
|
|
} |
|
|
|
|
self.column_number += 2; |
|
|
|
|
let as_num = match u64::from_str_radix(&nums, 16) { |
|
|
|
|
Ok(v) => v, |
|
|
|
|
Err(e) => 0, |
|
|
|
|
}; |
|
|
|
|
match from_u32(as_num as u32) { |
|
|
|
|
Some(v) => v, |
|
|
|
|
None => panic!( |
|
|
|
|
"{}:{}: {} is not a valid unicode scalar value", |
|
|
|
|
self.line_number, self.column_number, as_num |
|
|
|
|
), |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
'u' => { |
|
|
|
|
let mut nums = String::new(); |
|
|
|
|
for _ in 0u8..4 { |
|
|
|
|
nums.push(self.next()?); |
|
|
|
|
} |
|
|
|
|
self.column_number += 4; |
|
|
|
|
let as_num = match u64::from_str_radix(&nums, 16) { |
|
|
|
|
Ok(v) => v, |
|
|
|
|
Err(e) => 0, |
|
|
|
|
}; |
|
|
|
|
match from_u32(as_num as u32) { |
|
|
|
|
Some(v) => v, |
|
|
|
|
None => panic!( |
|
|
|
|
"{}:{}: {} is not a valid unicode scalar value", |
|
|
|
|
self.line_number, self.column_number, as_num |
|
|
|
|
), |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
'\'' | '"' => escape, |
|
|
|
|
_ => panic!( |
|
|
|
|
"{}:{}: Invalid escape `{}`", |
|
|
|
|
self.line_number, self.column_number, ch |
|
|
|
|
), |
|
|
|
|
}; |
|
|
|
|
buf.push(escaped_ch); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
ch => buf.push(ch), |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|