From 7b34b1da9bdb88c0e72ed860e518e9407ac3524a Mon Sep 17 00:00:00 2001 From: Jason Williams Date: Mon, 13 Aug 2018 23:12:54 +0100 Subject: [PATCH] adding lexer updates --- src/lib/syntax/lexer.rs | 137 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 126 insertions(+), 11 deletions(-) diff --git a/src/lib/syntax/lexer.rs b/src/lib/syntax/lexer.rs index ab7ad88ace..3b5d63d083 100644 --- a/src/lib/syntax/lexer.rs +++ b/src/lib/syntax/lexer.rs @@ -1,7 +1,42 @@ +use std::char::from_u32; +use std::error; +use std::fmt; +use std::iter::Peekable; use std::str::Chars; use syntax::ast::punc::Punctuator; use syntax::ast::token::{Token, TokenData}; +// Defining an error type +#[derive(Debug, Clone)] +pub struct LexerError { + details: String, +} +impl LexerError { + fn new(msg: &str) -> LexerError { + LexerError { + details: msg.to_string(), + } + } +} + +impl fmt::Display for LexerError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.details) + } +} + +// This is important for other errors to wrap this one. +impl error::Error for LexerError { + fn description(&self) -> &str { + &self.details + } + + fn cause(&self) -> Option<&error::Error> { + // Generic error, underlying cause isn't tracked. 
+ None + } +} + /// A javascript Lexer pub struct Lexer<'a> { // The list fo tokens generated so far @@ -11,7 +46,7 @@ pub struct Lexer<'a> { // the current column number in the script column_number: u64, // The full string - buffer: Chars<'a>, + buffer: Peekable<Chars<'a>>, } impl<'a> Lexer<'a> { @@ -20,7 +55,7 @@ impl<'a> Lexer<'a> { tokens: Vec::new(), line_number: 1, column_number: 0, - buffer: buffer.chars(), + buffer: buffer.chars().peekable(), } } /// Push tokens onto the token queue @@ -34,19 +69,99 @@ impl<'a> Lexer<'a> { self.push_token(TokenData::TPunctuator(punc)); } - fn next(&mut self) -> Option<char> { - self.buffer.next() + fn next(&mut self) -> Result<char, LexerError> { + self.buffer.next().ok_or(LexerError::new("next failed")) + } + + fn preview_next(&mut self) -> Result<&char, ()> { + // ok_or converts Option to a Result + self.buffer.peek().ok_or(()) } - pub fn lex(&mut self) { + fn next_is(&mut self, peek: char) -> Result<bool, ()> { + let result = try!(self.preview_next()) == &peek; + if result { + self.buffer.next(); + } + Ok(result) + } + + pub fn lex(&mut self) -> Result<(), LexerError> { loop { - let ch = self.next(); + let ch = match self.next() { + Ok(ch) => ch, + Err(LexerError) => return Err(LexerError), + }; + self.column_number += 1; match ch { - Some(c) => { - println!("{}", c); - } - None => { - break; + '"' | '\'' => { + let mut buf = String::new(); + loop { + match self.next()?
{ + '\'' if ch == '\'' => { + break; + } + '"' if ch == '"' => { + break; + } + '\\' => { + let escape = self.next()?; + if escape != '\n' { + let escaped_ch = match escape { + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + 'b' => '\x08', + 'f' => '\x0c', + '0' => '\0', + 'x' => { + let mut nums = String::with_capacity(2); + for _ in 0u8..2 { + nums.push(self.next()?); + } + self.column_number += 2; + let as_num = match u64::from_str_radix(&nums, 16) { + Ok(v) => v, + Err(e) => 0, + }; + match from_u32(as_num as u32) { + Some(v) => v, + None => panic!( + "{}:{}: {} is not a valid unicode scalar value", + self.line_number, self.column_number, as_num + ), + } + } + 'u' => { + let mut nums = String::new(); + for _ in 0u8..4 { + nums.push(self.next()?); + } + self.column_number += 4; + let as_num = match u64::from_str_radix(&nums, 16) { + Ok(v) => v, + Err(e) => 0, + }; + match from_u32(as_num as u32) { + Some(v) => v, + None => panic!( + "{}:{}: {} is not a valid unicode scalar value", + self.line_number, self.column_number, as_num + ), + } + } + '\'' | '"' => escape, + _ => panic!( + "{}:{}: Invalid escape `{}`", + self.line_number, self.column_number, ch + ), + }; + buf.push(escaped_ch); + } + } + ch => buf.push(ch), + } + } } } }