Browse Source

adding lexer updates

pull/1/head
Jason Williams 6 years ago
parent
commit
7b34b1da9b
  1. 133
      src/lib/syntax/lexer.rs

133
src/lib/syntax/lexer.rs

@ -1,7 +1,42 @@
use std::char::from_u32;
use std::error;
use std::fmt;
use std::iter::Peekable;
use std::str::Chars; use std::str::Chars;
use syntax::ast::punc::Punctuator; use syntax::ast::punc::Punctuator;
use syntax::ast::token::{Token, TokenData}; use syntax::ast::token::{Token, TokenData};
/// Error type returned by the lexer's fallible operations.
///
/// It carries only a message string; no source position or underlying
/// cause is stored alongside it.
#[derive(Debug, Clone)]
pub struct LexerError {
/// Message text describing the failure; this is what `Display` prints.
details: String,
}
impl LexerError {
fn new(msg: &str) -> LexerError {
LexerError {
details: msg.to_string(),
}
}
}
impl fmt::Display for LexerError {
    /// Writes the stored message text verbatim to the formatter.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(&self.details)
    }
}
// This is important for other errors to wrap this one.
impl error::Error for LexerError {
fn description(&self) -> &str {
&self.details
}
fn cause(&self) -> Option<&error::Error> {
// Generic error, underlying cause isn't tracked.
None
}
}
/// A javascript Lexer /// A javascript Lexer
pub struct Lexer<'a> { pub struct Lexer<'a> {
// The list of tokens generated so far // The list of tokens generated so far
@ -11,7 +46,7 @@ pub struct Lexer<'a> {
// the current column number in the script // the current column number in the script
column_number: u64, column_number: u64,
// The full string // The full string
buffer: Chars<'a>, buffer: Peekable<Chars<'a>>,
} }
impl<'a> Lexer<'a> { impl<'a> Lexer<'a> {
@ -20,7 +55,7 @@ impl<'a> Lexer<'a> {
tokens: Vec::new(), tokens: Vec::new(),
line_number: 1, line_number: 1,
column_number: 0, column_number: 0,
buffer: buffer.chars(), buffer: buffer.chars().peekable(),
} }
} }
/// Push tokens onto the token queue /// Push tokens onto the token queue
@ -34,20 +69,100 @@ impl<'a> Lexer<'a> {
self.push_token(TokenData::TPunctuator(punc)); self.push_token(TokenData::TPunctuator(punc));
} }
fn next(&mut self) -> Option<char> { fn next(&mut self) -> Result<char, LexerError> {
self.buffer.next() self.buffer.next().ok_or(LexerError::new("next failed"))
}
fn preview_next(&mut self) -> Result<&char, ()> {
// ok_or converts Option to a Result
self.buffer.peek().ok_or(())
} }
pub fn lex(&mut self) { fn next_is(&mut self, peek: char) -> Result<bool, ()> {
let result = try!(self.preview_next()) == &peek;
if result {
self.buffer.next();
}
Ok(result)
}
pub fn lex(&mut self) -> Result<(), LexerError> {
loop { loop {
let ch = self.next(); let ch = match self.next() {
Ok(ch) => ch,
Err(LexerError) => return Err(LexerError),
};
self.column_number += 1;
match ch { match ch {
Some(c) => { '"' | '\'' => {
println!("{}", c); let mut buf = String::new();
loop {
match self.next()? {
'\'' if ch == '\'' => {
break;
} }
None => { '"' if ch == '"' => {
break; break;
} }
'\\' => {
let escape = self.next()?;
if escape != '\n' {
let escaped_ch = match escape {
'n' => '\n',
'r' => '\r',
't' => '\t',
'b' => '\x08',
'f' => '\x0c',
'0' => '\0',
'x' => {
let mut nums = String::with_capacity(2);
for _ in 0u8..2 {
nums.push(self.next()?);
}
self.column_number += 2;
let as_num = match u64::from_str_radix(&nums, 16) {
Ok(v) => v,
Err(e) => 0,
};
match from_u32(as_num as u32) {
Some(v) => v,
None => panic!(
"{}:{}: {} is not a valid unicode scalar value",
self.line_number, self.column_number, as_num
),
}
}
'u' => {
let mut nums = String::new();
for _ in 0u8..4 {
nums.push(self.next()?);
}
self.column_number += 4;
let as_num = match u64::from_str_radix(&nums, 16) {
Ok(v) => v,
Err(e) => 0,
};
match from_u32(as_num as u32) {
Some(v) => v,
None => panic!(
"{}:{}: {} is not a valid unicode scalar value",
self.line_number, self.column_number, as_num
),
}
}
'\'' | '"' => escape,
_ => panic!(
"{}:{}: Invalid escape `{}`",
self.line_number, self.column_number, ch
),
};
buf.push(escaped_ch);
}
}
ch => buf.push(ch),
}
}
}
} }
} }
} }

Loading…
Cancel
Save