Browse Source

Temporal Parser Cleanup/Fixes (#3521)

* Update/fix iso grammar parser

* Revert handling of no tz-anno && zoned

* cargo clippy --all-features

* Apply review, small fix on MonthDay/YearMonth, more tests
pull/3529/head
Kevin 11 months ago committed by GitHub
parent
commit
c2f145c49c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 37
      core/temporal/src/components/month_day.rs
  2. 36
      core/temporal/src/components/year_month.rs
  3. 77
      core/temporal/src/parser/annotations.rs
  4. 165
      core/temporal/src/parser/datetime.rs
  5. 61
      core/temporal/src/parser/duration.rs
  6. 227
      core/temporal/src/parser/mod.rs
  7. 108
      core/temporal/src/parser/tests.rs
  8. 84
      core/temporal/src/parser/time.rs
  9. 77
      core/temporal/src/parser/time_zone.rs

37
core/temporal/src/components/month_day.rs

@ -1,10 +1,12 @@
//! This module implements `MonthDay` and any directly related algorithms.
use std::str::FromStr;
use crate::{
components::calendar::CalendarSlot,
iso::{IsoDate, IsoDateSlots},
options::ArithmeticOverflow,
TemporalResult,
TemporalError, TemporalResult,
};
/// The native Rust implementation of `Temporal.PlainMonthDay`
@ -22,8 +24,8 @@ impl MonthDay {
Self { iso, calendar }
}
#[inline]
/// Creates a new valid `MonthDay`.
#[inline]
pub fn new(
month: i32,
day: i32,
@ -34,9 +36,23 @@ impl MonthDay {
Ok(Self::new_unchecked(iso, calendar))
}
/// Returns the `month` value of `MonthDay`.
///
/// Delegates to `month()` on the stored `iso` value.
#[inline]
#[must_use]
pub fn month(&self) -> u8 {
self.iso.month()
}
/// Returns the `day` value of `MonthDay`.
///
/// Delegates to `day()` on the stored `iso` value.
#[inline]
#[must_use]
pub fn day(&self) -> u8 {
self.iso.day()
}
/// Returns a reference to `MonthDay`'s `CalendarSlot`.
///
/// The slot is borrowed, not cloned.
#[inline]
#[must_use]
pub fn calendar(&self) -> &CalendarSlot {
&self.calendar
}
@ -49,3 +65,20 @@ impl IsoDateSlots for MonthDay {
self.iso
}
}
impl FromStr for MonthDay {
    type Err = TemporalError;

    /// Parses a month-day string into a `MonthDay`.
    ///
    /// The calendar found in the parsed record is used when present; otherwise
    /// the calendar identifier defaults to `"iso8601"`. The value is built with
    /// `ArithmeticOverflow::Reject`.
    ///
    /// # Errors
    ///
    /// Returns any error produced by `crate::parser::parse_month_day` or by
    /// `MonthDay::new`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let record = crate::parser::parse_month_day(s)?;

        // Construct the fallback identifier lazily so it is only built when the
        // parsed record carries no calendar.
        let calendar = record.calendar.unwrap_or_else(|| "iso8601".into());

        Self::new(
            record.date.month,
            record.date.day,
            CalendarSlot::Identifier(calendar),
            ArithmeticOverflow::Reject,
        )
    }
}

36
core/temporal/src/components/year_month.rs

@ -1,10 +1,12 @@
//! This module implements `YearMonth` and any directly related algorithms.
use std::str::FromStr;
use crate::{
components::calendar::CalendarSlot,
iso::{IsoDate, IsoDateSlots},
options::ArithmeticOverflow,
TemporalResult,
TemporalError, TemporalResult,
};
/// The native Rust implementation of `Temporal.YearMonth`.
@ -36,6 +38,20 @@ impl YearMonth {
Ok(Self::new_unchecked(iso, calendar))
}
/// Returns the `year` value for this `YearMonth`.
///
/// Delegates to `year()` on the stored `iso` value.
#[inline]
#[must_use]
pub fn year(&self) -> i32 {
self.iso.year()
}
/// Returns the `month` value for this `YearMonth`.
///
/// Delegates to `month()` on the stored `iso` value.
#[inline]
#[must_use]
pub fn month(&self) -> u8 {
self.iso.month()
}
#[inline]
#[must_use]
/// Returns a reference to `YearMonth`'s `CalendarSlot`
@ -51,3 +67,21 @@ impl IsoDateSlots for YearMonth {
self.iso
}
}
impl FromStr for YearMonth {
    type Err = TemporalError;

    /// Parses a year-month string into a `YearMonth`.
    ///
    /// The calendar found in the parsed record is used when present; otherwise
    /// the calendar identifier defaults to `"iso8601"`. No reference day is
    /// supplied (`None`), and the value is built with
    /// `ArithmeticOverflow::Reject`.
    ///
    /// # Errors
    ///
    /// Returns any error produced by `crate::parser::parse_year_month` or by
    /// `YearMonth::new`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let record = crate::parser::parse_year_month(s)?;

        // Construct the fallback identifier lazily so it is only built when the
        // parsed record carries no calendar.
        let calendar = record.calendar.unwrap_or_else(|| "iso8601".into());

        Self::new(
            record.date.year,
            record.date.month,
            None,
            CalendarSlot::Identifier(calendar),
            ArithmeticOverflow::Reject,
        )
    }
}

77
core/temporal/src/parser/annotations.rs

@ -1,5 +1,6 @@
/// Parsing for Temporal's `Annotations`.
use crate::{
assert_syntax,
parser::{
grammar::{
is_a_key_char, is_a_key_leading_char, is_annotation_close,
@ -38,10 +39,10 @@ pub(crate) fn parse_annotation_set(
) -> TemporalResult<AnnotationSet> {
// Parse the first annotation.
let tz_annotation = time_zone::parse_ambiguous_tz_annotation(cursor)?;
if tz_annotation.is_none() && zoned {
return Err(TemporalError::syntax()
.with_message("iso8601 ZonedDateTime requires a TimeZoneAnnotation."));
return Err(
TemporalError::syntax().with_message("ZonedDateTime must have a TimeZone annotation.")
);
}
// Parse any `Annotations`
@ -97,32 +98,27 @@ pub(crate) fn parse_annotations(cursor: &mut Cursor) -> TemporalResult<Recognize
/// Parse an annotation with an `AnnotationKey`=`AnnotationValue` pair.
fn parse_kv_annotation(cursor: &mut Cursor) -> TemporalResult<KeyValueAnnotation> {
debug_assert!(cursor.check_or(false, is_annotation_open));
let potential_critical = cursor.next().ok_or_else(TemporalError::abrupt_end)?;
let (leading_char, critical) = if is_critical_flag(potential_critical) {
(cursor.next().ok_or_else(TemporalError::abrupt_end)?, true)
} else {
(potential_critical, false)
};
assert_syntax!(
is_annotation_open(cursor.abrupt_next()?),
"Invalid annotation open character."
);
if !is_a_key_leading_char(leading_char) {
return Err(TemporalError::syntax().with_message("Invalid AnnotationKey leading character"));
}
let critical = cursor.check_or(false, is_critical_flag);
cursor.advance_if(critical);
// Parse AnnotationKey.
let annotation_key = parse_annotation_key(cursor)?;
debug_assert!(cursor.check_or(false, is_annotation_key_value_separator));
// Advance past the '=' character.
cursor.advance();
assert_syntax!(
is_annotation_key_value_separator(cursor.abrupt_next()?),
"Invalid annotation key-value separator"
);
// Parse AnnotationValue.
let annotation_value = parse_annotation_value(cursor)?;
// Assert that we are at the annotation close and advance cursor past annotation to close.
debug_assert!(cursor.check_or(false, is_annotation_close));
cursor.advance();
assert_syntax!(
is_annotation_close(cursor.abrupt_next()?),
"Invalid annotion closing character"
);
Ok(KeyValueAnnotation {
key: annotation_key,
@ -134,16 +130,22 @@ fn parse_kv_annotation(cursor: &mut Cursor) -> TemporalResult<KeyValueAnnotation
/// Parse an `AnnotationKey`.
fn parse_annotation_key(cursor: &mut Cursor) -> TemporalResult<String> {
let key_start = cursor.pos();
assert_syntax!(
is_a_key_leading_char(cursor.abrupt_next()?),
"Invalid key leading character."
);
while let Some(potential_key_char) = cursor.next() {
// End of key.
if is_annotation_key_value_separator(potential_key_char) {
if cursor.check_or(false, is_annotation_key_value_separator) {
// Return found key
return Ok(cursor.slice(key_start, cursor.pos()));
}
if !is_a_key_char(potential_key_char) {
return Err(TemporalError::syntax().with_message("Invalid AnnotationKey Character"));
}
assert_syntax!(
is_a_key_char(potential_key_char),
"Invalid annotation key character."
);
}
Err(TemporalError::abrupt_end())
@ -152,29 +154,26 @@ fn parse_annotation_key(cursor: &mut Cursor) -> TemporalResult<String> {
/// Parse an `AnnotationValue`.
fn parse_annotation_value(cursor: &mut Cursor) -> TemporalResult<String> {
let value_start = cursor.pos();
cursor.advance();
while let Some(potential_value_char) = cursor.next() {
if is_annotation_close(potential_value_char) {
if cursor.check_or(false, is_annotation_close) {
// Return the determined AnnotationValue.
return Ok(cursor.slice(value_start, cursor.pos()));
}
if is_hyphen(potential_value_char) {
if !cursor
.peek_n(1)
.map_or(false, is_annotation_value_component)
{
return Err(TemporalError::syntax()
.with_message("Missing AttributeValueComponent after '-'"));
}
assert_syntax!(
cursor.peek().map_or(false, is_annotation_value_component),
"Missing annotation value compoenent after '-'"
);
cursor.advance();
continue;
}
if !is_annotation_value_component(potential_value_char) {
return Err(
TemporalError::syntax().with_message("Invalid character in AnnotationValue")
);
}
assert_syntax!(
is_annotation_value_component(potential_value_char),
"Invalid annotation value component character."
);
}
Err(TemporalError::abrupt_end())

165
core/temporal/src/parser/date_time.rs → core/temporal/src/parser/datetime.rs

@ -1,6 +1,7 @@
//! Parsing for Temporal's ISO8601 `Date` and `DateTime`.
use crate::{
assert_syntax,
parser::{
annotations,
grammar::{is_date_time_separator, is_sign, is_utc_designator},
@ -67,13 +68,14 @@ pub(crate) fn parse_annotated_date_time(
// Peek Annotation presence
// Throw error if annotation does not exist and zoned is true, else return.
let annotation_check = cursor.check_or(false, is_annotation_open);
if !annotation_check {
if !cursor.check_or(false, is_annotation_open) {
if flags.contains(DateTimeFlags::ZONED) {
return Err(TemporalError::syntax()
.with_message("ZonedDateTime must have a TimeZoneAnnotation."));
}
cursor.close()?;
return Ok(IsoParseRecord {
date: date_time.date,
time: date_time.time,
@ -101,6 +103,8 @@ pub(crate) fn parse_annotated_date_time(
None
};
cursor.close()?;
Ok(IsoParseRecord {
date: date_time.date,
time: date_time.time,
@ -134,10 +138,7 @@ fn parse_date_time(
let time = time::parse_time_spec(cursor)?;
let time_zone = if cursor
.check(|ch| is_sign(ch) || is_utc_designator(ch))
.unwrap_or(false)
{
let time_zone = if cursor.check_or(false, |ch| is_sign(ch) || is_utc_designator(ch)) {
Some(time_zone::parse_date_time_utc(cursor)?)
} else {
if utc_required {
@ -156,93 +157,40 @@ fn parse_date_time(
/// Parses `Date` record.
fn parse_date(cursor: &mut Cursor) -> TemporalResult<DateRecord> {
let year = parse_date_year(cursor)?;
let divided = cursor
let hyphenated = cursor
.check(is_hyphen)
.ok_or_else(TemporalError::abrupt_end)?;
if divided {
cursor.advance();
}
cursor.advance_if(hyphenated);
let month = parse_date_month(cursor)?;
if cursor.check_or(false, is_hyphen) {
if !divided {
return Err(TemporalError::syntax().with_message("Invalid date separator"));
}
cursor.advance();
if hyphenated {
assert_syntax!(cursor.check_or(false, is_hyphen), "Invalid hyphen usage.");
}
cursor.advance_if(cursor.check_or(false, is_hyphen));
let day = parse_date_day(cursor)?;
Ok(DateRecord { year, month, day })
}
/// Determines if the string can be parsed as a `DateSpecYearMonth`.
pub(crate) fn peek_year_month(cursor: &Cursor) -> TemporalResult<bool> {
let mut ym_peek = if is_sign(cursor.peek().ok_or_else(TemporalError::abrupt_end)?) {
7
} else {
4
};
if cursor
.peek_n(ym_peek)
.map(is_hyphen)
.ok_or_else(TemporalError::abrupt_end)?
{
ym_peek += 1;
}
ym_peek += 2;
if cursor.peek_n(ym_peek).map_or(true, is_annotation_open) {
Ok(true)
} else {
Ok(false)
}
}
// ==== `YearMonth` and `MonthDay` parsing functions ====
/// Parses a `DateSpecYearMonth`
pub(crate) fn parse_year_month(cursor: &mut Cursor) -> TemporalResult<(i32, i32)> {
let year = parse_date_year(cursor)?;
if cursor.check_or(false, is_hyphen) {
cursor.advance();
}
cursor.advance_if(cursor.check_or(false, is_hyphen));
let month = parse_date_month(cursor)?;
Ok((year, month))
}
/// Determines if the string can be parsed as a `DateSpecYearMonth`.
pub(crate) fn peek_month_day(cursor: &Cursor) -> TemporalResult<bool> {
let mut md_peek = if cursor
.peek_n(1)
.map(is_hyphen)
.ok_or_else(TemporalError::abrupt_end)?
{
4
} else {
2
};
if cursor
.peek_n(md_peek)
.map(is_hyphen)
.ok_or_else(TemporalError::abrupt_end)?
{
md_peek += 1;
}
md_peek += 2;
assert_syntax!(
cursor.check_or(true, is_annotation_open),
"Expected an end or AnnotationOpen"
);
if cursor.peek_n(md_peek).map_or(true, is_annotation_open) {
Ok(true)
} else {
Ok(false)
}
Ok((year, month))
}
/// Parses a `DateSpecMonthDay`
@ -251,7 +199,7 @@ pub(crate) fn parse_month_day(cursor: &mut Cursor) -> TemporalResult<(i32, i32)>
.check(is_hyphen)
.ok_or_else(TemporalError::abrupt_end)?;
let dash_two = cursor
.peek_n(1)
.peek()
.map(is_hyphen)
.ok_or_else(TemporalError::abrupt_end)?;
@ -262,38 +210,36 @@ pub(crate) fn parse_month_day(cursor: &mut Cursor) -> TemporalResult<(i32, i32)>
}
let month = parse_date_month(cursor)?;
if cursor.check_or(false, is_hyphen) {
cursor.advance();
}
cursor.advance_if(cursor.check_or(false, is_hyphen));
let day = parse_date_day(cursor)?;
assert_syntax!(
cursor.check_or(true, is_annotation_open),
"Expected an end or AnnotationOpen"
);
Ok((month, day))
}
// ==== Unit Parsers ====
fn parse_date_year(cursor: &mut Cursor) -> TemporalResult<i32> {
if is_sign(cursor.peek().ok_or_else(TemporalError::abrupt_end)?) {
if cursor.check_or(false, is_sign) {
let sign = if cursor.expect_next() == '+' { 1 } else { -1 };
let year_start = cursor.pos();
let sign = if cursor.check_or(false, |ch| ch == '+') {
1
} else {
-1
};
cursor.advance();
for _ in 0..6 {
let year_digit = cursor.peek().ok_or_else(TemporalError::abrupt_end)?;
if !year_digit.is_ascii_digit() {
return Err(TemporalError::syntax().with_message("DateYear must contain digit"));
}
cursor.advance();
let year_digit = cursor.abrupt_next()?;
assert_syntax!(
year_digit.is_ascii_digit(),
"Year must be made up of digits."
);
}
let year_string = cursor.slice(year_start + 1, cursor.pos());
let year_value = year_string
let year_value = cursor
.slice(year_start, cursor.pos())
.parse::<i32>()
.map_err(|e| TemporalError::syntax().with_message(e.to_string()))?;
@ -304,21 +250,28 @@ fn parse_date_year(cursor: &mut Cursor) -> TemporalResult<i32> {
return Err(TemporalError::syntax().with_message("Cannot have negative 0 years."));
}
return Ok(sign * year_value);
let year = sign * year_value;
if !(-271_820..=275_760).contains(&year) {
return Err(TemporalError::range()
.with_message("Year is outside of the minimum supported range."));
}
return Ok(year);
}
let year_start = cursor.pos();
for _ in 0..4 {
let year_digit = cursor.peek().ok_or_else(TemporalError::abrupt_end)?;
if !year_digit.is_ascii_digit() {
return Err(TemporalError::syntax().with_message("DateYear must contain digit"));
}
cursor.advance();
let year_digit = cursor.abrupt_next()?;
assert_syntax!(
year_digit.is_ascii_digit(),
"Year must be made up of digits."
);
}
let year_string = cursor.slice(year_start, cursor.pos());
let year_value = year_string
let year_value = cursor
.slice(year_start, cursor.pos())
.parse::<i32>()
.map_err(|e| TemporalError::syntax().with_message(e.to_string()))?;
@ -326,25 +279,33 @@ fn parse_date_year(cursor: &mut Cursor) -> TemporalResult<i32> {
}
fn parse_date_month(cursor: &mut Cursor) -> TemporalResult<i32> {
let start = cursor.pos();
for _ in 0..2 {
let digit = cursor.abrupt_next()?;
assert_syntax!(digit.is_ascii_digit(), "Month must be a digit");
}
let month_value = cursor
.slice(cursor.pos(), cursor.pos() + 2)
.slice(start, cursor.pos())
.parse::<i32>()
.map_err(|e| TemporalError::syntax().with_message(e.to_string()))?;
if !(1..=12).contains(&month_value) {
return Err(TemporalError::syntax().with_message("DateMonth must be in a range of 1-12"));
}
cursor.advance_n(2);
Ok(month_value)
}
fn parse_date_day(cursor: &mut Cursor) -> TemporalResult<i32> {
let start = cursor.pos();
for _ in 0..2 {
let digit = cursor.abrupt_next()?;
assert_syntax!(digit.is_ascii_digit(), "Date must be a digit");
}
let day_value = cursor
.slice(cursor.pos(), cursor.pos() + 2)
.slice(start, cursor.pos())
.parse::<i32>()
.map_err(|e| TemporalError::syntax().with_message(e.to_string()))?;
if !(1..=31).contains(&day_value) {
return Err(TemporalError::syntax().with_message("DateDay must be in a range of 1-31"));
}
cursor.advance_n(2);
Ok(day_value)
}

61
core/temporal/src/parser/duration.rs

@ -1,4 +1,5 @@
use crate::{
assert_syntax,
parser::{
grammar::{
is_day_designator, is_decimal_separator, is_duration_designator, is_hour_designator,
@ -57,23 +58,15 @@ pub(crate) fn parse_duration(cursor: &mut Cursor) -> TemporalResult<DurationPars
.check(is_sign)
.ok_or_else(TemporalError::abrupt_end)?
{
let sign = cursor.check_or(false, |ch| ch == '+');
cursor.advance();
sign
cursor.expect_next() == '+'
} else {
true
};
if !cursor
.check(is_duration_designator)
.ok_or_else(TemporalError::abrupt_end)?
{
return Err(
TemporalError::syntax().with_message("DurationString missing DurationDesignator.")
);
}
cursor.advance();
assert_syntax!(
is_duration_designator(cursor.abrupt_next()?),
"DurationDisgnator is missing."
);
let date = if cursor.check_or(false, is_time_designator) {
Some(DateDuration::default())
@ -88,9 +81,7 @@ pub(crate) fn parse_duration(cursor: &mut Cursor) -> TemporalResult<DurationPars
None
};
if cursor.peek().is_some() {
return Err(TemporalError::syntax().with_message("Unrecognized value in DurationString."));
}
cursor.close()?;
Ok(DurationParseRecord {
sign,
@ -115,8 +106,10 @@ pub(crate) fn parse_date_duration(cursor: &mut Cursor) -> TemporalResult<DateDur
while cursor.check_or(false, |ch| ch.is_ascii_digit()) {
let digit_start = cursor.pos();
while cursor.check_or(false, |ch| ch.is_ascii_digit()) {
cursor.advance();
while cursor.next().is_some() {
if !cursor.check_or(false, |ch| ch.is_ascii_digit()) {
break;
}
}
let value = cursor
@ -124,7 +117,7 @@ pub(crate) fn parse_date_duration(cursor: &mut Cursor) -> TemporalResult<DateDur
.parse::<i32>()
.map_err(|err| TemporalError::syntax().with_message(err.to_string()))?;
match cursor.peek() {
match cursor.next() {
Some(ch) if is_year_designator(ch) => {
if previous_unit > DateUnit::Year {
return Err(
@ -163,8 +156,6 @@ pub(crate) fn parse_date_duration(cursor: &mut Cursor) -> TemporalResult<DateDur
}
Some(_) | None => return Err(TemporalError::abrupt_end()),
}
cursor.advance();
}
Ok(date)
@ -181,19 +172,20 @@ enum TimeUnit {
pub(crate) fn parse_time_duration(cursor: &mut Cursor) -> TemporalResult<TimeDuration> {
let mut time = TimeDuration::default();
if !cursor.check_or(false, |ch| ch.is_ascii()) {
return Err(
TemporalError::syntax().with_message("No time values provided after TimeDesignator.")
);
}
assert_syntax!(
cursor.check_or(false, |ch| ch.is_ascii_digit()),
"TimeDuration designator must have values after."
);
let mut previous_unit = TimeUnit::None;
let mut fraction_present = false;
while cursor.check_or(false, |ch| ch.is_ascii_digit()) {
let digit_start = cursor.pos();
while cursor.check_or(false, |ch| ch.is_ascii_digit()) {
cursor.advance();
while cursor.next().is_some() {
if !cursor.check_or(false, |ch| ch.is_ascii_digit()) {
break;
}
}
let value = cursor
@ -208,7 +200,7 @@ pub(crate) fn parse_time_duration(cursor: &mut Cursor) -> TemporalResult<TimeDur
0.0
};
match cursor.peek() {
match cursor.next() {
Some(ch) if is_hour_designator(ch) => {
if previous_unit > TimeUnit::Hour {
return Err(
@ -242,14 +234,11 @@ pub(crate) fn parse_time_duration(cursor: &mut Cursor) -> TemporalResult<TimeDur
Some(_) | None => return Err(TemporalError::abrupt_end()),
}
cursor.advance();
if fraction_present {
if cursor.check_or(false, |ch| ch.is_ascii_digit()) {
return Err(TemporalError::syntax()
.with_message("Invalid TimeDuration continuation after FractionPart."));
}
assert_syntax!(
cursor.check_or(true, |ch| !ch.is_ascii_digit()),
"Invalid duration value provided after fraction."
);
break;
}
}

227
core/temporal/src/parser/mod.rs

@ -1,40 +1,113 @@
//! This module implements parsing for ISO 8601 grammar.
use crate::TemporalResult;
use crate::{TemporalError, TemporalResult};
use date_time::DateRecord;
use datetime::DateRecord;
use nodes::{IsoDate, IsoDateTime, IsoTime, TimeZone};
use time::TimeSpec;
mod annotations;
pub(crate) mod date_time;
pub(crate) mod datetime;
pub(crate) mod duration;
mod grammar;
mod nodes;
mod time;
pub(crate) mod time_zone;
use self::date_time::DateTimeFlags;
use self::{datetime::DateTimeFlags, grammar::is_annotation_open};
#[cfg(test)]
mod tests;
// TODO: optimize where possible.
/// `assert_syntax!` is a parser specific utility macro for asserting a syntax test, and returning a
/// `SyntaxError` with the provided message if the test fails.
///
/// On failure the macro expands to an early `return Err(..)` from the enclosing function, so it
/// can only be used inside functions whose error type accepts a `TemporalError`.
///
/// The error type is referenced through `$crate`, so call sites do not need `TemporalError` in
/// scope for the expansion to resolve. A trailing comma after the message is accepted.
#[macro_export]
macro_rules! assert_syntax {
    ($cond:expr, $msg:literal $(,)?) => {
        if !$cond {
            return Err($crate::TemporalError::syntax().with_message($msg));
        }
    };
}
/// A utility function for parsing a `DateTime` string
pub(crate) fn parse_date_time(target: &str) -> TemporalResult<IsoParseRecord> {
date_time::parse_annotated_date_time(DateTimeFlags::empty(), &mut Cursor::new(target))
datetime::parse_annotated_date_time(DateTimeFlags::empty(), &mut Cursor::new(target))
}
/// A utility function for parsing an `Instant` string
#[allow(unused)]
pub(crate) fn parse_instant(target: &str) -> TemporalResult<IsoParseRecord> {
date_time::parse_annotated_date_time(
datetime::parse_annotated_date_time(
DateTimeFlags::UTC_REQ | DateTimeFlags::TIME_REQ,
&mut Cursor::new(target),
)
}
/// Parses `target` as a `YearMonth` string.
///
/// The short year-month form is attempted first. If that attempt fails, the
/// cursor is rewound to the start and the whole target is parsed as an
/// annotated date-time instead. On the short path an optional annotation set
/// may follow, from which only the calendar is kept; the day defaults to `1`.
pub(crate) fn parse_year_month(target: &str) -> TemporalResult<IsoParseRecord> {
    let mut cursor = Cursor::new(target);

    let (year, month) = match datetime::parse_year_month(&mut cursor) {
        Ok(parsed) => parsed,
        Err(_) => {
            // Not a bare year-month: start over and parse a full date-time.
            cursor.pos = 0;
            return datetime::parse_annotated_date_time(DateTimeFlags::empty(), &mut cursor);
        }
    };

    let mut calendar = None;
    if cursor.check_or(false, is_annotation_open) {
        calendar = annotations::parse_annotation_set(false, &mut cursor)?.calendar;
    }

    // Everything must have been consumed by now.
    cursor.close()?;

    let date = DateRecord {
        year,
        month,
        day: 1,
    };

    Ok(IsoParseRecord {
        date,
        time: None,
        tz: None,
        calendar,
    })
}
/// Parses `target` as a `MonthDay` string.
///
/// The short month-day form is attempted first. If that attempt fails, the
/// cursor is rewound to the start and the whole target is parsed as an
/// annotated date-time instead. On the short path an optional annotation set
/// may follow, from which only the calendar is kept; the year is set to `0`.
pub(crate) fn parse_month_day(target: &str) -> TemporalResult<IsoParseRecord> {
    let mut cursor = Cursor::new(target);

    let (month, day) = match datetime::parse_month_day(&mut cursor) {
        Ok(parsed) => parsed,
        Err(_) => {
            // Not a bare month-day: start over and parse a full date-time.
            cursor.pos = 0;
            return datetime::parse_annotated_date_time(DateTimeFlags::empty(), &mut cursor);
        }
    };

    let mut calendar = None;
    if cursor.check_or(false, is_annotation_open) {
        calendar = annotations::parse_annotation_set(false, &mut cursor)?.calendar;
    }

    // Everything must have been consumed by now.
    cursor.close()?;

    let date = DateRecord {
        year: 0,
        month,
        day,
    };

    Ok(IsoParseRecord {
        date,
        time: None,
        tz: None,
        calendar,
    })
}
/// An `IsoParseRecord` is an intermediary record returned by ISO parsing functions.
///
/// `IsoParseRecord` is converted into the ISO AST Nodes.
@ -70,94 +143,6 @@ impl TemporalTimeZoneString {
}
}
/// Parse a [`TemporalYearMonthString`][proposal]
///
/// [proposal]: https://tc39.es/proposal-temporal/#prod-TemporalYearMonthString
#[derive(Debug, Clone, Copy)]
pub struct TemporalYearMonthString;
impl TemporalYearMonthString {
/// Parses a targeted string as a `YearMonth`
///
/// # Errors
///
/// The parse will error if the provided target is not valid
/// Iso8601 grammar.
pub fn parse(cursor: &mut Cursor) -> TemporalResult<IsoDate> {
// TODO: Remove peek in favor of AnnotatedDateTime flag.
if date_time::peek_year_month(cursor)? {
let ym = date_time::parse_year_month(cursor)?;
let calendar = if cursor.check_or(false, |ch| ch == '[') {
let set = annotations::parse_annotation_set(false, cursor)?;
set.calendar
} else {
None
};
return Ok(IsoDate {
year: ym.0,
month: ym.1,
day: 0,
calendar,
});
}
let parse_record = date_time::parse_annotated_date_time(DateTimeFlags::empty(), cursor)?;
Ok(IsoDate {
year: parse_record.date.year,
month: parse_record.date.month,
day: parse_record.date.day,
calendar: parse_record.calendar,
})
}
}
/// Parse a [`TemporalMonthDayString`][proposal]
///
/// [proposal]: https://tc39.es/proposal-temporal/#prod-TemporalMonthDayString
#[derive(Debug, Clone, Copy)]
pub struct TemporalMonthDayString;
impl TemporalMonthDayString {
/// Parses a targeted string as a `MonthDay`.
///
/// # Errors
///
/// The parse will error if the provided target is not valid
/// Iso8601 grammar.
pub fn parse(cursor: &mut Cursor) -> TemporalResult<IsoDate> {
// TODO: Remove peek in favor of AnnotatedDateTime flag.
if date_time::peek_month_day(cursor)? {
let md = date_time::parse_month_day(cursor)?;
let calendar = if cursor.check_or(false, |ch| ch == '[') {
let set = annotations::parse_annotation_set(false, cursor)?;
set.calendar
} else {
None
};
return Ok(IsoDate {
year: 0,
month: md.0,
day: md.1,
calendar,
});
}
let parse_record = date_time::parse_annotated_date_time(DateTimeFlags::empty(), cursor)?;
Ok(IsoDate {
year: parse_record.date.year,
month: parse_record.date.month,
day: parse_record.date.day,
calendar: parse_record.calendar,
})
}
}
/// Parser for a [`TemporalInstantString`][proposal].
///
/// [proposal]: https://tc39.es/proposal-temporal/#prod-TemporalInstantString
@ -172,7 +157,7 @@ impl TemporalInstantString {
/// The parse will error if the provided target is not valid
/// Iso8601 grammar.
pub fn parse(cursor: &mut Cursor) -> TemporalResult<IsoDateTime> {
let parse_record = date_time::parse_annotated_date_time(
let parse_record = datetime::parse_annotated_date_time(
DateTimeFlags::UTC_REQ | DateTimeFlags::TIME_REQ,
cursor,
)?;
@ -225,13 +210,9 @@ impl Cursor {
self.pos
}
/// Peek the value at the current position.
/// Peek the value at next position (current + 1).
fn peek(&self) -> Option<char> {
if (self.pos as usize) < self.source.len() {
Some(self.source[self.pos as usize])
} else {
None
}
self.peek_n(1)
}
/// Peek the value at n len from current.
@ -244,25 +225,41 @@ impl Cursor {
}
}
/// Returns boolean if current position passes check.
/// Runs the provided check on the current position.
fn check<F>(&self, f: F) -> Option<bool>
where
F: FnOnce(char) -> bool,
{
self.peek().map(f)
self.peek_n(0).map(f)
}
/// Returns boolean if current position passes check or default if None.
/// Runs the provided check on current position returns the default value if None.
fn check_or<F>(&self, default: bool, f: F) -> bool
where
F: FnOnce(char) -> bool,
{
self.peek().map_or(default, f)
self.peek_n(0).map_or(default, f)
}
/// Advances the cursor's position and returns the new character.
/// Returns `Cursor`'s current char and advances to the next position.
fn next(&mut self) -> Option<char> {
let result = self.peek_n(0);
self.advance();
self.peek()
result
}
/// Utility method that calls `next` and returns the character, unwrapped.
///
/// # Panics
///
/// Panics if `next` returns `None`, i.e. the caller has not first confirmed
/// that a character exists.
fn expect_next(&mut self) -> char {
self.next().expect("Invalid use of expect_next.")
}
/// A utility `next` method that returns an `AbruptEnd` error instead of `None`
/// when `next` yields no character.
fn abrupt_next(&mut self) -> TemporalResult<char> {
self.next().ok_or_else(TemporalError::abrupt_end)
}
/// Advances the cursor's position by 1.
@ -270,8 +267,24 @@ impl Cursor {
self.pos += 1;
}
/// Utility function that calls `advance` only when `condition` is true;
/// otherwise the cursor is left untouched.
fn advance_if(&mut self, condition: bool) {
if condition {
self.advance();
}
}
/// Advances the cursor's position by `n`.
///
/// No bounds check against the source length is performed here.
fn advance_n(&mut self, n: u32) {
self.pos += n;
}
/// Closes the cursor by verifying that the whole source has been consumed.
///
/// Returns a syntax error when the current position is still inside the
/// source, and `Ok(())` otherwise.
fn close(&mut self) -> TemporalResult<()> {
    let has_unconsumed_input = (self.pos as usize) < self.source.len();

    if has_unconsumed_input {
        Err(TemporalError::syntax().with_message("Unexpected syntax at the end of an ISO target."))
    } else {
        Ok(())
    }
}
}

108
core/temporal/src/parser/tests.rs

@ -1,11 +1,8 @@
use std::str::FromStr;
use crate::{
components::{DateTime, Duration},
parser::{
parse_date_time, Cursor, TemporalInstantString, TemporalMonthDayString,
TemporalYearMonthString,
},
components::{DateTime, Duration, MonthDay, YearMonth},
parser::{parse_date_time, Cursor, TemporalInstantString},
};
#[test]
@ -56,7 +53,10 @@ fn temporal_year_parsing() {
assert_eq!(result_good.iso_date().year(), 2020);
let err_result = bad_year.parse::<DateTime>();
assert!(err_result.is_err());
assert!(
err_result.is_err(),
"Invalid extended year parsing: \"{bad_year}\" should fail to parse."
);
}
#[test]
@ -81,34 +81,37 @@ fn temporal_annotated_date_time() {
#[test]
fn temporal_year_month() {
let possible_year_months = &[
let possible_year_months = [
"+002020-11",
"2020-11[u-ca=iso8601]",
"+00202011",
"202011[u-ca=iso8601]",
"+002020-11-07T12:28:32[!u-ca=iso8601]",
];
for ym in possible_year_months {
let result = TemporalYearMonthString::parse(&mut Cursor::new(ym)).unwrap();
assert_eq!(result.year, 2020);
assert_eq!(result.month, 11);
let result = ym.parse::<YearMonth>().unwrap();
if let Some(calendar) = result.calendar {
assert_eq!(calendar, "iso8601");
}
assert_eq!(result.year(), 2020);
assert_eq!(result.month(), 11);
}
}
#[test]
fn temporal_month_day() {
let possible_month_day = ["11-07", "1107[+04:00]", "--11-07", "--1107[+04:00]"];
let possible_month_day = [
"11-07",
"1107[+04:00]",
"--11-07",
"--1107[+04:00]",
"+002020-11-07T12:28:32[!u-ca=iso8601]",
];
for md in possible_month_day {
let result = TemporalMonthDayString::parse(&mut Cursor::new(md)).unwrap();
let result = md.parse::<MonthDay>().unwrap();
assert_eq!(result.month, 11);
assert_eq!(result.day, 7);
assert_eq!(result.month(), 11);
assert_eq!(result.day(), 7);
}
}
@ -121,8 +124,11 @@ fn temporal_invalid_annotations() {
];
for invalid in invalid_annotations {
let err_result = TemporalMonthDayString::parse(&mut Cursor::new(invalid));
assert!(err_result.is_err());
let err_result = invalid.parse::<MonthDay>();
assert!(
err_result.is_err(),
"Invalid ISO annotation parsing: \"{invalid}\" should fail parsing."
);
}
}
@ -153,7 +159,10 @@ fn temporal_duration_parsing() {
for dur in durations {
let ok_result = Duration::from_str(dur);
assert!(ok_result.is_ok());
assert!(
ok_result.is_ok(),
"Failing to parse a valid ISO 8601 target: \"{dur}\" should pass."
);
}
let sub_second = durations[2].parse::<Duration>().unwrap();
@ -180,6 +189,61 @@ fn temporal_invalid_durations() {
for test in invalids {
let err = test.parse::<Duration>();
assert!(err.is_err());
assert!(
err.is_err(),
"Invalid ISO8601 Duration target: \"{test}\" should fail duration parsing."
);
}
}
/// Every target listed here must be rejected when parsed as a `DateTime`.
#[test]
fn temporal_invalid_iso_datetime_strings() {
    // NOTE: The cases below were initially pulled from test262's `argument-string-invalid`.
    const INVALID_DATETIME_STRINGS: &[&str] = &[
        // Malformed or out-of-range date and time components.
        "",
        "invalid iso8601",
        "2020-01-00",
        "2020-01-32",
        "2020-02-30",
        "2021-02-29",
        "2020-00-01",
        "2020-13-01",
        "2020-01-01T",
        "2020-01-01T25:00:00",
        "2020-01-01T01:60:00",
        "2020-01-01T01:60:61",
        // Trailing junk after an otherwise valid prefix.
        "2020-01-01junk",
        "2020-01-01T00:00:00junk",
        "2020-01-01T00:00:00+00:00junk",
        "2020-01-01T00:00:00+00:00[UTC]junk",
        "2020-01-01T00:00:00+00:00[UTC][u-ca=iso8601]junk",
        // Wrong digit counts and unsupported date forms.
        "02020-01-01",
        "2020-001-01",
        "2020-01-001",
        "2020-01-01T001",
        "2020-01-01T01:001",
        "2020-01-01T01:01:001",
        "2020-W01-1",
        "2020-001",
        "+0002020-01-01",
        // TODO: Add the non-existent calendar test back to the test cases.
        // May be valid in other contexts, but insufficient information for PlainDate:
        "2020-01",
        "+002020-01",
        "01-01",
        "2020-W01",
        "P1Y",
        "-P12Y",
        // Valid, but outside the supported range:
        "-999999-01-01",
        "+999999-01-01",
    ];

    for &invalid_target in INVALID_DATETIME_STRINGS {
        assert!(
            invalid_target.parse::<DateTime>().is_err(),
            "Invalid ISO8601 `DateTime` target: \"{invalid_target}\" should fail parsing."
        );
    }
}

84
core/temporal/src/parser/time.rs

@ -4,7 +4,7 @@ use super::{
grammar::{is_decimal_separator, is_time_separator},
Cursor,
};
use crate::{TemporalError, TemporalResult};
use crate::{assert_syntax, TemporalError, TemporalResult};
/// Parsed Time info
#[derive(Debug, Default, Clone, Copy)]
@ -22,14 +22,8 @@ pub(crate) struct TimeSpec {
/// Parse `TimeSpec`
pub(crate) fn parse_time_spec(cursor: &mut Cursor) -> TemporalResult<TimeSpec> {
let hour = parse_hour(cursor)?;
let mut separator = false;
if cursor.check_or(false, |ch| is_time_separator(ch) || ch.is_ascii_digit()) {
if cursor.check_or(false, is_time_separator) {
separator = true;
cursor.advance();
}
} else {
if !cursor.check_or(false, |ch| is_time_separator(ch) || ch.is_ascii_digit()) {
return Ok(TimeSpec {
hour,
minute: 0,
@ -38,24 +32,24 @@ pub(crate) fn parse_time_spec(cursor: &mut Cursor) -> TemporalResult<TimeSpec> {
});
}
let separator_present = cursor.check_or(false, is_time_separator);
cursor.advance_if(separator_present);
let minute = parse_minute_second(cursor, false)?;
if cursor.check_or(false, |ch| is_time_separator(ch) || ch.is_ascii_digit()) {
let is_time_separator = cursor.check_or(false, is_time_separator);
if separator && is_time_separator {
cursor.advance();
} else if is_time_separator {
return Err(TemporalError::syntax().with_message("Invalid TimeSeparator"));
}
} else {
if !cursor.check_or(false, |ch| is_time_separator(ch) || ch.is_ascii_digit()) {
return Ok(TimeSpec {
hour,
minute,
second: 0,
fraction: 0.0,
});
} else if cursor.check_or(false, is_time_separator) && !separator_present {
return Err(TemporalError::syntax().with_message("Invalid TimeSeparator"));
}
cursor.advance_if(separator_present);
let second = parse_minute_second(cursor, true)?;
let fraction = if cursor.check_or(false, is_decimal_separator) {
@ -73,22 +67,31 @@ pub(crate) fn parse_time_spec(cursor: &mut Cursor) -> TemporalResult<TimeSpec> {
}
pub(crate) fn parse_hour(cursor: &mut Cursor) -> TemporalResult<u8> {
let start = cursor.pos();
for _ in 0..2 {
let digit = cursor.abrupt_next()?;
assert_syntax!(digit.is_ascii_digit(), "Hour must be a digit.");
}
let hour_value = cursor
.slice(cursor.pos(), cursor.pos() + 2)
.slice(start, cursor.pos())
.parse::<u8>()
.map_err(|e| TemporalError::syntax().with_message(e.to_string()))?;
if !(0..=23).contains(&hour_value) {
return Err(TemporalError::syntax().with_message("Hour must be in a range of 0-23"));
}
cursor.advance_n(2);
Ok(hour_value)
}
// NOTE: `TimeSecond` is a `MinuteSecond` whose valid range also includes 60.
/// Parse `MinuteSecond`
pub(crate) fn parse_minute_second(cursor: &mut Cursor, inclusive: bool) -> TemporalResult<u8> {
let start = cursor.pos();
for _ in 0..2 {
let digit = cursor.abrupt_next()?;
assert_syntax!(digit.is_ascii_digit(), "MinuteSecond must be a digit.");
}
let min_sec_value = cursor
.slice(cursor.pos(), cursor.pos() + 2)
.slice(start, cursor.pos())
.parse::<u8>()
.map_err(|e| TemporalError::syntax().with_message(e.to_string()))?;
@ -96,8 +99,6 @@ pub(crate) fn parse_minute_second(cursor: &mut Cursor, inclusive: bool) -> Tempo
if !valid_range.contains(&min_sec_value) {
return Err(TemporalError::syntax().with_message("MinuteSecond must be in a range of 0-59"));
}
cursor.advance_n(2);
Ok(min_sec_value)
}
@ -106,25 +107,28 @@ pub(crate) fn parse_minute_second(cursor: &mut Cursor, inclusive: bool) -> Tempo
/// This is primarily used in ISO8601 to add precision past
/// a second.
pub(crate) fn parse_fraction(cursor: &mut Cursor) -> TemporalResult<f64> {
// Decimal is skipped by next call.
let mut fraction_components = Vec::from(['.']);
while let Some(ch) = cursor.next() {
if !ch.is_ascii_digit() {
if fraction_components.len() > 10 {
return Err(
TemporalError::syntax().with_message("Fraction exceeds 9 DecimalDigits")
);
}
let fraction_value = fraction_components
.iter()
.collect::<String>()
.parse::<f64>()
.map_err(|e| TemporalError::syntax().with_message(e.to_string()))?;
return Ok(fraction_value);
}
fraction_components.push(ch);
let mut fraction_components = Vec::default();
// Assert that the first char provided is a decimal separator.
assert_syntax!(
is_decimal_separator(cursor.abrupt_next()?),
"fraction must begin with a valid decimal separator."
);
fraction_components.push('.');
while cursor.check_or(false, |ch| ch.is_ascii_digit()) {
fraction_components.push(cursor.abrupt_next()?);
}
Err(TemporalError::abrupt_end())
assert_syntax!(
fraction_components.len() <= 10,
"Fraction component cannot exceed 9 digits."
);
let fraction_value = fraction_components
.iter()
.collect::<String>()
.parse::<f64>()
.map_err(|e| TemporalError::syntax().with_message(e.to_string()))?;
Ok(fraction_value)
}

77
core/temporal/src/parser/time_zone.rs

@ -11,7 +11,7 @@ use super::{
time::{parse_fraction, parse_hour, parse_minute_second},
Cursor,
};
use crate::{TemporalError, TemporalResult};
use crate::{assert_syntax, TemporalError, TemporalResult};
/// A `TimeZoneAnnotation`.
#[derive(Debug, Clone)]
@ -76,22 +76,20 @@ pub(crate) fn parse_ambiguous_tz_annotation(
}
fn parse_tz_annotation(cursor: &mut Cursor) -> TemporalResult<TimeZoneAnnotation> {
debug_assert!(is_annotation_open(cursor.peek().expect("annotation start")));
assert_syntax!(
is_annotation_open(cursor.abrupt_next()?),
"Invalid annotation opening character."
);
let potential_critical = cursor.next().ok_or_else(TemporalError::abrupt_end)?;
let critical = is_critical_flag(potential_critical);
if critical {
cursor.advance();
}
let critical = cursor.check_or(false, is_critical_flag);
cursor.advance_if(critical);
let tz = parse_time_zone(cursor)?;
if !cursor.check_or(false, is_annotation_close) {
return Err(TemporalError::syntax().with_message("Invalid TimeZoneAnnotation."));
}
cursor.advance();
assert_syntax!(
is_annotation_close(cursor.abrupt_next()?),
"Invalid annotation closing character."
);
Ok(TimeZoneAnnotation { critical, tz })
}
@ -122,21 +120,26 @@ pub(crate) fn parse_time_zone(cursor: &mut Cursor) -> TemporalResult<TimeZone> {
fn parse_tz_iana_name(cursor: &mut Cursor) -> TemporalResult<TimeZone> {
let tz_name_start = cursor.pos();
while let Some(potential_value_char) = cursor.next() {
if is_tz_name_separator(potential_value_char) {
if !cursor.peek_n(1).map_or(false, is_tz_char) {
return Err(TemporalError::syntax()
.with_message("Missing TimeZoneIANANameComponent after '/'"));
}
continue;
}
if !is_tz_char(potential_value_char) {
if cursor.check_or(false, is_annotation_close) {
// Return the valid TimeZoneIANAName
return Ok(TimeZone {
name: Some(cursor.slice(tz_name_start, cursor.pos())),
offset: None,
});
}
if is_tz_name_separator(potential_value_char) {
assert_syntax!(
cursor.peek_n(2).map_or(false, is_tz_char),
"Missing IANA name component after '/'"
);
continue;
}
assert_syntax!(
is_tz_char(potential_value_char),
"Invalid TimeZone IANA name character."
);
}
Err(TemporalError::abrupt_end())
@ -158,14 +161,12 @@ pub(crate) fn parse_date_time_utc(cursor: &mut Cursor) -> TemporalResult<TimeZon
let mut utc_to_minute = parse_utc_offset_minute_precision(cursor)?;
if cursor.check_or(false, is_time_separator) {
if !separated {
return Err(TemporalError::syntax().with_message("Unexpected TimeSeparator"));
}
cursor.advance();
if cursor.check_or(false, is_time_separator) && !separated {
return Err(TemporalError::syntax().with_message("Invalid time separator in UTC offset."));
}
cursor.advance_if(cursor.check_or(false, is_time_separator));
// Return early on None or next char an AnnotationOpen.
// Return early on None or AnnotationOpen.
if cursor.check_or(true, is_annotation_open) {
return Ok(TimeZone {
name: None,
@ -192,22 +193,19 @@ pub(crate) fn parse_date_time_utc(cursor: &mut Cursor) -> TemporalResult<TimeZon
/// Parse an `UtcOffsetMinutePrecision` node
pub(crate) fn parse_utc_offset_minute_precision(cursor: &mut Cursor) -> TemporalResult<UTCOffset> {
let sign = if let Some(ch) = cursor.next() {
if ch == '+' {
1_i8
let sign = if cursor.check_or(false, is_sign) {
if cursor.expect_next() == '+' {
1
} else {
-1_i8
-1
}
} else {
return Err(TemporalError::abrupt_end());
1
};
let hour = parse_hour(cursor)?;
// If at the end of the UTC offset, then return.
if cursor
.check(|ch| !(ch.is_ascii_digit() || is_time_separator(ch)))
.ok_or_else(TemporalError::abrupt_end)?
{
if !cursor.check_or(false, |ch| ch.is_ascii_digit() || is_time_separator(ch)) {
return Ok(UTCOffset {
sign,
hour,
@ -216,11 +214,8 @@ pub(crate) fn parse_utc_offset_minute_precision(cursor: &mut Cursor) -> Temporal
fraction: 0.0,
});
}
// Advance cursor beyond any TimeSeparator
if cursor.check_or(false, is_time_separator) {
cursor.advance();
}
cursor.advance_if(cursor.check_or(false, is_time_separator));
let minute = parse_minute_second(cursor, false)?;

Loading…
Cancel
Save