The complete pretokenisation grammar
The machine-readable Pest grammar for pretokenisation is presented here for convenience.
See Parsing Expression Grammar notation for an explanation of the notation.
This version of the grammar uses Pest's `PUSH`, `PEEK`, and `POP` for the `Raw_double_quoted_literal` definitions.
`ANY`, `PATTERN_WHITE_SPACE`, `XID_START`, and `XID_CONTINUE` are built in to Pest and so not defined below.
// Top-level pretoken rule for the 2015 variant of the grammar.
// This is an ordered choice: alternatives are tried top to bottom and the
// first one that matches wins, so the order of the entries is significant
// (for example, Float_literal is tried before Integer_literal, and
// Raw_identifier before Identifier).
PRETOKEN_2015 = {
Whitespace |
Line_comment |
Block_comment |
Unterminated_block_comment |
Single_quoted_literal |
Double_quoted_literal_2015 |
Raw_double_quoted_literal_2015 |
Unterminated_literal_2015 |
Float_literal |
Reserved_float |
Integer_literal |
Lifetime_or_label |
Raw_identifier |
Reserved_prefix_2015 |
Identifier |
Punctuation
}
// Top-level pretoken rule for the 2021 variant of the grammar.
// Ordered choice: the first matching alternative wins.
// Compared with PRETOKEN_2015, this rule:
//   - uses the _2021 forms of the double-quoted and raw double-quoted literals;
//   - has Reserved_literal_2021 in the position where the 2015 rule has
//     Unterminated_literal_2015;
//   - tries Raw_lifetime_or_label_2021 and
//     Reserved_lifetime_or_label_prefix_2021 before Lifetime_or_label;
//   - uses Reserved_prefix_2021 instead of Reserved_prefix_2015.
PRETOKEN_2021 = {
Whitespace |
Line_comment |
Block_comment |
Unterminated_block_comment |
Single_quoted_literal |
Double_quoted_literal_2021 |
Raw_double_quoted_literal_2021 |
Reserved_literal_2021 |
Float_literal |
Reserved_float |
Integer_literal |
Raw_lifetime_or_label_2021 |
Reserved_lifetime_or_label_prefix_2021 |
Lifetime_or_label |
Raw_identifier |
Reserved_prefix_2021 |
Identifier |
Punctuation
}
// Top-level pretoken rule for the 2024 variant of the grammar.
// Ordered choice: the first matching alternative wins.
// The list is the same as PRETOKEN_2021 with one addition: Reserved_guard_2024
// is tried immediately after Reserved_literal_2021 and before Float_literal.
PRETOKEN_2024 = {
Whitespace |
Line_comment |
Block_comment |
Unterminated_block_comment |
Single_quoted_literal |
Double_quoted_literal_2021 |
Raw_double_quoted_literal_2021 |
Reserved_literal_2021 |
Reserved_guard_2024 |
Float_literal |
Reserved_float |
Integer_literal |
Raw_lifetime_or_label_2021 |
Reserved_lifetime_or_label_prefix_2021 |
Lifetime_or_label |
Raw_identifier |
Reserved_prefix_2021 |
Identifier |
Punctuation
}
// Identifier-shaped text: one IDENT_START character followed by zero or more
// XID_CONTINUE characters.
IDENT = { IDENT_START ~ XID_CONTINUE * }
// A character that may begin an IDENT: any XID_START character or an underscore.
IDENT_START = { XID_START | "_" }
// A literal suffix has exactly the form of an IDENT. Rules that accept a
// suffix reference this rule with a trailing "?" to make it optional.
SUFFIX = { IDENT }
// A maximal run of one or more PATTERN_WHITE_SPACE characters.
Whitespace = { PATTERN_WHITE_SPACE + }
// A line comment: two slashes followed by LINE_COMMENT_CONTENT.
// The terminating newline, if any, is not part of the match.
Line_comment = { "//" ~ LINE_COMMENT_CONTENT }
// Zero or more characters, stopping before the first "\n" or at end of input.
// May be empty.
LINE_COMMENT_CONTENT = { ( !"\n" ~ ANY )* }
// A terminated block comment: opener, content, closer.
Block_comment = { "/*" ~ BLOCK_COMMENT_CONTENT ~ "*/" }
// Comment body. Each repetition consumes either a complete nested
// Block_comment, or one character at a position where neither the closer nor
// the opener starts. Because an opener can only be consumed as part of a whole
// nested Block_comment, an unbalanced nested opener makes the enclosing
// Block_comment fail to match.
BLOCK_COMMENT_CONTENT = { ( Block_comment | !"*/" ~ !"/*" ~ ANY ) * }
// Matches only the two-character opener. Every PRETOKEN_* rule lists this after
// Block_comment, so it is reached only when no terminated Block_comment
// matches at the same position.
Unterminated_block_comment = { "/*" }
// A single-quoted literal: optional prefix, opening single quote, content,
// closing single quote, then an optional SUFFIX.
Single_quoted_literal = {
SQ_PREFIX ~
"'" ~ SQ_CONTENT ~ "'" ~
SUFFIX ?
}
// Optional "b" prefix; matches the empty string when no "b" is present.
SQ_PREFIX = { "b" ? }
// Content between the quotes. Two alternatives, tried in order:
//   1. a backslash, then any one character (which may itself be a single
//      quote), then any run of characters other than a single quote;
//   2. exactly one character that is not a single quote.
// The content is therefore never empty.
SQ_CONTENT = {
"\\" ~ ANY ~ ( !"'" ~ ANY ) * |
!"'" ~ ANY
}
// Double-quoted literals. The 2015 and 2021 forms differ only in which
// prefixes they accept; both share DQ_REMAINDER.
Double_quoted_literal_2015 = { DQ_PREFIX_2015 ~ DQ_REMAINDER }
Double_quoted_literal_2021 = { DQ_PREFIX_2021 ~ DQ_REMAINDER }
// 2015: optional "b" prefix (may match the empty string).
DQ_PREFIX_2015 = { "b" ? }
// 2021: optional "b" or "c" prefix (may match the empty string).
DQ_PREFIX_2021 = { ( "b" | "c" ) ? }
// Opening double quote, content, closing double quote, then an optional SUFFIX.
DQ_REMAINDER = {
"\"" ~ DQ_CONTENT ~ "\"" ~
SUFFIX ?
}
// Zero or more items, each being either a backslash followed by any one
// character, or a single character other than a double quote. The backslash
// alternative is tried first, so a backslash-escaped double quote is consumed
// as content and does not end the literal.
DQ_CONTENT = {
(
"\\" ~ ANY |
!"\"" ~ ANY
) *
}
// Raw double-quoted literals. The 2015 and 2021 forms differ only in which
// prefixes they accept; both share RAW_DQ_REMAINDER.
Raw_double_quoted_literal_2015 = { RAW_DQ_PREFIX_2015 ~ RAW_DQ_REMAINDER }
Raw_double_quoted_literal_2021 = { RAW_DQ_PREFIX_2021 ~ RAW_DQ_REMAINDER }
// 2015: the prefix is "r" or "br". Unlike the DQ_PREFIX_* rules, a prefix is required.
RAW_DQ_PREFIX_2015 = { "r" | "br" }
// 2021: additionally accepts "cr".
RAW_DQ_PREFIX_2021 = { "r" | "br" | "cr" }
// Uses Pest's stack to require the same number of hashes on both sides:
//   - PUSH(HASHES) matches the opening run of hashes and saves the matched text;
//   - RAW_DQ_CONTENT reads the saved text with PEEK to locate the terminator;
//   - POP matches the saved text again after the closing double quote and
//     removes it from the stack.
// An optional SUFFIX may follow.
RAW_DQ_REMAINDER = {
PUSH(HASHES) ~
"\"" ~ RAW_DQ_CONTENT ~ "\"" ~
POP ~
SUFFIX ?
}
// Any characters, stopping before the first double quote that is immediately
// followed by the saved run of hashes. May be empty.
RAW_DQ_CONTENT = {
( !("\"" ~ PEEK) ~ ANY ) *
}
// Between 0 and 255 hash characters, inclusive.
HASHES = { "#" {0, 255} }
// 2015 only: matches just the opening characters of a raw string, raw byte
// string, or byte character literal (r + double quote, br + double quote,
// b + single quote). PRETOKEN_2015 lists it after the quoted-literal rules, so
// it is reached only when none of them matched at this position.
Unterminated_literal_2015 = { "r\"" | "br\"" | "b'" }
// 2021 onwards: any IDENT immediately followed by a double or single quote.
// PRETOKEN_2021 and PRETOKEN_2024 list it after the quoted-literal rules.
Reserved_literal_2021 = { IDENT ~ ( "\"" | "'" ) }
// 2024 only: two consecutive hashes, or a hash followed by a double quote.
Reserved_guard_2024 = { "##" | "#\"" }
// Zero or more decimal digits or underscores, in any mix. May be empty.
DECIMAL_DIGITS = { ('0'..'9' | "_") * }
// Zero or more hexadecimal digits (either letter case) or underscores. May be empty.
HEXADECIMAL_DIGITS = { ('0'..'9' | 'a' .. 'f' | 'A' .. 'F' | "_") * }
// Digits following a "0b" or "0o" prefix. Defined as DECIMAL_DIGITS, so all
// ten decimal digits are accepted here, including ones outside base 2 or 8.
// NOTE(review): presumably out-of-range digits are rejected at a later
// stage — confirm.
LOW_BASE_PRETOKEN_DIGITS = { DECIMAL_DIGITS }
// One decimal digit followed by DECIMAL_DIGITS: non-empty, and the first
// character must be a digit rather than an underscore.
DECIMAL_PART = { '0'..'9' ~ DECIMAL_DIGITS }
// A floating-point literal. Three forms, tried in order:
//   1. a body with an exponent, then an optional SUFFIX;
//   2. digits, a dot, digits, not followed by "e" or "E", then an optional
//      SUFFIX (so the suffix cannot begin with "e" or "E");
//   3. digits and a trailing dot, accepted only when the next character is
//      neither another dot nor an IDENT_START. This form takes no suffix.
Float_literal = {
FLOAT_BODY_WITH_EXPONENT ~ SUFFIX ? |
FLOAT_BODY_WITHOUT_EXPONENT ~ !("e"|"E") ~ SUFFIX ? |
FLOAT_BODY_WITH_FINAL_DOT ~ !"." ~ !IDENT_START
}
// Integer part, optional fractional part, exponent marker, optional sign,
// then the exponent digits.
FLOAT_BODY_WITH_EXPONENT = {
DECIMAL_PART ~ ("." ~ DECIMAL_PART ) ? ~
("e"|"E") ~ ("+"|"-") ? ~ EXPONENT_DIGITS
}
// Any number of leading underscores, then at least one decimal digit, then
// further digits or underscores.
EXPONENT_DIGITS = { "_" * ~ '0'..'9' ~ DECIMAL_DIGITS }
// Integer part, a dot, and a fractional part that starts with a digit.
FLOAT_BODY_WITHOUT_EXPONENT = {
DECIMAL_PART ~ "." ~ DECIMAL_PART
}
// Integer part followed by a dot with nothing after it. The restrictions on
// what may follow the dot are applied in Float_literal, not here.
FLOAT_BODY_WITH_FINAL_DOT = {
DECIMAL_PART ~ "."
}
// Float-like forms that Float_literal does not accept. Every PRETOKEN_* rule
// lists this after Float_literal and before Integer_literal.
Reserved_float = {
RESERVED_FLOAT_EMPTY_EXPONENT | RESERVED_FLOAT_BASED
}
// Same shape as FLOAT_BODY_WITH_EXPONENT up to and including the optional
// sign, but with no exponent digits required.
RESERVED_FLOAT_EMPTY_EXPONENT = {
DECIMAL_PART ~ ("." ~ DECIMAL_PART ) ? ~
("e"|"E") ~ ("+"|"-") ?
}
// A "0b", "0o" or "0x" prefixed digit run followed either by "e"/"E", or by a
// dot whose next character is neither another dot nor an IDENT_START.
// NOTE(review): HEXADECIMAL_DIGITS itself accepts "e" and "E" and PEG
// repetition does not give characters back, so the "e"/"E" branch looks
// unreachable after a "0x" prefix — confirm this is intended.
RESERVED_FLOAT_BASED = {
(
("0b" | "0o") ~ LOW_BASE_PRETOKEN_DIGITS |
"0x" ~ HEXADECIMAL_DIGITS
) ~ (
("e"|"E") |
"." ~ !"." ~ !IDENT_START
)
}
// An integer literal: one of four numeric bodies followed by an optional
// suffix that does not begin with "e" or "E". The prefixed forms (binary,
// octal, hexadecimal) are tried before the decimal form.
Integer_literal = {
( INTEGER_BINARY_LITERAL |
INTEGER_OCTAL_LITERAL |
INTEGER_HEXADECIMAL_LITERAL |
INTEGER_DECIMAL_LITERAL ) ~
SUFFIX_NO_E ?
}
// "0b" followed by LOW_BASE_PRETOKEN_DIGITS (the digit run may be empty).
INTEGER_BINARY_LITERAL = { "0b" ~ LOW_BASE_PRETOKEN_DIGITS }
// "0o" followed by LOW_BASE_PRETOKEN_DIGITS (the digit run may be empty).
INTEGER_OCTAL_LITERAL = { "0o" ~ LOW_BASE_PRETOKEN_DIGITS }
// "0x" followed by HEXADECIMAL_DIGITS (the digit run may be empty).
INTEGER_HEXADECIMAL_LITERAL = { "0x" ~ HEXADECIMAL_DIGITS }
// A DECIMAL_PART: non-empty and beginning with a digit.
INTEGER_DECIMAL_LITERAL = { DECIMAL_PART }
// A SUFFIX whose first character is neither "e" nor "E".
SUFFIX_NO_E = { !("e"|"E") ~ SUFFIX }
// 2021 onwards: a single quote, "r#", then an IDENT, not immediately followed
// by another single quote.
Raw_lifetime_or_label_2021 = { "'r#" ~ IDENT ~ !"'" }
// 2021 onwards: a single quote, an IDENT, then a hash. PRETOKEN_2021 and
// PRETOKEN_2024 list it after Raw_lifetime_or_label_2021 and before
// Lifetime_or_label.
Reserved_lifetime_or_label_prefix_2021 = { "'" ~ IDENT ~ "#" }
// A single quote followed by an IDENT, not immediately followed by another
// single quote.
Lifetime_or_label = { "'" ~ IDENT ~ !"'" }
// "r#" immediately followed by an IDENT.
Raw_identifier = { "r#" ~ IDENT }
// 2015: the bare prefixes "r#" or "br#". PRETOKEN_2015 lists it after
// Raw_identifier, so "r#" followed by an IDENT is taken as a Raw_identifier.
Reserved_prefix_2015 = { "r#" | "br#" }
// 2021 onwards: any IDENT immediately followed by a hash. PRETOKEN_2021 and
// PRETOKEN_2024 list it after Raw_identifier.
Reserved_prefix_2021 = { IDENT ~ "#" }
// A plain IDENT. Every PRETOKEN_* rule lists it after all the prefixed and
// reserved forms.
Identifier = { IDENT }
// A single punctuation character. Every alternative is exactly one character
// long, so this rule never produces a multi-character operator: each
// character of such an operator matches as its own Punctuation.
// This is the last alternative in every PRETOKEN_* rule.
Punctuation = {
";" |
"," |
"." |
"(" |
")" |
"{" |
"}" |
"[" |
"]" |
"@" |
"#" |
"~" |
"?" |
":" |
"$" |
"=" |
"!" |
"<" |
">" |
"-" |
"&" |
"|" |
"+" |
"*" |
"/" |
"^" |
"%"
}