boon now included
This commit is contained in:
197
validator/src/ecma.rs
Normal file
197
validator/src/ecma.rs
Normal file
@ -0,0 +1,197 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use regex_syntax::ast::parse::Parser;
|
||||
use regex_syntax::ast::{self, *};
|
||||
|
||||
// covert ecma regex to rust regex if possible
|
||||
// see https://262.ecma-international.org/11.0/#sec-regexp-regular-expression-objects
|
||||
pub(crate) fn convert(pattern: &str) -> Result<Cow<'_, str>, Box<dyn std::error::Error>> {
|
||||
let mut pattern = Cow::Borrowed(pattern);
|
||||
|
||||
let mut ast = loop {
|
||||
match Parser::new().parse(pattern.as_ref()) {
|
||||
Ok(ast) => break ast,
|
||||
Err(e) => {
|
||||
if let Some(s) = fix_error(&e) {
|
||||
pattern = Cow::Owned(s);
|
||||
} else {
|
||||
Err(e)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
loop {
|
||||
let translator = Translator {
|
||||
pat: pattern.as_ref(),
|
||||
out: None,
|
||||
};
|
||||
if let Some(updated_pattern) = ast::visit(&ast, translator)? {
|
||||
match Parser::new().parse(&updated_pattern) {
|
||||
Ok(updated_ast) => {
|
||||
pattern = Cow::Owned(updated_pattern);
|
||||
ast = updated_ast;
|
||||
}
|
||||
Err(e) => {
|
||||
debug_assert!(
|
||||
false,
|
||||
"ecma::translate changed {:?} to {:?}: {e}",
|
||||
pattern, updated_pattern
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(pattern)
|
||||
}
|
||||
|
||||
fn fix_error(e: &Error) -> Option<String> {
|
||||
if let ErrorKind::EscapeUnrecognized = e.kind() {
|
||||
let (start, end) = (e.span().start.offset, e.span().end.offset);
|
||||
let s = &e.pattern()[start..end];
|
||||
if let r"\c" = s {
|
||||
// handle \c{control_letter}
|
||||
if let Some(control_letter) = e.pattern()[end..].chars().next() {
|
||||
if control_letter.is_ascii_alphabetic() {
|
||||
return Some(format!(
|
||||
"{}{}{}",
|
||||
&e.pattern()[..start],
|
||||
((control_letter as u8) % 32) as char,
|
||||
&e.pattern()[end + 1..],
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// AST visitor state that rewrites ECMA 262 constructs into Rust regex syntax.
///
/// Handles the following translations:
/// - `\d` should match ASCII digits only, so it is replaced with `[0-9]`
/// - `\D` should match everything but ASCII digits, so it is replaced with `[^0-9]`
/// - `\w` should match ASCII word characters only, so it is replaced with `[A-Za-z0-9_]`
/// - `\W` should match everything but ASCII word characters, so it is replaced
///   with `[^A-Za-z0-9_]`
/// - `\s` and `\S` are replaced with an explicit character class
/// - `\a` is rejected, because it is not an ECMA 262 control escape
struct Translator<'a> {
    /// Pattern text that the visited AST's spans index into.
    pat: &'a str,
    /// Rewritten pattern, or `None` while nothing has been replaced.
    out: Option<String>,
}
|
||||
|
||||
impl Translator<'_> {
|
||||
fn replace(&mut self, span: &Span, with: &str) {
|
||||
let (start, end) = (span.start.offset, span.end.offset);
|
||||
self.out = Some(format!("{}{with}{}", &self.pat[..start], &self.pat[end..]));
|
||||
}
|
||||
|
||||
fn replace_class_class(&mut self, perl: &ClassPerl) {
|
||||
match perl.kind {
|
||||
ClassPerlKind::Digit => {
|
||||
self.replace(&perl.span, if perl.negated { "[^0-9]" } else { "[0-9]" });
|
||||
}
|
||||
ClassPerlKind::Word => {
|
||||
let with = &if perl.negated {
|
||||
"[^A-Za-z0-9_]"
|
||||
} else {
|
||||
"[A-Za-z0-9_]"
|
||||
};
|
||||
self.replace(&perl.span, with);
|
||||
}
|
||||
ClassPerlKind::Space => {
|
||||
let with = &if perl.negated {
|
||||
"[^ \t\n\r\u{000b}\u{000c}\u{00a0}\u{feff}\u{2003}\u{2029}]"
|
||||
} else {
|
||||
"[ \t\n\r\u{000b}\u{000c}\u{00a0}\u{feff}\u{2003}\u{2029}]"
|
||||
};
|
||||
self.replace(&perl.span, with);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Visitor for Translator<'_> {
|
||||
type Output = Option<String>;
|
||||
type Err = &'static str;
|
||||
|
||||
fn finish(self) -> Result<Self::Output, Self::Err> {
|
||||
Ok(self.out)
|
||||
}
|
||||
|
||||
fn visit_class_set_item_pre(&mut self, ast: &ast::ClassSetItem) -> Result<(), Self::Err> {
|
||||
if let ClassSetItem::Perl(perl) = ast {
|
||||
self.replace_class_class(perl);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn visit_post(&mut self, ast: &Ast) -> Result<(), Self::Err> {
|
||||
if self.out.is_some() {
|
||||
return Ok(());
|
||||
}
|
||||
match ast {
|
||||
Ast::ClassPerl(perl) => {
|
||||
self.replace_class_class(perl);
|
||||
}
|
||||
Ast::Literal(ref literal) => {
|
||||
if let Literal {
|
||||
kind: LiteralKind::Special(SpecialLiteralKind::Bell),
|
||||
..
|
||||
} = literal.as_ref()
|
||||
{
|
||||
return Err("\\a is not an ECMA 262 control escape");
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Patterns that must convert successfully, paired with the expected output.
    #[test]
    fn test_ecma_compat_valid() {
        let tests = [
            (r"ab\cAcde\cBfg", "ab\u{1}cde\u{2}fg"), // \c{control_letter}
            (r"\\comment", r"\\comment"),            // there is no \c
            (r"ab\def", r#"ab[0-9]ef"#),             // \d
            (r"ab[a-z\d]ef", r#"ab[a-z[0-9]]ef"#),   // \d inside classSet
            (r"ab\Def", r#"ab[^0-9]ef"#),            // \D
            (r"ab[a-z\D]ef", r#"ab[a-z[^0-9]]ef"#),  // \D inside classSet
            (r"ab\wef", r#"ab[A-Za-z0-9_]ef"#),      // \w
            (r"ab\Wef", r#"ab[^A-Za-z0-9_]ef"#),     // \W
        ];
        for (input, want) in tests {
            match convert(input) {
                Ok(got) => {
                    if got.as_ref() != want {
                        panic!("convert({input:?}): got: {got:?}, want: {want:?}");
                    }
                }
                Err(e) => {
                    panic!("convert({input:?}) failed: {e}");
                }
            }
        }
    }

    /// Patterns that must be rejected by `convert`.
    #[test]
    fn test_ecma_compat_invalid() {
        let tests = [
            r"\c\n",     // \c{invalid_char}
            r"abc\adef", // \a is not valid
        ];
        for input in tests {
            if convert(input).is_ok() {
                panic!("convert({input:?}) must fail");
            }
        }
    }
}
|
||||
Reference in New Issue
Block a user