Files
voidraft/frontend/src/common/prettier/plugins/scala/scala-parser/lexer.ts

480 lines
16 KiB
TypeScript

import { createToken, Lexer, ILexingResult } from "chevrotain";
// Keywords
export const Val = createToken({ name: "Val", pattern: /val\b/ });
export const Var = createToken({ name: "Var", pattern: /var\b/ });
export const Def = createToken({ name: "Def", pattern: /def\b/ });
export const Class = createToken({ name: "Class", pattern: /class\b/ });
export const ObjectKeyword = createToken({
name: "Object",
pattern: /object\b/,
});
export const Trait = createToken({ name: "Trait", pattern: /trait\b/ });
export const Extends = createToken({ name: "Extends", pattern: /extends\b/ });
export const With = createToken({ name: "With", pattern: /with\b/ });
export const If = createToken({ name: "If", pattern: /if\b/ });
export const Else = createToken({ name: "Else", pattern: /else\b/ });
export const While = createToken({ name: "While", pattern: /while\b/ });
export const For = createToken({ name: "For", pattern: /for\b/ });
export const Yield = createToken({ name: "Yield", pattern: /yield\b/ });
export const Return = createToken({ name: "Return", pattern: /return\b/ });
export const New = createToken({ name: "New", pattern: /new\b/ });
export const This = createToken({ name: "This", pattern: /this\b/ });
export const Super = createToken({ name: "Super", pattern: /super\b/ });
export const Package = createToken({ name: "Package", pattern: /package\b/ });
export const Import = createToken({ name: "Import", pattern: /import\b/ });
export const Case = createToken({ name: "Case", pattern: /case\b/ });
export const Match = createToken({ name: "Match", pattern: /match\b/ });
export const Try = createToken({ name: "Try", pattern: /try\b/ });
export const Catch = createToken({ name: "Catch", pattern: /catch\b/ });
export const Finally = createToken({ name: "Finally", pattern: /finally\b/ });
export const Throw = createToken({ name: "Throw", pattern: /throw\b/ });
export const Null = createToken({ name: "Null", pattern: /null\b/ });
export const True = createToken({ name: "True", pattern: /true\b/ });
export const False = createToken({ name: "False", pattern: /false\b/ });
export const NotImplemented = createToken({
name: "NotImplemented",
pattern: /\?\?\?/,
});
export const Type = createToken({ name: "Type", pattern: /type\b/ });
export const Private = createToken({ name: "Private", pattern: /private\b/ });
export const Protected = createToken({
name: "Protected",
pattern: /protected\b/,
});
export const Public = createToken({ name: "Public", pattern: /public\b/ });
export const Abstract = createToken({
name: "Abstract",
pattern: /abstract\b/,
});
export const Final = createToken({ name: "Final", pattern: /final\b/ });
export const Sealed = createToken({ name: "Sealed", pattern: /sealed\b/ });
export const Implicit = createToken({
name: "Implicit",
pattern: /implicit\b/,
});
export const Lazy = createToken({ name: "Lazy", pattern: /lazy\b/ });
export const Override = createToken({
name: "Override",
pattern: /override\b/,
});
export const Given = createToken({ name: "Given", pattern: /given\b/ });
export const Using = createToken({ name: "Using", pattern: /using\b/ });
export const To = createToken({ name: "To", pattern: /to\b/ });
export const Enum = createToken({ name: "Enum", pattern: /enum\b/ });
export const Array = createToken({ name: "Array", pattern: /Array\b/ });
export const Extension = createToken({
name: "Extension",
pattern: /extension\b/,
});
export const Export = createToken({ name: "Export", pattern: /export\b/ });
export const Opaque = createToken({ name: "Opaque", pattern: /opaque\b/ });
export const Inline = createToken({ name: "Inline", pattern: /inline\b/ });
export const Transparent = createToken({
name: "Transparent",
pattern: /transparent\b/,
});
// Identifiers (must come after keywords)
// Enhanced Unicode identifier support following Scala Language Specification
// Operator identifier for custom operators (e.g., +++, <~>, etc.)
export const OperatorIdentifier = createToken({
name: "OperatorIdentifier",
pattern: /[+\-*/%:&|^<>=!~?#@$\\]+/,
});
// Backward compatible with existing implementation, enhanced mathematical symbol support
// Supports: Latin, Greek, Cyrillic, CJK, Arabic, Hebrew, Mathematical symbols, Emojis (via surrogate pairs)
export const Identifier = createToken({
name: "Identifier",
pattern:
/(?:_[a-zA-Z0-9_$\u00C0-\u00FF\u0370-\u03FF\u0400-\u04FF\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF\u0590-\u05FF\u0600-\u06FF\u2200-\u22FF\u27C0-\u27EF\u2980-\u29FF\u2A00-\u2AFF]+|[a-zA-Z$\u00C0-\u00FF\u0370-\u03FF\u0400-\u04FF\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF\u0590-\u05FF\u0600-\u06FF\u2200-\u22FF\u27C0-\u27EF\u2980-\u29FF\u2A00-\u2AFF][a-zA-Z0-9_$\u00C0-\u00FF\u0370-\u03FF\u0400-\u04FF\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF\u0590-\u05FF\u0600-\u06FF\u2200-\u22FF\u27C0-\u27EF\u2980-\u29FF\u2A00-\u2AFF]*)/u,
});
// Literals
export const IntegerLiteral = createToken({
name: "IntegerLiteral",
pattern: /-?\d+[lLiIsSbB]?/,
});
// Scientific notation literal (must come before FloatingPointLiteral)
export const ScientificNotationLiteral = createToken({
name: "ScientificNotationLiteral",
pattern: /-?\d+(\.\d+)?[eE][+-]?\d+[fFdD]?/,
});
export const FloatingPointLiteral = createToken({
name: "FloatingPointLiteral",
pattern: /-?\d+\.\d+[fFdD]?|-?\.\d+[fFdD]?/,
});
export const StringLiteral = createToken({
name: "StringLiteral",
pattern: /"""[\s\S]*?"""|"([^"\\]|\\.|\\u[0-9A-Fa-f]{4})*"/,
});
export const InterpolatedStringLiteral = createToken({
name: "InterpolatedStringLiteral",
pattern:
/[a-zA-Z_][a-zA-Z0-9_]*"""[\s\S]*?"""|[a-zA-Z_][a-zA-Z0-9_]*"([^"\\]|\\.|\\u[0-9A-Fa-f]{4}|\$[a-zA-Z_][a-zA-Z0-9_]*|\$\{[^}]*\})*"/,
});
export const CharLiteral = createToken({
name: "CharLiteral",
pattern: /'([^'\\]|\\.|\\u[0-9A-Fa-f]{4})'/,
});
// Operators
export const Equals = createToken({ name: "Equals", pattern: /=/ });
export const Plus = createToken({ name: "Plus", pattern: /\+/ });
export const Minus = createToken({ name: "Minus", pattern: /-/ });
export const Star = createToken({ name: "Star", pattern: /\*/ });
export const Slash = createToken({ name: "Slash", pattern: /\// });
export const Backslash = createToken({ name: "Backslash", pattern: /\\/ });
export const Percent = createToken({ name: "Percent", pattern: /%/ });
export const LessThan = createToken({ name: "LessThan", pattern: /</ });
export const GreaterThan = createToken({ name: "GreaterThan", pattern: />/ });
export const LessThanEquals = createToken({
name: "LessThanEquals",
pattern: /<=/,
});
export const GreaterThanEquals = createToken({
name: "GreaterThanEquals",
pattern: />=/,
});
export const EqualsEquals = createToken({
name: "EqualsEquals",
pattern: /==/,
});
export const DoubleEquals = EqualsEquals; // Alias for modular parser compatibility
export const NotEquals = createToken({ name: "NotEquals", pattern: /!=/ });
export const LogicalAnd = createToken({ name: "LogicalAnd", pattern: /&&/ });
export const LogicalOr = createToken({ name: "LogicalOr", pattern: /\|\|/ });
export const Exclamation = createToken({ name: "Exclamation", pattern: /!/ });
export const Arrow = createToken({ name: "Arrow", pattern: /=>/ });
export const TypeLambdaArrow = createToken({
name: "TypeLambdaArrow",
pattern: /=>>/,
});
export const DoubleArrow = TypeLambdaArrow; // Alias for modular parser compatibility
export const LeftArrow = createToken({ name: "LeftArrow", pattern: /<-/ });
export const RightArrow = createToken({ name: "RightArrow", pattern: /->/ });
export const ContextArrow = createToken({
name: "ContextArrow",
pattern: /\?=>/,
});
export const SubtypeOf = createToken({ name: "SubtypeOf", pattern: /<:/ });
export const ColonLess = SubtypeOf; // Alias for modular parser compatibility
export const SupertypeOf = createToken({ name: "SupertypeOf", pattern: />:/ });
export const GreaterColon = SupertypeOf; // Alias for modular parser compatibility
export const AppendOp = createToken({ name: "AppendOp", pattern: /:\+/ });
export const PlusColon = AppendOp; // Alias for modular parser compatibility
export const ColonPlus = createToken({ name: "ColonPlus", pattern: /:\+/ }); // Same as AppendOp but separate token for parser
export const PrependOp = createToken({ name: "PrependOp", pattern: /::/ });
export const ColonColon = PrependOp; // Alias for modular parser compatibility
export const ConcatOp = createToken({ name: "ConcatOp", pattern: /\+\+/ });
export const DoublePlus = ConcatOp; // Alias for modular parser compatibility
export const AppendEquals = createToken({
name: "AppendEquals",
pattern: /\+\+=/,
});
// Compound assignment operators
export const PlusEquals = createToken({ name: "PlusEquals", pattern: /\+=/ });
export const MinusEquals = createToken({ name: "MinusEquals", pattern: /-=/ });
export const StarEquals = createToken({ name: "StarEquals", pattern: /\*=/ });
export const SlashEquals = createToken({ name: "SlashEquals", pattern: /\/=/ });
export const PercentEquals = createToken({
name: "PercentEquals",
pattern: /%=/,
});
// sbt DSL operators
export const DoublePercent = createToken({
name: "DoublePercent",
pattern: /%%/,
});
// Bitwise operators
export const BitwiseAnd = createToken({ name: "BitwiseAnd", pattern: /&/ });
export const BitwiseOr = createToken({ name: "BitwiseOr", pattern: /\|/ });
export const BitwiseXor = createToken({ name: "BitwiseXor", pattern: /\^/ });
export const BitwiseTilde = createToken({ name: "BitwiseTilde", pattern: /~/ });
export const LeftShift = createToken({ name: "LeftShift", pattern: /<</ });
export const RightShift = createToken({ name: "RightShift", pattern: />>/ });
export const UnsignedRightShift = createToken({
name: "UnsignedRightShift",
pattern: />>>/,
});
export const Colon = createToken({ name: "Colon", pattern: /:/ });
export const ColonEquals = createToken({ name: "ColonEquals", pattern: /:=/ });
export const SbtAssign = ColonEquals; // Alias for sbt compatibility
export const Semicolon = createToken({ name: "Semicolon", pattern: /;/ });
export const Comma = createToken({ name: "Comma", pattern: /,/ });
export const Dot = createToken({ name: "Dot", pattern: /\./ });
export const Underscore = createToken({
name: "Underscore",
pattern: /_/,
});
export const At = createToken({ name: "At", pattern: /@/ });
export const Question = createToken({ name: "Question", pattern: /\?/ });
// Quote and Splice tokens for Scala 3 macros
export const QuoteStart = createToken({ name: "QuoteStart", pattern: /'\{/ });
export const SpliceStart = createToken({
name: "SpliceStart",
pattern: /\$\{/,
});
// Additional tokens for modular parser
export const Quote = createToken({ name: "Quote", pattern: /'/ });
export const Dollar = createToken({ name: "Dollar", pattern: /\$/ });
// QuestionArrow is now alias for ContextArrow to avoid duplicate patterns
export const QuestionArrow = ContextArrow;
// String interpolation tokens
export const InterpolatedString = createToken({
name: "InterpolatedString",
pattern: /s"([^"\\]|\\.|\\u[0-9A-Fa-f]{4})*"/,
});
export const FormattedString = createToken({
name: "FormattedString",
pattern: /f"([^"\\]|\\.|\\u[0-9A-Fa-f]{4})*"/,
});
export const RawString = createToken({
name: "RawString",
pattern: /raw"([^"\\]|\\.|\\u[0-9A-Fa-f]{4})*"/,
});
export const CustomInterpolatedString = createToken({
name: "CustomInterpolatedString",
pattern: /[a-zA-Z_][a-zA-Z0-9_]*"([^"\\]|\\.|\\u[0-9A-Fa-f]{4})*"/,
});
// Numeric suffix tokens
export const LongSuffix = createToken({ name: "LongSuffix", pattern: /[lL]/ });
export const IntSuffix = createToken({ name: "IntSuffix", pattern: /[iI]/ });
export const ShortSuffix = createToken({
name: "ShortSuffix",
pattern: /[sS]/,
});
export const ByteSuffix = createToken({ name: "ByteSuffix", pattern: /[bB]/ });
export const FloatSuffix = createToken({
name: "FloatSuffix",
pattern: /[fF]/,
});
export const DoubleSuffix = createToken({
name: "DoubleSuffix",
pattern: /[dD]/,
});
// Additional missing tokens
export const Hash = createToken({ name: "Hash", pattern: /#/ });
// Delimiters
export const LeftParen = createToken({ name: "LeftParen", pattern: /\(/ });
export const RightParen = createToken({ name: "RightParen", pattern: /\)/ });
export const LeftBracket = createToken({ name: "LeftBracket", pattern: /\[/ });
export const RightBracket = createToken({
name: "RightBracket",
pattern: /\]/,
});
export const LeftBrace = createToken({ name: "LeftBrace", pattern: /\{/ });
export const RightBrace = createToken({ name: "RightBrace", pattern: /\}/ });
// Whitespace and Comments
export const WhiteSpace = createToken({
name: "WhiteSpace",
pattern: /\s+/,
group: Lexer.SKIPPED,
});
export const LineComment = createToken({
name: "LineComment",
pattern: /\/\/[^\n\r]*/,
group: "comments",
});
export const BlockComment = createToken({
name: "BlockComment",
pattern: /\/\*([^*]|\*(?!\/))*\*\//,
group: "comments",
});
// All tokens in order
export const allTokens = [
// Comments (must come before operators)
LineComment,
BlockComment,
// Whitespace
WhiteSpace,
// Keywords (must come before Identifier)
Val,
Var,
Def,
Class,
ObjectKeyword,
Trait,
Extends,
With,
If,
Else,
While,
For,
Yield,
Return,
New,
This,
Super,
Package,
Import,
Case,
Match,
Try,
Catch,
Finally,
Throw,
Null,
True,
False,
NotImplemented,
Type,
Private,
Protected,
Public,
Abstract,
Final,
Sealed,
Implicit,
Lazy,
Override,
Given,
Using,
To,
Enum,
Array,
Extension,
Export,
Opaque,
Inline,
Transparent,
// Literals
ScientificNotationLiteral, // Must come before FloatingPointLiteral
FloatingPointLiteral, // Must come before IntegerLiteral
IntegerLiteral,
// String interpolation literals (must come before StringLiteral)
CustomInterpolatedString,
InterpolatedString,
FormattedString,
RawString,
InterpolatedStringLiteral, // Must come before StringLiteral
StringLiteral,
CharLiteral,
// Multi-character operators (must come before single-character)
TypeLambdaArrow, // Must come before Arrow to avoid ambiguity
ContextArrow, // Must come before Arrow to avoid ambiguity
Arrow,
LeftArrow,
RightArrow,
SubtypeOf,
SupertypeOf,
LessThanEquals,
GreaterThanEquals,
EqualsEquals,
NotEquals,
LogicalAnd,
LogicalOr,
ColonEquals, // := must come before :
AppendOp,
PrependOp,
AppendEquals, // ++= must come before ++
ConcatOp,
// Quote and splice tokens (must come before single-character)
QuoteStart, // '{ must come before single '
SpliceStart, // ${ must come before single $
// Compound assignment operators
PlusEquals,
MinusEquals,
StarEquals,
SlashEquals,
PercentEquals,
// Bitwise shift operators (must come before single-character)
UnsignedRightShift, // >>> must come before >>
LeftShift,
RightShift,
// Single-character operators
Equals,
Plus,
Minus,
Star,
Slash,
Backslash,
DoublePercent, // %% must come before single %
Percent,
LessThan,
GreaterThan,
Exclamation,
BitwiseAnd,
BitwiseOr,
BitwiseXor,
BitwiseTilde,
Colon,
Semicolon,
Comma,
Dot,
At,
// QuestionArrow removed - now an alias for ContextArrow
Question,
Quote,
Dollar,
Hash,
// Delimiters
LeftParen,
RightParen,
LeftBracket,
RightBracket,
LeftBrace,
RightBrace,
// Operator identifier (before regular identifier)
OperatorIdentifier,
// Identifier (must come before underscore)
Identifier,
// Underscore (must come after identifier to not interfere with _identifier patterns)
Underscore,
];
// レキサーの作成(インポート時の問題を回避するための遅延初期化)
let scalaLexerInstance: Lexer | null = null;
/**
* Scalaコードの字句解析を行うレキサー
*/
export const ScalaLexer = {
/**
* レキサーインスタンスを取得(遅延初期化)
* @returns Chevrotainレキサーのインスタンス
*/
get instance(): Lexer {
if (!scalaLexerInstance) {
scalaLexerInstance = new Lexer(allTokens);
}
return scalaLexerInstance;
},
/**
* 入力文字列をトークン化
* @param input - 字句解析対象のScalaソースコード
* @returns トークン化の結果(トークン、エラー、グループ化されたトークン)
*/
tokenize(input: string): ILexingResult {
return this.instance.tokenize(input);
},
};
// Export lexer instance for backward compatibility with tests
export const lexerInstance = ScalaLexer;