✨ Improve code block function
This commit is contained in:
@@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* 解析器构建脚本
|
||||
* 使用 lezer-generator 从语法文件生成解析器
|
||||
* 使用:node build-parser.js
|
||||
*/
|
||||
|
||||
import { execSync } from 'child_process';
|
||||
import path from 'path';
|
||||
import fs from 'fs';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
console.log('🚀 start building parser...');

try {
  // The grammar source must sit next to this script; bail out early otherwise.
  const grammarFile = path.join(__dirname, 'codeblock.grammar');
  const grammarExists = fs.existsSync(grammarFile);
  if (grammarExists === false) {
    throw new Error('grammarFile codeblock.grammar not found');
  }
  console.log('📄 grammar file:', grammarFile);

  // Run lezer-generator from this script's directory so the relative
  // input/output names resolve here; child output is forwarded to our stdio.
  console.log('⚙️ building parser...');
  const generatorOptions = { cwd: __dirname, stdio: 'inherit' };
  execSync('npx lezer-generator codeblock.grammar -o parser.js', generatorOptions);

  // Both output files have to be present for the build to count as successful.
  const generatedFiles = ['parser.js', 'parser.terms.js'].map((fileName) =>
    path.join(__dirname, fileName),
  );
  const allGenerated = generatedFiles.every((filePath) => fs.existsSync(filePath));
  if (!allGenerated) {
    throw new Error('failed to generate parser');
  }

  console.log('✅ parser file successfully generated!');
  console.log('📦 parser files:');
  console.log(' - parser.js');
  console.log(' - parser.terms.js');

  console.log('🎉 build success!');
} catch (error) {
  // Any failure (missing grammar, generator error, missing output) ends the
  // process with a non-zero exit code.
  console.error('❌ build failed:', error.message);
  process.exit(1);
}
|
||||
@@ -0,0 +1,65 @@
|
||||
/**
|
||||
* 代码块语言支持
|
||||
* 提供多语言代码块支持
|
||||
*/
|
||||
|
||||
import { parser } from "./parser.js";
|
||||
import { configureNesting } from "./nested-parser";
|
||||
|
||||
import {
|
||||
LRLanguage,
|
||||
LanguageSupport,
|
||||
foldNodeProp,
|
||||
} from "@codemirror/language";
|
||||
import { styleTags, tags as t } from "@lezer/highlight";
|
||||
|
||||
import { json } from "@codemirror/lang-json";
|
||||
|
||||
/**
 * Computes the fold range for a syntax node.
 *
 * @param node - Object exposing numeric `from` and `to` positions.
 * @returns A range starting at `node.from` and ending one position before `node.to`.
 */
function foldNode(node: any) {
  const { from, to } = node;
  return { from, to: to - 1 };
}
|
||||
|
||||
/**
 * Language definition for the code-block document format.
 *
 * The generated parser is configured with:
 * - a highlighting rule that tags `BlockDelimiter` nodes as `tagName`,
 * - a fold rule for `BlockContent` covering the node up to one position
 *   before its end,
 * - the nested-parser wrapper returned by `configureNesting()`.
 *
 * The language data declares ";" as the line-comment token.
 */
export const CodeBlockLanguage = LRLanguage.define({
  parser: parser.configure({
    wrap: configureNesting(),
    props: [
      styleTags({ BlockDelimiter: t.tagName }),
      foldNodeProp.add({
        BlockContent: ({ from, to }: any) => ({ from, to: to - 1 }),
      }),
    ],
  }),
  languageData: {
    commentTokens: { line: ";" },
  },
});
|
||||
|
||||
/**
 * Builds the language support for code-block documents.
 *
 * @returns A one-element array holding a `LanguageSupport` for
 *   `CodeBlockLanguage` (reconfigured with a fresh nesting wrapper and the
 *   default dialect), bundled with the `support` extension of `json()`.
 */
export function codeBlockLang() {
  const nestedWrap = configureNesting();
  const configuredLanguage = CodeBlockLanguage.configure({ dialect: "", wrap: nestedWrap });
  const jsonSupport = json().support;

  return [new LanguageSupport(configuredLanguage, [jsonSupport])];
}
|
||||
|
||||
/**
 * Returns the code-block language extension.
 *
 * Thin alias that forwards to `codeBlockLang()` and returns its result as-is.
 */
export function getCodeBlockLanguageExtension() {
  const extension = codeBlockLang();
  return extension;
}
|
||||
@@ -0,0 +1,23 @@
|
||||
// BlockContent is not described by grammar rules; it is produced by the
// external tokenizer `blockContent` exported from ./external-tokens.js.
@external tokens blockContent from "./external-tokens.js" {
  BlockContent
}

// A document is a sequence of zero or more blocks.
@top Document { Block* }

// Each block is a delimiter line followed by its content.
Block {
  BlockDelimiter BlockContent
}

// Delimiter: newline, "∞∞∞", a language name, an optional "-a" marker, newline.
BlockDelimiter {
  "\n∞∞∞" BlockLanguage Auto? "\n"
}

// Language names accepted in a delimiter.
// NOTE(review): the LANGUAGES list in languages.ts has a "c" entry that is not
// listed here, and has no entry for "math" — confirm which side should change
// (editing this rule requires regenerating parser.js).
BlockLanguage {
  "text" |
  "math" |
  "json" |
  "python" |
  "html" |
  "sql" |
  "markdown" |
  "java" |
  "php" |
  "css" |
  "xml" |
  "cpp" |
  "rust" |
  "ruby" |
  "shell" |
  "yaml" |
  "go" |
  "javascript" |
  "typescript"
}

@tokens {
  // Marker appended to the language name in a delimiter.
  Auto { "-a" }
}
|
||||
@@ -0,0 +1,51 @@
|
||||
/**
|
||||
* 外部标记器
|
||||
* 用于识别代码块内容的边界
|
||||
*/
|
||||
|
||||
import { ExternalTokenizer } from "@lezer/lr";
|
||||
import { BlockContent } from "./parser.terms.js";
|
||||
import { LANGUAGES } from "./languages";
|
||||
|
||||
// Value compared against `input.peek()` results to detect the end of input.
const EOF = -1;

// Char codes of the first two characters of a block delimiter ("\n" then "∞").
// They are used as a cheap pre-check before running the full regex.
const FIRST_TOKEN_CHAR = "\n".charCodeAt(0);
const SECOND_TOKEN_CHAR = "∞".charCodeAt(0);

// Alternation of every known language token, e.g. "text|json|python|...".
const languageTokensMatcher = LANGUAGES.map(l => l.token).join("|");

// Matches a complete delimiter line at the start of the tested string:
// "\n∞∞∞" + language token + optional "-a" + "\n".
// Deliberately created without the "g" flag: the pattern is anchored with "^"
// and only used as a yes/no match, and a global regex carries `lastIndex`
// state that makes `test()` / `exec()` calls on a shared instance unreliable.
const tokenRegEx = new RegExp(`^\\n∞∞∞(${languageTokensMatcher})(-a)?\\n`);
|
||||
|
||||
/**
 * Length of the longest possible delimiter line:
 * "\n∞∞∞" + longest language token + "-a" + "\n".
 * Derived from LANGUAGES so that adding a longer language token cannot
 * silently break delimiter detection.
 */
const MAX_DELIMITER_LENGTH =
  "\n∞∞∞".length +
  Math.max(...LANGUAGES.map((l) => l.token.length)) +
  "-a".length +
  "\n".length;

/**
 * External tokenizer for block content.
 *
 * Starting at the current position, it scans forward until it reaches either
 * the start of the next block delimiter ("\n∞∞∞<language>[-a]\n") or the end
 * of the input, and emits everything before that point as a single
 * `BlockContent` token. Nothing is emitted when the stream is already at EOF.
 */
export const blockContent = new ExternalTokenizer((input) => {
  let current = input.peek(0);
  let next = input.peek(1);

  if (current === EOF) {
    return;
  }

  while (true) {
    // Only when the next two characters are a newline followed by "∞" can a
    // delimiter start here, so the full check is skipped everywhere else.
    if (current === FIRST_TOKEN_CHAR && next === SECOND_TOKEN_CHAR) {
      let lookahead = "";
      for (let i = 0; i < MAX_DELIMITER_LENGTH; i++) {
        const code = input.peek(i);
        if (code === EOF) {
          // Nothing beyond the end of input can be part of a delimiter.
          break;
        }
        lookahead += String.fromCharCode(code);
      }
      if (lookahead.match(tokenRegEx)) {
        // The content ends right before the delimiter's leading newline.
        input.acceptToken(BlockContent);
        return;
      }
    }

    if (next === EOF) {
      // Last character of the input: include it in the token (end offset 1).
      input.acceptToken(BlockContent, 1);
      return;
    }

    current = input.advance(1);
    next = input.peek(1);
  }
});
|
||||
@@ -0,0 +1,38 @@
|
||||
/**
|
||||
* 代码块语言解析器入口
|
||||
* 导出所有语言解析相关的功能
|
||||
*/
|
||||
|
||||
// 主要语言支持
|
||||
export {
|
||||
CodeBlockLanguage,
|
||||
codeBlockLang,
|
||||
getCodeBlockLanguageExtension
|
||||
} from './codeblock-lang';
|
||||
|
||||
// 语言映射和信息
|
||||
export {
|
||||
LanguageInfo,
|
||||
LANGUAGES,
|
||||
languageMapping,
|
||||
getLanguage,
|
||||
getLanguageTokens
|
||||
} from './languages';
|
||||
|
||||
// 嵌套解析器
|
||||
export {
|
||||
configureNesting
|
||||
} from './nested-parser';
|
||||
|
||||
// 解析器术语
|
||||
export * from './parser.terms.js';
|
||||
|
||||
// 外部标记器
|
||||
export {
|
||||
blockContent
|
||||
} from './external-tokens';
|
||||
|
||||
// 解析器
|
||||
export {
|
||||
parser
|
||||
} from './parser.js';
|
||||
@@ -0,0 +1,82 @@
|
||||
/**
|
||||
* 语言映射和解析器配置
|
||||
*/
|
||||
|
||||
import { jsonLanguage } from "@codemirror/lang-json";
|
||||
import { pythonLanguage } from "@codemirror/lang-python";
|
||||
import { javascriptLanguage, typescriptLanguage } from "@codemirror/lang-javascript";
|
||||
import { htmlLanguage } from "@codemirror/lang-html";
|
||||
import { StandardSQL } from "@codemirror/lang-sql";
|
||||
import { markdownLanguage } from "@codemirror/lang-markdown";
|
||||
import { javaLanguage } from "@codemirror/lang-java";
|
||||
import { phpLanguage } from "@codemirror/lang-php";
|
||||
import { cssLanguage } from "@codemirror/lang-css";
|
||||
import { cppLanguage } from "@codemirror/lang-cpp";
|
||||
import { xmlLanguage } from "@codemirror/lang-xml";
|
||||
import { rustLanguage } from "@codemirror/lang-rust";
|
||||
|
||||
import { StreamLanguage } from "@codemirror/language";
|
||||
import { ruby } from "@codemirror/legacy-modes/mode/ruby";
|
||||
import { shell } from "@codemirror/legacy-modes/mode/shell";
|
||||
import { go } from "@codemirror/legacy-modes/mode/go";
|
||||
import { yamlLanguage } from "@codemirror/lang-yaml";
|
||||
|
||||
import { SupportedLanguage } from '../types';
|
||||
|
||||
/**
 * Describes one language that a code block can use.
 *
 * Fields:
 * - `token`: identifier of the language (a `SupportedLanguage`).
 * - `name`: human-readable display name.
 * - `parser`: parser object for the language, or `null` when there is none.
 * - `guesslang`: optional short identifier (e.g. "py", "js"); may be `null`
 *   or left undefined.
 */
export class LanguageInfo {
  public token: SupportedLanguage;
  public name: string;
  public parser: any;
  public guesslang?: string | null;

  constructor(
    token: SupportedLanguage,
    name: string,
    parser: any,
    guesslang?: string | null
  ) {
    this.token = token;
    this.name = name;
    this.parser = parser;
    this.guesslang = guesslang;
  }
}
|
||||
|
||||
/**
 * Constructor arguments for every supported language, in display order:
 * [token, display name, parser (or null), optional guesslang identifier].
 *
 * NOTE(review): "c" is listed here but the BlockLanguage rule in
 * codeblock.grammar has no "c" alternative, while the grammar's "math" has no
 * entry here — confirm which side should change.
 */
const LANGUAGE_SPECS: [SupportedLanguage, string, any, (string | null)?][] = [
  ["text", "Plain Text", null],
  ["json", "JSON", jsonLanguage.parser, "json"],
  ["python", "Python", pythonLanguage.parser, "py"],
  ["javascript", "JavaScript", javascriptLanguage.parser, "js"],
  ["typescript", "TypeScript", typescriptLanguage.parser, "ts"],
  ["html", "HTML", htmlLanguage.parser, "html"],
  ["css", "CSS", cssLanguage.parser, "css"],
  ["sql", "SQL", StandardSQL.language.parser, "sql"],
  ["markdown", "Markdown", markdownLanguage.parser, "md"],
  ["java", "Java", javaLanguage.parser, "java"],
  ["php", "PHP", phpLanguage.configure({top:"Program"}).parser, "php"],
  ["xml", "XML", xmlLanguage.parser, "xml"],
  ["cpp", "C++", cppLanguage.parser, "cpp"],
  // C reuses the C++ parser.
  ["c", "C", cppLanguage.parser, "c"],
  ["rust", "Rust", rustLanguage.parser, "rs"],
  // The following languages are built from legacy stream modes.
  ["ruby", "Ruby", StreamLanguage.define(ruby).parser, "rb"],
  ["shell", "Shell", StreamLanguage.define(shell).parser, "sh"],
  ["yaml", "YAML", yamlLanguage.parser, "yaml"],
  ["go", "Go", StreamLanguage.define(go).parser, "go"],
];

/**
 * List of supported languages, one `LanguageInfo` per entry of the table above.
 */
export const LANGUAGES: LanguageInfo[] = LANGUAGE_SPECS.map(
  (spec) => new LanguageInfo(...spec)
);
|
||||
|
||||
/**
 * Lookup table from language token to that language's parser
 * (`null` for entries of LANGUAGES that have no parser).
 */
export const languageMapping = Object.fromEntries(
  LANGUAGES.map(({ token, parser }) => [token, parser])
);
|
||||
|
||||
/**
 * Looks up the language description for a token.
 *
 * @param token - Language token to search for.
 * @returns The first entry of LANGUAGES whose `token` equals `token`,
 *   or `undefined` when no entry matches.
 */
export function getLanguage(token: SupportedLanguage): LanguageInfo | undefined {
  for (const candidate of LANGUAGES) {
    if (candidate.token === token) {
      return candidate;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Lists the tokens of all supported languages.
 *
 * @returns A new array with the `token` of every LANGUAGES entry, in list order.
 */
export function getLanguageTokens(): SupportedLanguage[] {
  return Array.from(LANGUAGES, ({ token }) => token);
}
|
||||
@@ -0,0 +1,44 @@
|
||||
/**
|
||||
* 嵌套解析器配置
|
||||
* 为不同语言的代码块提供语法高亮支持
|
||||
*/
|
||||
|
||||
import { parseMixed } from "@lezer/common";
|
||||
import { BlockContent, BlockLanguage } from "./parser.terms.js";
|
||||
import { languageMapping } from "./languages";
|
||||
|
||||
/**
 * Creates the nested-parser wrapper for code blocks.
 *
 * For each `BlockContent` node, the language name is read from the
 * `BlockLanguage` child of the parent's first child, and the matching parser
 * from `languageMapping` is used for the content.
 *
 * @returns The `parseMixed` wrapper. Its callback yields `{ parser }` for
 *   non-empty `BlockContent` nodes whose language has a non-null parser, and
 *   `null` in every other case.
 */
export function configureNesting() {
  return parseMixed((node, input) => {
    if (node.type.id !== BlockContent) {
      return null;
    }

    const contentNode = node.node;

    // Empty content gets no nested parser; this avoids errors from
    // StreamLanguage parsers on large buffers.
    if (contentNode.from === contentNode.to) {
      return null;
    }

    // The language marker lives in the first child of the parent node.
    const languageNode = contentNode.parent?.firstChild?.getChildren(BlockLanguage)[0];
    const rawName = languageNode ? input.read(languageNode.from, languageNode.to) : null;

    // Drop a trailing '-a' marker if the name carries one.
    const languageName = rawName && rawName.endsWith('-a') ? rawName.slice(0, -2) : rawName;

    if (!languageName || !(languageName in languageMapping)) {
      return null;
    }

    const nestedParser = languageMapping[languageName];
    if (nestedParser === null) {
      return null;
    }

    return {
      parser: nestedParser,
    };
  });
}
|
||||
@@ -0,0 +1,17 @@
|
||||
// This file was generated by lezer-generator. You probably shouldn't edit it.
|
||||
import {LRParser} from "@lezer/lr"
|
||||
import {blockContent} from "./external-tokens.js"
|
||||
export const parser = LRParser.deserialize({
|
||||
version: 14,
|
||||
states: "!jQQOQOOOVOQO'#C`O!dOPO'#C_OOOO'#Cc'#CcQQOQOOOOOO'#Ca'#CaO!iOSO,58zOOOO,58y,58yOOOO-E6a-E6aOOOP1G.f1G.fO!qOSO1G.fOOOP7+$Q7+$Q",
|
||||
stateData: "!v~OXPO~OYTOZTO[TO]TO^TO_TO`TOaTObTOcTOdTOeTOfTOgTOhTOiTOjTOkTOlTO~OPVO~OUYOmXO~OmZO~O",
|
||||
goto: "jWPPPX]aPdTROSTQOSRUPQSORWS",
|
||||
nodeNames: "⚠ BlockContent Document Block BlockDelimiter BlockLanguage Auto",
|
||||
maxTerm: 29,
|
||||
skippedNodes: [0],
|
||||
repeatNodeCount: 1,
|
||||
tokenData: ",k~R]YZz}!O!e#V#W!p#Z#[#a#[#]#l#^#_$T#a#b%x#d#e'X#f#g([#g#h)R#h#i*O#l#m+q#m#n,SR!PPmQ%&x%&y!SP!VP%&x%&y!YP!]P%&x%&y!`P!eOXP~!hP#T#U!k~!pOU~~!sQ#d#e!y#g#h#U~!|P#d#e#P~#UOe~~#XP#g#h#[~#aOc~~#dP#c#d#g~#lOj~~#oP#h#i#r~#uP#a#b#x~#{P#`#a$O~$TO^~~$WQ#T#U$^#g#h%g~$aP#j#k$d~$gP#T#U$j~$oPa~#g#h$r~$uP#V#W$x~${P#f#g%O~%RP#]#^%U~%XP#d#e%[~%_P#h#i%b~%gOk~~%jP#c#d%m~%pP#b#c%s~%xO[~~%{P#T#U&O~&RQ#f#g&X#h#i&|~&[P#_#`&_~&bP#W#X&e~&hP#c#d&k~&nP#k#l&q~&tP#b#c&w~&|O`~~'PP#[#]'S~'XOZ~~'[Q#[#]'b#m#n'm~'eP#d#e'h~'mOb~~'pP#h#i's~'vP#[#]'y~'|P#c#d(P~(SP#b#c(V~([O]~~(_P#i#j(b~(eQ#U#V(k#g#h(v~(nP#m#n(q~(vOg~~(yP#h#i(|~)ROf~~)UQ#[#])[#e#f)s~)_P#X#Y)b~)eP#`#a)h~)kP#`#a)n~)sOh~~)vP#`#a)y~*OO_~~*RQ#X#Y*X#m#n*j~*[P#l#m*_~*bP#h#i*e~*jOY~~*mP#d#e*p~*sP#X#Y*v~*yP#g#h*|~+PP#V#W+S~+VP#f#g+Y~+]P#]#^+`~+cP#d#e+f~+iP#h#i+l~+qOl~~+tP#a#b+w~+zP#`#a+}~,SOd~~,VP#T#U,Y~,]P#a#b,`~,cP#`#a,f~,kOi~",
|
||||
tokenizers: [blockContent, 0, 1],
|
||||
topRules: {"Document":[0,2]},
|
||||
tokenPrec: 0
|
||||
})
|
||||
@@ -0,0 +1,8 @@
|
||||
// This file was generated by lezer-generator. You probably shouldn't edit it.
|
||||
export const
|
||||
BlockContent = 1,
|
||||
Document = 2,
|
||||
Block = 3,
|
||||
BlockDelimiter = 4,
|
||||
BlockLanguage = 5,
|
||||
Auto = 6
|
||||
Reference in New Issue
Block a user