Improve code block function

This commit is contained in:
2025-06-18 21:18:49 +08:00
parent cce9cf7e92
commit 9204315c7b
19 changed files with 1618 additions and 136 deletions

View File

@@ -0,0 +1,52 @@
#!/usr/bin/env node
/**
* 解析器构建脚本
* 使用 lezer-generator 从语法文件生成解析器
* 使用node build-parser.js
*/
import { execSync } from 'child_process';
import path from 'path';
import fs from 'fs';
import { fileURLToPath } from 'url';
// Directory that contains this script; the grammar and the generated files are resolved against it.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

console.log('🚀 start building parser...');

try {
  // Refuse to run the generator when the grammar file is missing.
  const grammarFile = path.join(__dirname, 'codeblock.grammar');
  const grammarPresent = fs.existsSync(grammarFile);
  if (!grammarPresent) {
    throw new Error('grammarFile codeblock.grammar not found');
  }
  console.log('📄 grammar file:', grammarFile);

  // Run lezer-generator from the script directory, sharing this process's stdio.
  console.log('⚙️ building parser...');
  execSync('npx lezer-generator codeblock.grammar -o parser.js', {
    stdio: 'inherit',
    cwd: __dirname
  });

  // Both output files must exist; the second lookup is skipped when the first one fails.
  const generated = ['parser.js', 'parser.terms.js'].every((fileName) =>
    fs.existsSync(path.join(__dirname, fileName))
  );
  if (!generated) {
    throw new Error('failed to generate parser');
  }

  console.log('✅ parser file successfully generated');
  console.log('📦 parser files:');
  console.log(' - parser.js');
  console.log(' - parser.terms.js');
  console.log('🎉 build success');
} catch (error) {
  // Report any failure from the steps above and exit with a non-zero status.
  console.error('❌ build failed:', error.message);
  process.exit(1);
}

View File

@@ -0,0 +1,65 @@
/**
* 代码块语言支持
* 提供多语言代码块支持
*/
import { parser } from "./parser.js";
import { configureNesting } from "./nested-parser";
import {
LRLanguage,
LanguageSupport,
foldNodeProp,
} from "@codemirror/language";
import { styleTags, tags as t } from "@lezer/highlight";
import { json } from "@codemirror/lang-json";
/**
 * Computes the fold range for a node.
 *
 * The range starts at the node's start offset and ends one position before
 * the node's end offset, so the node's final character stays outside the fold.
 *
 * @param node - Any object exposing `from` / `to` offsets (for example a syntax node).
 * @returns The `{ from, to }` range to fold.
 */
function foldNode(node: { from: number; to: number }): { from: number; to: number } {
  return { from: node.from, to: node.to - 1 };
}
/**
 * Language definition for code block documents.
 *
 * Configures the generated parser with highlighting for block delimiters,
 * folding for block content and a nesting wrapper, and declares ";" as the
 * line-comment token.
 */
export const CodeBlockLanguage = LRLanguage.define({
  languageData: {
    commentTokens: { line: ";" }
  },
  parser: parser.configure({
    wrap: configureNesting(),
    props: [
      // Delimiter lines are highlighted with the tag-name style.
      styleTags({ BlockDelimiter: t.tagName }),
      foldNodeProp.add({
        // Fold the whole content of a block except its final character.
        BlockContent: (node: any) => ({ from: node.from, to: node.to - 1 }),
      }),
    ],
  }),
});
/**
 * Builds the language support for code block documents.
 *
 * Reconfigures `CodeBlockLanguage` with an empty dialect and a freshly created
 * nesting wrapper, then bundles the result with the JSON language's support
 * extension.
 *
 * @returns A one-element array containing the resulting `LanguageSupport`.
 */
export function codeBlockLang() {
  // NOTE(review): CodeBlockLanguage is already defined with `wrap: configureNesting()`;
  // confirm that supplying another wrapper here is intentional.
  const language = CodeBlockLanguage.configure({
    dialect: "",
    wrap: configureNesting(),
  });
  const support = new LanguageSupport(language, [json().support]);
  return [support];
}
/**
 * Returns the code block language extension.
 *
 * Thin alias: the value comes straight from `codeBlockLang()`.
 */
export function getCodeBlockLanguageExtension() {
  const extension = codeBlockLang();
  return extension;
}

View File

@@ -0,0 +1,23 @@
// BlockContent is not described by a grammar rule; it is produced by the external
// tokenizer `blockContent` exported from ./external-tokens.js.
@external tokens blockContent from "./external-tokens.js" {
BlockContent
}
// A document is a sequence of zero or more blocks.
@top Document { Block* }
// Each block is a delimiter line followed by the block's content.
Block {
BlockDelimiter BlockContent
}
// Delimiter line: a newline, "∞∞∞", the language name, an optional Auto marker, a newline.
BlockDelimiter {
"\n∞∞∞" BlockLanguage Auto? "\n"
}
// Language names accepted after "∞∞∞".
// NOTE(review): the LANGUAGES list in languages.ts has a "c" entry that is missing here and
// has no "math" entry, while the external tokenizer builds its delimiter pattern from
// LANGUAGES. Confirm whether the two lists are meant to match; changing this rule requires
// regenerating parser.js.
BlockLanguage {
"text" | "math" | "json" | "python" | "html" | "sql" | "markdown" |
"java" | "php" | "css" | "xml" | "cpp" | "rust" | "ruby" | "shell" |
"yaml" | "go" | "javascript" | "typescript"
}
// Auto is the literal "-a" that may directly follow the language name in a delimiter.
@tokens {
Auto { "-a" }
}

View File

@@ -0,0 +1,51 @@
/**
* 外部标记器
* 用于识别代码块内容的边界
*/
import { ExternalTokenizer } from "@lezer/lr";
import { BlockContent } from "./parser.terms.js";
import { LANGUAGES } from "./languages";
// Sentinel compared against the character codes read from the input to detect its end.
const EOF = -1;

// Character codes of the first two characters of a block delimiter: a newline, then "∞".
const FIRST_TOKEN_CHAR = "\n".charCodeAt(0);
const SECOND_TOKEN_CHAR = "∞".charCodeAt(0);

// Alternation of every known language token, e.g. "text|json|...".
const languageTokensMatcher = LANGUAGES.map(({ token }) => token).join("|");

// Matches a complete delimiter line at the start of a string:
// newline, "∞∞∞", a language token, an optional "-a" suffix, newline.
const tokenRegEx = new RegExp(
  "^\\n∞∞∞(" + languageTokensMatcher + ")(-a)?\\n",
  "g"
);
/**
 * Longest possible delimiter line, in UTF-16 code units: the "\n∞∞∞" prefix,
 * the longest language token, the optional "-a" suffix and the closing "\n".
 * Derived from LANGUAGES so that a newly added, longer language token cannot
 * outgrow the look-ahead window used below.
 */
const MAX_DELIMITER_LENGTH =
  "\n∞∞∞".length +
  Math.max(...LANGUAGES.map((language) => language.token.length)) +
  "-a".length +
  "\n".length;

/**
 * External tokenizer for block content.
 *
 * Starting at the current input position, scans forward until either a
 * complete delimiter line ("\n∞∞∞<language>[-a]\n") begins at the scan
 * position or the end of the input is reached, and emits everything scanned
 * so far as a single BlockContent token. Emits nothing when the input is
 * already at its end.
 */
export const blockContent = new ExternalTokenizer((input) => {
  let current = input.peek(0);
  let next = input.peek(1);
  if (current === EOF) {
    return;
  }
  while (true) {
    // Cheap pre-check: a delimiter can only start with a newline followed by "∞",
    // so the full pattern is tested only at such positions.
    if (current === FIRST_TOKEN_CHAR && next === SECOND_TOKEN_CHAR) {
      let candidate = "";
      for (let i = 0; i < MAX_DELIMITER_LENGTH; i++) {
        const code = input.peek(i);
        // Nothing past the end of the input can contribute to a match.
        if (code === EOF) {
          break;
        }
        candidate += String.fromCharCode(code);
      }
      // String.prototype.match restarts a global regex from index 0 on every call,
      // so the shared "g"-flagged pattern carries no state between calls.
      if (candidate.match(tokenRegEx)) {
        // The token ends right before the delimiter's leading newline.
        input.acceptToken(BlockContent);
        return;
      }
    }
    if (next === EOF) {
      // `current` is the last character of the input; end the token after it.
      input.acceptToken(BlockContent, 1);
      return;
    }
    current = input.advance(1);
    next = input.peek(1);
  }
});

View File

@@ -0,0 +1,38 @@
/**
* 代码块语言解析器入口
* 导出所有语言解析相关的功能
*/
// Main language support
export { CodeBlockLanguage, codeBlockLang, getCodeBlockLanguageExtension } from './codeblock-lang';

// Language table and lookup helpers
export { LanguageInfo, LANGUAGES, languageMapping, getLanguage, getLanguageTokens } from './languages';

// Nested parser configuration
export { configureNesting } from './nested-parser';

// Parser term ids
export * from './parser.terms.js';

// External tokenizer
export { blockContent } from './external-tokens';

// Parser
export { parser } from './parser.js';

View File

@@ -0,0 +1,82 @@
/**
* 语言映射和解析器配置
*/
import { jsonLanguage } from "@codemirror/lang-json";
import { pythonLanguage } from "@codemirror/lang-python";
import { javascriptLanguage, typescriptLanguage } from "@codemirror/lang-javascript";
import { htmlLanguage } from "@codemirror/lang-html";
import { StandardSQL } from "@codemirror/lang-sql";
import { markdownLanguage } from "@codemirror/lang-markdown";
import { javaLanguage } from "@codemirror/lang-java";
import { phpLanguage } from "@codemirror/lang-php";
import { cssLanguage } from "@codemirror/lang-css";
import { cppLanguage } from "@codemirror/lang-cpp";
import { xmlLanguage } from "@codemirror/lang-xml";
import { rustLanguage } from "@codemirror/lang-rust";
import { StreamLanguage } from "@codemirror/language";
import { ruby } from "@codemirror/legacy-modes/mode/ruby";
import { shell } from "@codemirror/legacy-modes/mode/shell";
import { go } from "@codemirror/legacy-modes/mode/go";
import { yamlLanguage } from "@codemirror/lang-yaml";
import { SupportedLanguage } from '../types';
/**
 * Describes one language that a code block can be written in.
 */
export class LanguageInfo {
  /** Language token identifying this entry. */
  public token: SupportedLanguage;
  /** Human-readable language name. */
  public name: string;
  /** Parser for the language, or `null` when the entry has none. */
  public parser: any;
  /** Optional short identifier; presumably used for language auto-detection (confirm against callers). */
  public guesslang?: string | null;

  /**
   * @param token - Language token identifying this entry.
   * @param name - Human-readable language name.
   * @param parser - Parser for the language, or `null`.
   * @param guesslang - Optional short identifier for the language.
   */
  constructor(
    token: SupportedLanguage,
    name: string,
    parser: any,
    guesslang?: string | null
  ) {
    this.token = token;
    this.name = name;
    this.parser = parser;
    this.guesslang = guesslang;
  }
}
/**
 * Rows of the language table: token, display name, parser and optional short identifier.
 * Plain text has no parser and no short identifier.
 *
 * NOTE(review): "c" is listed here but is not among the BlockLanguage names in
 * codeblock.grammar, and the grammar's "math" has no row here. Confirm whether the
 * two lists are meant to match.
 */
const LANGUAGE_ROWS: [SupportedLanguage, string, any, (string | null)?][] = [
  ["text", "Plain Text", null],
  ["json", "JSON", jsonLanguage.parser, "json"],
  ["python", "Python", pythonLanguage.parser, "py"],
  ["javascript", "JavaScript", javascriptLanguage.parser, "js"],
  ["typescript", "TypeScript", typescriptLanguage.parser, "ts"],
  ["html", "HTML", htmlLanguage.parser, "html"],
  ["css", "CSS", cssLanguage.parser, "css"],
  ["sql", "SQL", StandardSQL.language.parser, "sql"],
  ["markdown", "Markdown", markdownLanguage.parser, "md"],
  ["java", "Java", javaLanguage.parser, "java"],
  ["php", "PHP", phpLanguage.configure({ top: "Program" }).parser, "php"],
  ["xml", "XML", xmlLanguage.parser, "xml"],
  ["cpp", "C++", cppLanguage.parser, "cpp"],
  // C reuses the C++ parser.
  ["c", "C", cppLanguage.parser, "c"],
  ["rust", "Rust", rustLanguage.parser, "rs"],
  // Ruby, Shell and Go are legacy stream modes wrapped with StreamLanguage.
  ["ruby", "Ruby", StreamLanguage.define(ruby).parser, "rb"],
  ["shell", "Shell", StreamLanguage.define(shell).parser, "sh"],
  ["yaml", "YAML", yamlLanguage.parser, "yaml"],
  ["go", "Go", StreamLanguage.define(go).parser, "go"],
];

/**
 * Supported languages, in table order.
 */
export const LANGUAGES: LanguageInfo[] = LANGUAGE_ROWS.map(
  ([token, name, parser, guesslang]) => new LanguageInfo(token, name, parser, guesslang)
);
/**
 * Lookup table from language token to that language's parser
 * (`null` for entries that have no parser).
 */
export const languageMapping = Object.fromEntries(
  LANGUAGES.map(({ token, parser }) => [token, parser])
);
/**
 * Looks up the language entry for a token.
 *
 * @param token - Language token to search for.
 * @returns The first entry in `LANGUAGES` with that token, or `undefined` when none matches.
 */
export function getLanguage(token: SupportedLanguage): LanguageInfo | undefined {
  for (const candidate of LANGUAGES) {
    if (candidate.token === token) {
      return candidate;
    }
  }
  return undefined;
}
/**
 * Lists the tokens of all supported languages.
 *
 * @returns A new array with one token per entry of `LANGUAGES`, in the same order.
 */
export function getLanguageTokens(): SupportedLanguage[] {
  const tokens: SupportedLanguage[] = [];
  for (const { token } of LANGUAGES) {
    tokens.push(token);
  }
  return tokens;
}

View File

@@ -0,0 +1,44 @@
/**
* 嵌套解析器配置
* 为不同语言的代码块提供语法高亮支持
*/
import { parseMixed } from "@lezer/common";
import { BlockContent, BlockLanguage } from "./parser.terms.js";
import { languageMapping } from "./languages";
/**
 * Creates the mixed-language parse wrapper for code blocks.
 *
 * For every non-empty BlockContent node, the language name is read from the
 * BlockLanguage node inside the first child of the content's parent, and the
 * parser registered for that name in `languageMapping` is used for the
 * content. All other nodes, empty content, unknown names and languages whose
 * parser is `null` get no nested parser.
 *
 * @returns The wrapper produced by `parseMixed`.
 */
export function configureNesting() {
  return parseMixed((node, input) => {
    if (node.type.id !== BlockContent) {
      return null;
    }

    // Empty content gets no nested parser; per the original author's note this
    // avoids StreamLanguage parsers failing on large buffers.
    const content = node.node;
    if (content.from === content.to) {
      return null;
    }

    // The language marker sits in the first child of the content's parent.
    const languageNode = content.parent?.firstChild?.getChildren(BlockLanguage)[0];
    if (!languageNode) {
      return null;
    }

    let langName = input.read(languageNode.from, languageNode.to);
    // Drop a trailing auto-detect marker if the text read includes one.
    if (langName.endsWith('-a')) {
      langName = langName.slice(0, -2);
    }

    const hasParser =
      !!langName && langName in languageMapping && languageMapping[langName] !== null;
    if (!hasParser) {
      return null;
    }
    return {
      parser: languageMapping[langName],
    };
  });
}

View File

@@ -0,0 +1,17 @@
// This file was generated by lezer-generator. You probably shouldn't edit it.
import {LRParser} from "@lezer/lr"
import {blockContent} from "./external-tokens.js"
// Parser for the code block grammar, rebuilt from serialized tables.
// build-parser.js regenerates this file from codeblock.grammar, so changes belong in the
// grammar rather than in the tables below.
export const parser = LRParser.deserialize({
version: 14,
states: "!jQQOQOOOVOQO'#C`O!dOPO'#C_OOOO'#Cc'#CcQQOQOOOOOO'#Ca'#CaO!iOSO,58zOOOO,58y,58yOOOO-E6a-E6aOOOP1G.f1G.fO!qOSO1G.fOOOP7+$Q7+$Q",
stateData: "!v~OXPO~OYTOZTO[TO]TO^TO_TO`TOaTObTOcTOdTOeTOfTOgTOhTOiTOjTOkTOlTO~OPVO~OUYOmXO~OmZO~O",
goto: "jWPPPX]aPdTROSTQOSRUPQSORWS",
// Node names in id order; positions 1-6 line up with the ids exported from parser.terms.js.
nodeNames: "⚠ BlockContent Document Block BlockDelimiter BlockLanguage Auto",
maxTerm: 29,
skippedNodes: [0],
repeatNodeCount: 1,
tokenData: ",k~R]YZz}!O!e#V#W!p#Z#[#a#[#]#l#^#_$T#a#b%x#d#e'X#f#g([#g#h)R#h#i*O#l#m+q#m#n,SR!PPmQ%&x%&y!SP!VP%&x%&y!YP!]P%&x%&y!`P!eOXP~!hP#T#U!k~!pOU~~!sQ#d#e!y#g#h#U~!|P#d#e#P~#UOe~~#XP#g#h#[~#aOc~~#dP#c#d#g~#lOj~~#oP#h#i#r~#uP#a#b#x~#{P#`#a$O~$TO^~~$WQ#T#U$^#g#h%g~$aP#j#k$d~$gP#T#U$j~$oPa~#g#h$r~$uP#V#W$x~${P#f#g%O~%RP#]#^%U~%XP#d#e%[~%_P#h#i%b~%gOk~~%jP#c#d%m~%pP#b#c%s~%xO[~~%{P#T#U&O~&RQ#f#g&X#h#i&|~&[P#_#`&_~&bP#W#X&e~&hP#c#d&k~&nP#k#l&q~&tP#b#c&w~&|O`~~'PP#[#]'S~'XOZ~~'[Q#[#]'b#m#n'm~'eP#d#e'h~'mOb~~'pP#h#i's~'vP#[#]'y~'|P#c#d(P~(SP#b#c(V~([O]~~(_P#i#j(b~(eQ#U#V(k#g#h(v~(nP#m#n(q~(vOg~~(yP#h#i(|~)ROf~~)UQ#[#])[#e#f)s~)_P#X#Y)b~)eP#`#a)h~)kP#`#a)n~)sOh~~)vP#`#a)y~*OO_~~*RQ#X#Y*X#m#n*j~*[P#l#m*_~*bP#h#i*e~*jOY~~*mP#d#e*p~*sP#X#Y*v~*yP#g#h*|~+PP#V#W+S~+VP#f#g+Y~+]P#]#^+`~+cP#d#e+f~+iP#h#i+l~+qOl~~+tP#a#b+w~+zP#`#a+}~,SOd~~,VP#T#U,Y~,]P#a#b,`~,cP#`#a,f~,kOi~",
// blockContent is the external tokenizer imported from ./external-tokens.js.
tokenizers: [blockContent, 0, 1],
topRules: {"Document":[0,2]},
tokenPrec: 0
})

View File

@@ -0,0 +1,8 @@
// This file was generated by lezer-generator. You probably shouldn't edit it.
// Numeric ids of the grammar's node types, one export per term.
export const BlockContent = 1;
export const Document = 2;
export const Block = 3;
export const BlockDelimiter = 4;
export const BlockLanguage = 5;
export const Auto = 6;