Files
voidraft/frontend/src/views/editor/extensions/codeblock/lang-detect/autodetect.ts

327 lines
9.6 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* 基于 Web Worker 的语言自动检测
*/
import { EditorState, Annotation } from '@codemirror/state';
import { EditorView, ViewPlugin } from '@codemirror/view';
import { redoDepth } from '@codemirror/commands';
import { blockState, getActiveNoteBlock } from '../state';
import { levenshteinDistance } from './levenshtein';
import { LANGUAGES } from '../lang-parser/languages';
import { SupportedLanguage, Block } from '../types';
import { changeLanguageTo } from '../commands';
// ===== 类型定义 =====
/**
* Options accepted by the language auto-detection plugin.
* Every field is optional; omitted fields fall back to the module's default configuration.
*/
export interface LanguageDetectionConfig {
// Content whose length is less than or equal to this value is not sent for detection.
minContentLength?: number;
// Minimum confidence a detection result must reach before the block language is changed.
confidenceThreshold?: number;
// NOTE(review): not read anywhere in the visible code — presumably a delay in milliseconds; confirm or remove.
idleDelay?: number;
// Language an auto-detected block is reset to when its content becomes empty.
defaultLanguage?: SupportedLanguage;
}
/**
* Result of a single language detection request.
*/
export interface LanguageDetectionResult {
// Language reported by the worker. The worker's string is cast to SupportedLanguage without validation,
// so the plugin still maps it through the detection map before using it.
language: SupportedLanguage;
// Confidence score reported by the worker; compared against the configured threshold.
confidence: number;
}
/**
* Message posted to the detection worker.
*/
interface WorkerMessage {
// Text whose language should be detected.
content: string;
// Request id; the worker's response carries the same value so it can be matched to this request.
idx: number;
}
/**
* Message received from the detection worker.
*/
interface WorkerResponse {
// Detected language id; a falsy value is treated as "no detection result".
language: string;
// Confidence score for the detected language.
confidence: number;
// Id of the request this response answers.
idx: number;
}
// ===== 常量配置 =====
/**
* Default configuration; values passed to createLanguageDetection() are merged over these.
*/
const DEFAULT_CONFIG = {
// Content of 20 characters or fewer is skipped.
minContentLength: 20,
// A detection result needs a confidence of at least 0.15 to be applied.
confidenceThreshold: 0.15,
// NOTE(review): not read anywhere in the visible code — presumably milliseconds; confirm.
idleDelay: 1000,
// Language applied to an auto-detected block once it is empty.
defaultLanguage: 'text' as SupportedLanguage,
};
/**
 * Builds the lookup table from detector ids to supported language tokens.
 *
 * Languages are visited in the order they appear in `LANGUAGES`; when two
 * languages share a detect id, the first one wins so that a later entry
 * cannot shadow a more basic language (for example js -> ts).
 *
 * @returns A map from detect id to language token.
 */
function createDetectionMap(): Map<string, SupportedLanguage> {
  const tokenByDetectId = new Map<string, SupportedLanguage>();

  LANGUAGES.forEach((language) => {
    const ids = language.detectIds;
    if (!ids) {
      return;
    }
    for (const id of ids) {
      // Keep the first registration for an id; later duplicates are ignored.
      if (tokenByDetectId.has(id)) {
        continue;
      }
      tokenByDetectId.set(id, language.token);
    }
  });

  return tokenByDetectId;
}
/**
 * Resolves the URL of the language-detection worker script.
 *
 * The script path is the build's base URL followed by `langdetect-worker.js`,
 * resolved against the current page location.
 */
function createWorkerUrl(): URL {
  const scriptPath = import.meta.env.BASE_URL + 'langdetect-worker.js';
  const pageLocation = window.location.href;
  return new URL(scriptPath, pageLocation);
}
/**
* Lookup table from detector ids to supported language tokens,
* built once at module load by createDetectionMap().
*/
const DETECTION_MAP = createDetectionMap();
// ===== 工具函数 =====
/**
 * Schedules `callback` with `window.requestIdleCallback` when it exists,
 * otherwise with a zero-delay `setTimeout`.
 *
 * @param callback Function to invoke.
 * @returns The handle returned by whichever scheduler was used.
 */
function requestIdleCallbackCompat(callback: () => void): number {
  // No window, or no native idle callback support: fall back to a timer.
  if (typeof window === 'undefined' || !window.requestIdleCallback) {
    return setTimeout(callback, 0) as any;
  }
  return window.requestIdleCallback(callback);
}
/**
 * Cancels a callback by handle, using `window.cancelIdleCallback` when it
 * exists and `clearTimeout` otherwise.
 *
 * @param id Handle of the scheduled callback to cancel.
 */
function cancelIdleCallbackCompat(id: number): void {
  const hasNativeCancel = typeof window !== 'undefined' && window.cancelIdleCallback;
  if (!hasNativeCancel) {
    clearTimeout(id);
    return;
  }
  window.cancelIdleCallback(id);
}
/**
* Boolean annotation type used to mark transactions as language changes.
* The detection plugin ignores document changes whose transactions carry a truthy value for it.
*/
const languageChangeAnnotation = Annotation.define<boolean>();
// ===== Web Worker 管理器 =====
/**
 * Bookkeeping for one in-flight detection request.
 */
interface PendingDetectionRequest {
  resolve: (result: LanguageDetectionResult) => void;
  reject: (error: Error) => void;
  /** Timer that rejects the request if the worker never answers. */
  timeoutId: ReturnType<typeof setTimeout>;
}

/**
 * Language detection worker manager.
 *
 * Owns the Web Worker's lifecycle and matches each worker response to the
 * request that caused it by means of a numeric request id.
 *
 * Every request is settled exactly once: by a worker response, by a worker
 * error, by the timeout, or by `destroy()`. The timeout timer is cleared as
 * soon as the request settles.
 */
class LanguageDetectionWorker {
  /** How long a request may stay unanswered before it is rejected. */
  private static readonly REQUEST_TIMEOUT_MS = 5000;

  private worker: Worker | null = null;
  private pendingRequests = new Map<number, PendingDetectionRequest>();
  private requestId = 0;

  constructor() {
    this.initWorker();
  }

  /**
   * Creates the worker and wires up its message and error handlers.
   * A failure to create the worker is logged; `detectLanguage()` then
   * rejects with "Worker not initialized".
   */
  private initWorker(): void {
    try {
      this.worker = new Worker(createWorkerUrl());
      this.worker.onmessage = (event) => {
        const response: WorkerResponse = event.data;
        const request = this.takeRequest(response.idx);
        if (!request) {
          // Unknown id: the request already timed out or was rejected.
          return;
        }
        if (response.language) {
          request.resolve({
            language: response.language as SupportedLanguage,
            confidence: response.confidence
          });
        } else {
          request.reject(new Error('No detection result'));
        }
      };
      this.worker.onerror = () => {
        this.rejectAll(new Error('Worker error'));
      };
    } catch (error) {
      console.error('Failed to initialize worker:', error);
    }
  }

  /**
   * Removes a pending request from the table and stops its timeout timer.
   *
   * @returns The removed entry, or `undefined` when the id is not pending.
   */
  private takeRequest(id: number): PendingDetectionRequest | undefined {
    const request = this.pendingRequests.get(id);
    if (request) {
      this.pendingRequests.delete(id);
      clearTimeout(request.timeoutId);
    }
    return request;
  }

  /**
   * Rejects every pending request with `error` and empties the table.
   */
  private rejectAll(error: Error): void {
    // Snapshot first so the table is already empty while handlers run.
    const requests = [...this.pendingRequests.values()];
    this.pendingRequests.clear();
    for (const request of requests) {
      clearTimeout(request.timeoutId);
      request.reject(error);
    }
  }

  /**
   * Asks the worker to detect the language of `content`.
   *
   * @param content Text to analyse.
   * @returns The language and confidence reported by the worker.
   * @throws Error (as a rejection) when the worker is not initialized, reports
   *   no result, errors, does not answer within the timeout, or is destroyed
   *   while the request is pending.
   */
  async detectLanguage(content: string): Promise<LanguageDetectionResult> {
    const worker = this.worker;
    if (!worker) {
      throw new Error('Worker not initialized');
    }
    return new Promise((resolve, reject) => {
      const id = ++this.requestId;
      const timeoutId = setTimeout(() => {
        if (this.takeRequest(id)) {
          reject(new Error('Detection timeout'));
        }
      }, LanguageDetectionWorker.REQUEST_TIMEOUT_MS);
      this.pendingRequests.set(id, { resolve, reject, timeoutId });

      const message: WorkerMessage = { content, idx: id };
      try {
        worker.postMessage(message);
      } catch (error) {
        // Do not leave a stale entry and a live timer behind if posting fails.
        this.takeRequest(id);
        reject(error);
      }
    });
  }

  /**
   * Terminates the worker and rejects all requests that are still pending,
   * so no caller is left awaiting a promise that can never settle.
   */
  destroy(): void {
    if (this.worker) {
      this.worker.terminate();
      this.worker = null;
    }
    this.rejectAll(new Error('Worker destroyed'));
  }
}
// ===== 语言检测插件 =====
/**
 * Creates the view plugin that auto-detects the language of the active block.
 *
 * After a document change the plugin waits for an idle callback, reads the
 * active block's content and, when the block is in auto mode and the content
 * changed enough since the last detection, asks the detection worker for the
 * language. A sufficiently confident result that maps to a supported language
 * is applied through `changeLanguageTo`.
 *
 * Each plugin instance (one per editor view) owns its own worker, content
 * cache and idle-callback handle, so views sharing this extension do not
 * interfere with one another.
 *
 * @param config Optional overrides; options that are omitted or `undefined`
 *   use the values from `DEFAULT_CONFIG`.
 * @returns The view plugin to add to the editor's extensions.
 */
export function createLanguageDetection(config: LanguageDetectionConfig = {}): ViewPlugin<any> {
  // Resolve option by option: a plain object spread would let an explicitly
  // `undefined` option replace its default.
  const finalConfig = {
    minContentLength: config.minContentLength ?? DEFAULT_CONFIG.minContentLength,
    confidenceThreshold: config.confidenceThreshold ?? DEFAULT_CONFIG.confidenceThreshold,
    idleDelay: config.idleDelay ?? DEFAULT_CONFIG.idleDelay,
    defaultLanguage: config.defaultLanguage ?? DEFAULT_CONFIG.defaultLanguage,
  };

  return ViewPlugin.fromClass(
    class LanguageDetectionPlugin {
      /** Content last sent through detection, keyed by block index. */
      private readonly contentCache = new Map<number, string>();
      private idleCallbackId: number | null = null;
      private worker: LanguageDetectionWorker | null;
      private destroyed = false;

      constructor(public view: EditorView) {
        this.worker = new LanguageDetectionWorker();
      }

      /**
       * Schedules a detection pass after document changes, skipping changes
       * from transactions that carry the language-change annotation.
       */
      update(update: any) {
        if (!update.docChanged) return;
        const isLanguageChange = update.transactions.some((tr: any) =>
          tr.annotation(languageChangeAnnotation));
        if (isLanguageChange) return;

        // Only the most recent change needs a detection pass.
        if (this.idleCallbackId !== null) {
          cancelIdleCallbackCompat(this.idleCallbackId);
        }
        this.idleCallbackId = requestIdleCallbackCompat(() => {
          this.idleCallbackId = null;
          if (!this.destroyed) {
            this.performDetection();
          }
        });
      }

      /**
       * Inspects the active block and starts a worker detection if needed.
       *
       * Reads the view's current state rather than the state captured when the
       * callback was scheduled: the view may have received further
       * transactions (for example selection changes) in the meantime, and a
       * view only accepts transactions that start from its current state.
       */
      private performDetection(): void {
        const state: EditorState = this.view.state;
        const block = getActiveNoteBlock(state);
        if (!block || !block.language.auto) return;

        const blockIndex = state.field(blockState).indexOf(block);
        const content = state.doc.sliceString(block.content.from, block.content.to);

        // Empty content: reset to the default language (unless a redo is available).
        if (content === '' && redoDepth(state) === 0) {
          if (block.language.name !== finalConfig.defaultLanguage) {
            changeLanguageTo(state, this.view.dispatch, block, finalConfig.defaultLanguage, true);
          }
          this.contentCache.delete(blockIndex);
          return;
        }

        // Too short to detect reliably.
        if (content.length <= finalConfig.minContentLength) return;

        // Skip when the content differs by less than 10% from what was last detected.
        const cachedContent = this.contentCache.get(blockIndex);
        if (cachedContent && levenshteinDistance(cachedContent, content) < content.length * 0.1) {
          return;
        }

        void this.detectAndUpdate(content, block, blockIndex);
      }

      /**
       * Runs detection in the worker and applies the result to the block.
       *
       * The worker answers asynchronously, so once the answer arrives the
       * block is looked up again in the view's current state and the result is
       * applied only if that block still holds exactly the content that was
       * analysed. Otherwise the result is dropped; the document change that
       * made it stale has already scheduled a new pass.
       */
      private async detectAndUpdate(content: string, block: Block, blockIndex: number): Promise<void> {
        const worker = this.worker;
        if (!worker) return;
        try {
          const result = await worker.detectLanguage(content);
          if (this.destroyed) return;

          const state: EditorState = this.view.state;
          const currentBlock: Block | undefined = state.field(blockState).find((candidate: Block) =>
            candidate.content.from === block.content.from &&
            candidate.content.to === block.content.to);
          if (!currentBlock || !currentBlock.language.auto) return;
          if (state.doc.sliceString(currentBlock.content.from, currentBlock.content.to) !== content) return;

          // Translate the detector's id into one of the supported languages.
          const mappedLanguage = DETECTION_MAP.get(result.language);
          if (mappedLanguage &&
            result.confidence >= finalConfig.confidenceThreshold &&
            mappedLanguage !== currentBlock.language.name) {
            // Only change the language when there is nothing to redo.
            if (redoDepth(state) === 0) {
              changeLanguageTo(state, this.view.dispatch, currentBlock, mappedLanguage, true);
            }
          }
          this.contentCache.set(blockIndex, content);
        } catch (error) {
          console.warn('Language detection failed:', error);
        }
      }

      /**
       * Cancels any scheduled pass and releases the worker and the cache.
       */
      destroy() {
        this.destroyed = true;
        if (this.idleCallbackId !== null) {
          cancelIdleCallbackCompat(this.idleCallbackId);
          this.idleCallbackId = null;
        }
        if (this.worker) {
          this.worker.destroy();
          this.worker = null;
        }
        this.contentCache.clear();
      }
    }
  );
}
// ===== 公共 API =====
/**
 * Detects the language of a single piece of content.
 *
 * A dedicated worker is created for the call and destroyed once the
 * detection has settled, whether it succeeded or failed.
 *
 * @param content Text to analyse.
 * @returns The detected language and its confidence.
 */
export async function detectLanguage(content: string): Promise<LanguageDetectionResult> {
  const detector = new LanguageDetectionWorker();
  const release = (): void => detector.destroy();
  return detector.detectLanguage(content).finally(release);
}
/**
 * Detects the language of several pieces of content using one shared worker.
 *
 * All requests are issued up front and awaited together; the worker is
 * destroyed once the combined promise settles, whether it succeeded or failed.
 *
 * @param contents Texts to analyse.
 * @returns One result per input, in input order.
 */
export async function detectLanguages(contents: string[]): Promise<LanguageDetectionResult[]> {
  const detector = new LanguageDetectionWorker();
  try {
    const inFlight = contents.map((text) => detector.detectLanguage(text));
    const results = await Promise.all(inFlight);
    return results;
  } finally {
    detector.destroy();
  }
}