/**
 * Automatic language detection backed by a Web Worker.
 *
 * The module exposes a CodeMirror view plugin that, after the document
 * changes, sends the active block's text to a worker script and switches the
 * block's language when the worker reports a sufficiently confident match.
 * Two standalone helpers (`detectLanguage`, `detectLanguages`) expose the same
 * worker round-trip without an editor.
 */

import { EditorState, Annotation } from '@codemirror/state';
import { EditorView, ViewPlugin, PluginValue, ViewUpdate } from '@codemirror/view';
import { redoDepth } from '@codemirror/commands';
import { blockState, getActiveNoteBlock } from '../state';
import { levenshteinDistance } from './levenshtein';
import { LANGUAGES } from '../lang-parser/languages';
import { SupportedLanguage, Block } from '../types';
import { changeLanguageTo } from '../commands';

// ===== Type definitions =====

/**
 * Options accepted by {@link createLanguageDetection}.
 */
export interface LanguageDetectionConfig {
  /** Content whose length is less than or equal to this is never sent for detection. */
  minContentLength?: number;
  /** Minimum confidence a worker result needs before the language is changed. */
  confidenceThreshold?: number;
  /**
   * NOTE(review): this option is accepted and defaulted but never read anywhere
   * in this file — confirm whether it was meant to delay/debounce detection.
   */
  idleDelay?: number;
  /** Language an auto-detected block is reset to when its content becomes empty. */
  defaultLanguage?: SupportedLanguage;
}

/**
 * Result of a single detection.
 */
export interface LanguageDetectionResult {
  language: SupportedLanguage;
  confidence: number;
}

/**
 * Message posted to the worker.
 */
interface WorkerMessage {
  content: string;
  idx: number;
}

/**
 * Message received from the worker; `idx` echoes the request's `idx`.
 */
interface WorkerResponse {
  language: string;
  confidence: number;
  idx: number;
}

/**
 * Bookkeeping for one in-flight worker request.
 */
interface PendingRequest {
  resolve: (result: LanguageDetectionResult) => void;
  reject: (error: Error) => void;
  timer: ReturnType<typeof setTimeout>;
}

// ===== Constants =====

/**
 * Default configuration.
 */
const DEFAULT_CONFIG: Required<LanguageDetectionConfig> = {
  minContentLength: 20,
  confidenceThreshold: 0.15,
  idleDelay: 1000,
  defaultLanguage: 'text' as SupportedLanguage,
};

/** How long a single worker request may stay unanswered before it is rejected. */
const DETECTION_TIMEOUT_MS = 5000;

/** Edits smaller than this fraction of the content length count as "unchanged". */
const MINOR_EDIT_RATIO = 0.1;

/**
 * Builds the lookup from a detector id to the language token of the first
 * entry in `LANGUAGES` that lists that id.
 */
function createDetectionMap(): Map<string, SupportedLanguage> {
  const map = new Map<string, SupportedLanguage>();
  for (const lang of LANGUAGES) {
    if (!lang.detectIds) continue;
    for (const detectId of lang.detectIds) {
      // Keep the first mapping so a duplicate detectId cannot override a more
      // basic language (e.g. js -> ts).
      if (!map.has(detectId)) {
        map.set(detectId, lang.token);
      }
    }
  }
  return map;
}

/**
 * Resolves the worker script URL against the app's base URL and the current page.
 */
function createWorkerUrl(): URL {
  return new URL(`${import.meta.env.BASE_URL}langdetect-worker.js`, window.location.href);
}

/**
 * Detector id -> language token lookup table.
 */
const DETECTION_MAP = createDetectionMap();

// ===== Utilities =====

/**
 * `requestIdleCallback` with a `setTimeout(…, 0)` fallback for environments
 * that do not provide it.
 */
function requestIdleCallbackCompat(callback: () => void): number {
  if (typeof window !== 'undefined' && window.requestIdleCallback) {
    return window.requestIdleCallback(callback);
  }
  // The timer handle's static type depends on which typings are loaded; the
  // handle is only ever passed back to clearTimeout below.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  return setTimeout(callback, 0) as any;
}

/**
 * Counterpart of {@link requestIdleCallbackCompat}.
 */
function cancelIdleCallbackCompat(id: number): void {
  if (typeof window !== 'undefined' && window.cancelIdleCallback) {
    window.cancelIdleCallback(id);
  } else {
    clearTimeout(id);
  }
}

/**
 * Returns true when `next` differs from `previous` by fewer edits than
 * `MINOR_EDIT_RATIO` of `next`'s length.
 */
function hasOnlyMinorEdits(previous: string, next: string): boolean {
  return levenshteinDistance(previous, next) < next.length * MINOR_EDIT_RATIO;
}

/**
 * Annotation used to recognise language-change transactions in `update`.
 *
 * NOTE(review): nothing in this file attaches this annotation to a transaction,
 * and it is not exported, so as far as this file shows the check in `update`
 * never matches — confirm whether `changeLanguageTo` was meant to set it.
 */
const languageChangeAnnotation = Annotation.define<boolean>();

// ===== Web Worker manager =====

/**
 * Owns one detection worker and matches its responses to pending requests.
 */
class LanguageDetectionWorker {
  private worker: Worker | null = null;
  private readonly pendingRequests = new Map<number, PendingRequest>();
  private requestId = 0;

  constructor() {
    this.initWorker();
  }

  /**
   * Creates the worker and wires its handlers. A construction failure is
   * logged and leaves `worker` as null, which `detectLanguage` reports.
   */
  private initWorker(): void {
    try {
      const worker = new Worker(createWorkerUrl());

      worker.onmessage = (event: MessageEvent<WorkerResponse>) => {
        const response = event.data;
        const request = this.takeRequest(response.idx);
        if (!request) return; // already timed out, rejected, or unknown id

        if (response.language) {
          request.resolve({
            language: response.language as SupportedLanguage,
            confidence: response.confidence,
          });
        } else {
          request.reject(new Error('No detection result'));
        }
      };

      worker.onerror = () => {
        this.rejectAll(new Error('Worker error'));
      };

      this.worker = worker;
    } catch (error) {
      console.error('Failed to initialize worker:', error);
    }
  }

  /**
   * Removes a pending request and cancels its timeout timer.
   * @returns the removed request, or undefined if none was pending under `id`.
   */
  private takeRequest(id: number): PendingRequest | undefined {
    const request = this.pendingRequests.get(id);
    if (request) {
      this.pendingRequests.delete(id);
      clearTimeout(request.timer);
    }
    return request;
  }

  /**
   * Rejects every pending request with `error` and clears their timers.
   */
  private rejectAll(error: Error): void {
    // Snapshot first so callbacks run against an already-empty map.
    const requests = [...this.pendingRequests.values()];
    this.pendingRequests.clear();
    for (const request of requests) {
      clearTimeout(request.timer);
      request.reject(error);
    }
  }

  /**
   * Sends `content` to the worker and resolves with its answer.
   *
   * @throws (as a rejection) `Worker not initialized` when the worker could not
   *   be created or was destroyed, `Detection timeout` when no response arrives
   *   within `DETECTION_TIMEOUT_MS`, `No detection result` when the response has
   *   no language, `Worker error` on a worker error event, and
   *   `Worker destroyed` when `destroy()` is called while the request is pending.
   */
  async detectLanguage(content: string): Promise<LanguageDetectionResult> {
    const worker = this.worker;
    if (!worker) {
      throw new Error('Worker not initialized');
    }

    return new Promise<LanguageDetectionResult>((resolve, reject) => {
      const id = ++this.requestId;

      const timer = setTimeout(() => {
        if (this.takeRequest(id)) {
          reject(new Error('Detection timeout'));
        }
      }, DETECTION_TIMEOUT_MS);

      this.pendingRequests.set(id, { resolve, reject, timer });

      try {
        const message: WorkerMessage = { content, idx: id };
        worker.postMessage(message);
      } catch (error) {
        // Do not leave a dead entry (and a live timer) behind.
        this.takeRequest(id);
        reject(error instanceof Error ? error : new Error(String(error)));
      }
    });
  }

  /**
   * Terminates the worker and rejects anything still pending, so that awaiting
   * callers are released instead of hanging.
   */
  destroy(): void {
    if (this.worker) {
      this.worker.terminate();
      this.worker = null;
    }
    this.rejectAll(new Error('Worker destroyed'));
  }
}

// ===== Language detection plugin =====

/**
 * Creates the language detection view plugin.
 *
 * Each plugin instance (i.e. each editor view the extension is installed in)
 * owns its own worker, idle-callback handle and content cache.
 *
 * @param config Overrides for the defaults; properties that are missing or
 *   `undefined` fall back to the default value.
 */
export function createLanguageDetection(config: LanguageDetectionConfig = {}): ViewPlugin<PluginValue> {
  const finalConfig: Required<LanguageDetectionConfig> = {
    minContentLength: config.minContentLength ?? DEFAULT_CONFIG.minContentLength,
    confidenceThreshold: config.confidenceThreshold ?? DEFAULT_CONFIG.confidenceThreshold,
    idleDelay: config.idleDelay ?? DEFAULT_CONFIG.idleDelay,
    defaultLanguage: config.defaultLanguage ?? DEFAULT_CONFIG.defaultLanguage,
  };

  return ViewPlugin.fromClass(
    class LanguageDetectionPlugin implements PluginValue {
      /** Last content that went through detection, keyed by block index. */
      private readonly contentCache = new Map<number, string>();
      private idleCallbackId: number | null = null;
      private worker: LanguageDetectionWorker | null = new LanguageDetectionWorker();
      private destroyed = false;

      constructor(public view: EditorView) {}

      /**
       * Schedules a detection pass for the next idle period after a document
       * change, replacing any pass that is still waiting.
       */
      update(update: ViewUpdate): void {
        if (!update.docChanged) return;
        if (update.transactions.some(tr => tr.annotation(languageChangeAnnotation))) return;

        if (this.idleCallbackId !== null) {
          cancelIdleCallbackCompat(this.idleCallbackId);
        }
        this.idleCallbackId = requestIdleCallbackCompat(() => {
          this.idleCallbackId = null;
          // Read the state when the callback actually runs rather than the one
          // captured when it was scheduled.
          this.performDetection(this.view.state);
        });
      }

      /** Returns the text of `block`'s content range in `state`. */
      private readContent(state: EditorState, block: Block): string {
        return state.doc.sliceString(block.content.from, block.content.to);
      }

      /**
       * Finds the block a finished detection should be applied to: the block
       * at the remembered index when there is one, otherwise the active block.
       */
      private resolveBlock(state: EditorState, blockIndex: number): Block | undefined {
        const byIndex = blockIndex >= 0 ? state.field(blockState)[blockIndex] : undefined;
        return byIndex ?? getActiveNoteBlock(state);
      }

      /**
       * Decides whether the active block needs detection and, if so, starts it.
       */
      private performDetection(state: EditorState): void {
        const block = getActiveNoteBlock(state);
        if (!block || !block.language.auto) return;

        const blockIndex = state.field(blockState).indexOf(block);
        const content = this.readContent(state, block);

        // Empty content: reset to the default language (unless there is
        // something to redo).
        if (content === '' && redoDepth(state) === 0) {
          if (block.language.name !== finalConfig.defaultLanguage) {
            changeLanguageTo(state, this.view.dispatch, block, finalConfig.defaultLanguage, true);
          }
          this.contentCache.delete(blockIndex);
          return;
        }

        // Too short to classify.
        if (content.length <= finalConfig.minContentLength) return;

        // Skip when the content barely changed since the last detection.
        const cachedContent = this.contentCache.get(blockIndex);
        if (cachedContent && hasOnlyMinorEdits(cachedContent, content)) return;

        void this.detectAndUpdate(content, blockIndex);
      }

      /**
       * Runs detection for `content` and applies the result to the block.
       *
       * The worker round-trip is asynchronous, so the editor state is read
       * again once the result arrives; the result is dropped if the plugin was
       * destroyed, the block is gone or no longer auto-detected, or its content
       * has changed substantially in the meantime.
       */
      private async detectAndUpdate(content: string, blockIndex: number): Promise<void> {
        const worker = this.worker;
        if (!worker) return;

        try {
          const result = await worker.detectLanguage(content);
          if (this.destroyed) return;

          const state = this.view.state;
          const block = this.resolveBlock(state, blockIndex);
          if (!block || !block.language.auto) return;

          const currentContent = this.readContent(state, block);
          if (currentContent !== content && !hasOnlyMinorEdits(content, currentContent)) {
            // The text moved on while the worker was busy; the change that
            // caused this has scheduled its own detection pass.
            return;
          }

          // Translate the detector's id into one of our supported languages.
          const mappedLanguage = DETECTION_MAP.get(result.language);

          if (
            mappedLanguage &&
            result.confidence >= finalConfig.confidenceThreshold &&
            mappedLanguage !== block.language.name
          ) {
            // Only change the language when there is nothing to redo.
            if (redoDepth(state) === 0) {
              changeLanguageTo(state, this.view.dispatch, block, mappedLanguage, true);
            }
          }

          this.contentCache.set(blockIndex, content);
        } catch (error) {
          // Requests rejected by our own teardown are expected; stay quiet.
          if (this.destroyed) return;
          console.warn('Language detection failed:', error);
        }
      }

      /**
       * Cancels any scheduled pass, shuts the worker down and drops the cache.
       */
      destroy(): void {
        this.destroyed = true;

        if (this.idleCallbackId !== null) {
          cancelIdleCallbackCompat(this.idleCallbackId);
          this.idleCallbackId = null;
        }

        if (this.worker) {
          this.worker.destroy();
          this.worker = null;
        }

        this.contentCache.clear();
      }
    }
  );
}

// ===== Public API =====

/**
 * Detects the language of a single piece of content using a short-lived worker.
 *
 * The returned `language` is the worker's raw answer; it is not passed through
 * the detector-id lookup table that the plugin uses.
 */
export async function detectLanguage(content: string): Promise<LanguageDetectionResult> {
  const worker = new LanguageDetectionWorker();
  try {
    return await worker.detectLanguage(content);
  } finally {
    worker.destroy();
  }
}

/**
 * Detects the language of several pieces of content using one short-lived
 * worker. Rejects as soon as any single detection fails.
 */
export async function detectLanguages(contents: string[]): Promise<LanguageDetectionResult[]> {
  const worker = new LanguageDetectionWorker();
  try {
    return await Promise.all(contents.map(content => worker.detectLanguage(content)));
  } finally {
    worker.destroy();
  }
}