package services // Edit 表示编辑操作类型 type EditType int const ( // EditInsert 插入操作 EditInsert EditType = iota // EditDelete 删除操作 EditDelete // EditEqual 相等部分 EditEqual ) // Edit 表示单个编辑操作 type Edit struct { Type EditType // 操作类型 Content string // 操作内容 } // DiffResult 包含差异比较的结果信息 type DiffResult struct { Edits []Edit // 编辑操作列表 InsertCount int // 插入的字符数 DeleteCount int // 删除的字符数 ChangedLines int // 变更的行数 TotalChanges int // 总变更字符数(插入+删除) ChangedTokens int // 变更的token数(如单词、标识符等) } // calculateChangesDetailed 使用Myers差分算法计算两个字符串之间的具体变更 func calculateChangesDetailed(oldText, newText string) DiffResult { // 将文本分割成行 oldLines := splitLines(oldText) newLines := splitLines(newText) // 计算行级别的差异 edits := computeLineEdits(oldLines, newLines) // 计算变更统计 result := DiffResult{ Edits: edits, } // 统计变更 for _, edit := range edits { switch edit.Type { case EditInsert: result.InsertCount += len(edit.Content) result.ChangedLines++ case EditDelete: result.DeleteCount += len(edit.Content) result.ChangedLines++ } } result.TotalChanges = result.InsertCount + result.DeleteCount result.ChangedTokens = estimateChangedTokens(edits) return result } // splitLines 将文本分割成行 func splitLines(text string) []string { var lines []string var currentLine string for _, char := range text { if char == '\n' { lines = append(lines, currentLine) currentLine = "" } else { currentLine += string(char) } } // 添加最后一行(如果不是以换行符结尾) if currentLine != "" { lines = append(lines, currentLine) } return lines } // computeLineEdits 使用Myers差分算法计算行级别的差异 func computeLineEdits(oldLines, newLines []string) []Edit { var edits []Edit // 使用Myers差分算法计算行级别的差异 script := myersDiff(oldLines, newLines) // 将差异脚本转换为编辑操作 for _, op := range script { switch op.Type { case EditEqual: edits = append(edits, Edit{ Type: EditEqual, Content: oldLines[op.OldStart], }) case EditDelete: edits = append(edits, Edit{ Type: EditDelete, Content: oldLines[op.OldStart], }) case EditInsert: edits = append(edits, Edit{ Type: EditInsert, Content: newLines[op.NewStart], }) } } return edits } // DiffOp 表示差分操作 type DiffOp struct { Type EditType OldStart int OldEnd int NewStart int NewEnd int } // myersDiff 实现Myers差分算法 func myersDiff(oldLines, newLines []string) []DiffOp { // 基本思路:Myers差分算法通过建立编辑图来寻找最短编辑路径 // 简化版实现 var script []DiffOp oldLen := len(oldLines) newLen := len(newLines) // 使用动态规划找出最长公共子序列(LCS) lcs := longestCommonSubsequence(oldLines, newLines) // 根据LCS构建差分脚本 oldIndex, newIndex := 0, 0 for _, entry := range lcs { // 处理LCS之前的差异 for oldIndex < entry.OldIndex { script = append(script, DiffOp{ Type: EditDelete, OldStart: oldIndex, OldEnd: oldIndex + 1, NewStart: newIndex, NewEnd: newIndex, }) oldIndex++ } for newIndex < entry.NewIndex { script = append(script, DiffOp{ Type: EditInsert, OldStart: oldIndex, OldEnd: oldIndex, NewStart: newIndex, NewEnd: newIndex + 1, }) newIndex++ } // 处理相等部分 script = append(script, DiffOp{ Type: EditEqual, OldStart: oldIndex, OldEnd: oldIndex + 1, NewStart: newIndex, NewEnd: newIndex + 1, }) oldIndex++ newIndex++ } // 处理剩余差异 for oldIndex < oldLen { script = append(script, DiffOp{ Type: EditDelete, OldStart: oldIndex, OldEnd: oldIndex + 1, NewStart: newIndex, NewEnd: newIndex, }) oldIndex++ } for newIndex < newLen { script = append(script, DiffOp{ Type: EditInsert, OldStart: oldIndex, OldEnd: oldIndex, NewStart: newIndex, NewEnd: newIndex + 1, }) newIndex++ } return script } // LCSEntry 表示最长公共子序列中的一个条目 type LCSEntry struct { OldIndex int NewIndex int } // longestCommonSubsequence 寻找两个字符串数组的最长公共子序列 func longestCommonSubsequence(oldLines, newLines []string) []LCSEntry { oldLen := len(oldLines) newLen := len(newLines) // 创建动态规划表 dp := make([][]int, oldLen+1) for i := range dp { dp[i] = make([]int, newLen+1) } // 填充DP表 for i := 1; i <= oldLen; i++ { for j := 1; j <= newLen; j++ { if oldLines[i-1] == newLines[j-1] { dp[i][j] = dp[i-1][j-1] + 1 } else { dp[i][j] = max(dp[i-1][j], dp[i][j-1]) } } } // 回溯找出LCS var lcs []LCSEntry i, j := oldLen, newLen for i > 0 && j > 0 { if oldLines[i-1] == newLines[j-1] { lcs = append([]LCSEntry{{OldIndex: i - 1, NewIndex: j - 1}}, lcs...) i-- j-- } else if dp[i-1][j] > dp[i][j-1] { i-- } else { j-- } } return lcs } // max 返回两个整数中的较大值 func max(a, b int) int { if a > b { return a } return b } // estimateChangedTokens 估计变更的token数量 // 这里使用简单的单词分割来估计 func estimateChangedTokens(edits []Edit) int { tokenCount := 0 for _, edit := range edits { switch edit.Type { case EditInsert, EditDelete: // 简单地将内容按空白字符分割成单词 words := splitIntoWords(edit.Content) tokenCount += len(words) } } return tokenCount } // splitIntoWords 将文本分割成单词 func splitIntoWords(text string) []string { var words []string var currentWord string // 简单的状态机: // - 如果是字母、数字或下划线,添加到当前单词 // - 否则,结束当前单词并开始新单词 for _, char := range text { if isWordChar(char) { currentWord += string(char) } else { if currentWord != "" { words = append(words, currentWord) currentWord = "" } } } // 添加最后一个单词(如果有) if currentWord != "" { words = append(words, currentWord) } return words } // isWordChar 判断字符是否是单词字符(字母、数字或下划线) func isWordChar(char rune) bool { return (char >= 'a' && char <= 'z') || (char >= 'A' && char <= 'Z') || (char >= '0' && char <= '9') || char == '_' }