// File: schisandra-cloud-storage-share/schisandra-cloud-storage-share-application/schisandra-cloud-storage-share-application-controller/src/main/java/com/schisandra/share/application/sensitive/WordContext.java
package com.schisandra.share.application.sensitive;

import com.alibaba.fastjson.JSON;

import com.mybatisflex.core.query.QueryWrapper;
import com.mybatisflex.core.util.CollectionUtil;
import com.schisandra.share.common.enums.IsDeletedFlagEnum;
import com.schisandra.share.infra.basic.dao.SchisandraShareSensitiveWordsDao;
import com.schisandra.share.infra.basic.entity.SchisandraShareSensitiveWords;
import com.schisandra.share.infra.basic.entity.table.SchisandraShareSensitiveWordsTableDef;
import com.schisandra.share.infra.basic.service.SchisandraShareSensitiveWordsService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.util.CollectionUtils;

import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import javax.annotation.Resource;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

/**
 * Dictionary context for the sensitive-word filter.
 *
 * <p>Loads the sensitive words from the database and stores them in a trie of nested maps
 * (a DFA model). Every node maps a {@link Character} to its child node and additionally
 * carries two marker entries under the string keys {@code "isEnd"} and {@code "isWhiteWord"}.
 *
 * <p>The trie nodes are {@link ConcurrentHashMap}s because the optional reload task mutates
 * the trie on its own thread while {@link WordFilter} instances read the same map.
 *
 * @author minghu.zhang
 */
@SuppressWarnings({"rawtypes", "unchecked"})
@Slf4j
public class WordContext {

    /** Marker key: whether a dictionary word ends at this node. */
    private static final String KEY_IS_END = "isEnd";

    /** Marker key: the {@link WordType} ordinal of the word that ends at this node. */
    private static final String KEY_IS_WHITE_WORD = "isWhiteWord";

    /**
     * Sensitive-word dictionary (root node of the trie).
     */
    private final Map wordMap = new ConcurrentHashMap(1024);

    /**
     * Whether the dictionary has been loaded.
     */
    private boolean init;

    /** Highest numeric row id that has already been merged into the dictionary. */
    private long addLastId;

    /** Whether the periodic reload task is started after the initial load. */
    private final boolean autoLoad;

    /** Reload scheduler; stays {@code null} unless {@code autoLoad} is set. */
    private ScheduledExecutorService scheduler;

    @Resource
    private SchisandraShareSensitiveWordsDao schisandraShareSensitiveWordsDao;

    /**
     * Creates a context whose DAO is injected by the container.
     *
     * <p>The dictionary is NOT loaded here: the {@code @Resource} field can only be populated
     * after the constructor has returned, so touching the DAO at this point would always fail
     * with a {@link NullPointerException}. Loading happens in the {@code @PostConstruct}
     * callback instead.
     *
     * @param autoLoad {@code true} to poll the database every minute for added/deleted words
     */
    public WordContext(boolean autoLoad) {
        this.autoLoad = autoLoad;
    }

    /**
     * Creates a context with an explicitly supplied DAO and loads the dictionary immediately.
     * Intended for use outside a dependency-injection container.
     *
     * @param autoLoad {@code true} to poll the database every minute for added/deleted words
     * @param dao      data access object for the sensitive-word table, must not be {@code null}
     */
    public WordContext(boolean autoLoad, SchisandraShareSensitiveWordsDao dao) {
        this.autoLoad = autoLoad;
        this.schisandraShareSensitiveWordsDao = Objects.requireNonNull(dao, "schisandraShareSensitiveWordsDao");
        loadDictionary();
    }

    /**
     * Purges rows flagged as deleted, loads every remaining word into the trie and, when
     * requested, starts the periodic reload task. Runs at most once per instance.
     *
     * @throws IllegalStateException if no DAO has been injected or supplied
     */
    @PostConstruct
    private synchronized void loadDictionary() {
        if (init) {
            return;
        }
        if (schisandraShareSensitiveWordsDao == null) {
            throw new IllegalStateException("schisandraShareSensitiveWordsDao has not been injected");
        }
        clearDelData();
        List<SchisandraShareSensitiveWords> list = listByDeletedFlag(IsDeletedFlagEnum.UN_DELETED);
        Set<String> black = new HashSet<>();
        Set<String> white = new HashSet<>();
        splitByType(list, black, white);
        this.addLastId = maxId(list, addLastId);
        initKeyWord(black, white);
        if (autoLoad) {
            reloadWord();
        }
    }

    /**
     * Stops the reload task, if one was started.
     */
    @PreDestroy
    public synchronized void shutdown() {
        if (scheduler != null) {
            scheduler.shutdownNow();
            scheduler = null;
        }
    }

    /** Selects every row whose {@code IS_DELETED} column equals the code of the given flag. */
    private List<SchisandraShareSensitiveWords> listByDeletedFlag(IsDeletedFlagEnum flag) {
        QueryWrapper query = new QueryWrapper();
        query.where(SchisandraShareSensitiveWordsTableDef.SCHISANDRA_SHARE_SENSITIVE_WORDS.IS_DELETED.eq(flag.getCode()));
        return schisandraShareSensitiveWordsDao.selectListByQuery(query);
    }

    /** Sorts rows into the black set (type {@code 1}) or the white set (any other type). */
    private static void splitByType(List<SchisandraShareSensitiveWords> rows, Set<String> black, Set<String> white) {
        if (rows == null) {
            return;
        }
        for (SchisandraShareSensitiveWords words : rows) {
            if (words.getType() == 1) {
                black.add(words.getWords());
            } else {
                white.add(words.getWords());
            }
        }
    }

    /**
     * Returns the largest numeric id among {@code rows}, or {@code floor} if none is larger.
     * The queries carry no ORDER BY, so the last row is not guaranteed to hold the highest id.
     * Rows whose id is not numeric are skipped.
     */
    private static long maxId(List<SchisandraShareSensitiveWords> rows, long floor) {
        long max = floor;
        if (rows == null) {
            return max;
        }
        for (SchisandraShareSensitiveWords row : rows) {
            try {
                max = Math.max(max, Long.parseLong(row.getId()));
            } catch (NumberFormatException e) {
                log.warn("Ignoring sensitive word row with non-numeric id {}", row.getId());
            }
        }
        return max;
    }

    /** Physically deletes every row that is flagged as deleted. */
    private void clearDelData() {
        List<SchisandraShareSensitiveWords> list = listByDeletedFlag(IsDeletedFlagEnum.DELETED);
        // Nothing to purge; also avoids handing an empty id collection to the batch delete.
        if (CollectionUtils.isEmpty(list)) {
            return;
        }
        schisandraShareSensitiveWordsDao.deleteBatchByIds(
                list.stream().map(SchisandraShareSensitiveWords::getId).collect(Collectors.toList()));
    }

    /** Starts a single-threaded task that merges added and deleted words once per minute. */
    private void reloadWord() {
        // Daemon thread so that the reload task never keeps the JVM alive on its own.
        scheduler = Executors.newSingleThreadScheduledExecutor(runnable -> {
            Thread thread = new Thread(runnable, "sensitive-words-reload");
            thread.setDaemon(true);
            return thread;
        });
        Runnable task = () -> {
            try {
                addNewWords();
                removeDelWords();
            } catch (Exception e) {
                // Catch everything: an escaping exception would cancel all future runs.
                log.error("Sensitive words task error", e);
            }
        };
        // Initial delay 0, then once every minute.
        scheduler.scheduleAtFixedRate(task, 0, 1, TimeUnit.MINUTES);
    }

    /** Removes from the trie every word whose row is currently flagged as deleted. */
    private void removeDelWords() {
        List<SchisandraShareSensitiveWords> list = listByDeletedFlag(IsDeletedFlagEnum.DELETED);

        if (CollectionUtil.isNotEmpty(list)) {
            log.info("removeDelWords {}", JSON.toJSON(list));
            Set<String> black = new HashSet<>();
            Set<String> white = new HashSet<>();
            splitByType(list, black, white);
            removeWord(black, WordType.BLACK);
            removeWord(white, WordType.WHITE);
        }
    }

    /** Adds to the trie every non-deleted row whose id is greater than the last merged id. */
    private void addNewWords() {
        QueryWrapper query = new QueryWrapper();
        query.where(SchisandraShareSensitiveWordsTableDef.SCHISANDRA_SHARE_SENSITIVE_WORDS.ID.gt(addLastId).and(SchisandraShareSensitiveWordsTableDef.SCHISANDRA_SHARE_SENSITIVE_WORDS.IS_DELETED.eq(IsDeletedFlagEnum.UN_DELETED.getCode())));
        List<SchisandraShareSensitiveWords> list = schisandraShareSensitiveWordsDao.selectListByQuery(query);

        if (CollectionUtil.isNotEmpty(list)) {
            log.info("addNewWords {}", JSON.toJSON(list));
            this.addLastId = maxId(list, addLastId);
            Set<String> black = new HashSet<>();
            Set<String> white = new HashSet<>();
            splitByType(list, black, white);
            addWord(black, WordType.BLACK);
            addWord(white, WordType.WHITE);
        }
    }

    /**
     * Returns the dictionary trie.
     *
     * @return the root node of the trie (the live map, not a copy)
     */
    public Map getWordMap() {
        return wordMap;
    }

    /**
     * Performs the one-time initial fill of the trie.
     */
    private synchronized void initKeyWord(Set<String> black, Set<String> white) {
        try {
            if (!init) {
                // Black-listed (sensitive) words.
                addWord(black, WordType.BLACK);
                // White-listed words share the same trie.
                addWord(white, WordType.WHITE);
            }
            init = true;
        } catch (Exception e) {
            throw new IllegalStateException("Failed to initialise the sensitive word dictionary", e);
        }
    }

    /**
     * Adds words to the trie. For the words {@code "ab"} and {@code "abc"} the result is:
     * <pre>
     * a = { isEnd = 0
     *       b = { isEnd = 1
     *             c = { isEnd = 1 } } }
     * </pre>
     * {@code null} and empty words are skipped.
     *
     * @param wordList words to add; {@code null} or empty is a no-op
     * @param wordType list the words belong to (black or white)
     */
    public void addWord(Collection<String> wordList, WordType wordType) {
        if (CollectionUtils.isEmpty(wordList)) {
            return;
        }
        for (String key : wordList) {
            if (key == null || key.isEmpty()) {
                continue;
            }
            Map nowMap = wordMap;
            for (int i = 0; i < key.length(); i++) {
                char keyChar = key.charAt(i);
                Object child = nowMap.get(keyChar);
                if (child != null) {
                    nowMap = (Map) child;
                } else {
                    // New node; it is not a word end until the last character marks it below.
                    Map newWorMap = new ConcurrentHashMap(4);
                    newWorMap.put(KEY_IS_END, String.valueOf(EndType.HAS_NEXT.ordinal()));
                    nowMap.put(keyChar, newWorMap);
                    nowMap = newWorMap;
                }
            }
            // Publish the type before the end marker so that a concurrent reader never sees
            // a finished word without its type.
            nowMap.put(KEY_IS_WHITE_WORD, String.valueOf(wordType.ordinal()));
            nowMap.put(KEY_IS_END, String.valueOf(EndType.IS_END.ordinal()));
        }
    }

    /**
     * Removes words from the trie at runtime. A word that is not present, or that is stored
     * under the other list type, is skipped; the remaining words are still processed.
     *
     * @param wordList words to remove; {@code null} or empty is a no-op
     * @param wordType list the words belong to (black or white)
     */
    public void removeWord(Collection<String> wordList, WordType wordType) {
        if (CollectionUtils.isEmpty(wordList)) {
            return;
        }
        for (String key : wordList) {
            removeSingleWord(key, wordType);
        }
    }

    /** Unmarks one word and prunes the nodes that no other word needs any more. */
    private void removeSingleWord(String key, WordType wordType) {
        if (key == null || key.isEmpty()) {
            return;
        }
        // path.get(0) is the root, path.get(d) is the node reached after d characters.
        List<Map> path = new ArrayList<>(key.length() + 1);
        Map nowMap = wordMap;
        path.add(nowMap);
        for (int i = 0; i < key.length(); i++) {
            Object child = nowMap.get(key.charAt(i));
            if (child == null) {
                // The word is not in the dictionary.
                return;
            }
            nowMap = (Map) child;
            path.add(nowMap);
        }

        // Only remove a word that is stored under the requested list type.
        if (!String.valueOf(wordType.ordinal()).equals(nowMap.get(KEY_IS_WHITE_WORD))) {
            return;
        }
        // Drop the end marker first so that a concurrent reader never sees a word end
        // without its type.
        nowMap.remove(KEY_IS_END);
        nowMap.remove(KEY_IS_WHITE_WORD);

        // Walk back towards the root, detaching every node that neither ends another word
        // nor leads to one.
        for (int depth = key.length(); depth > 0; depth--) {
            Map current = path.get(depth);
            if (isWordEnd(current) || hasChildren(current)) {
                break;
            }
            path.get(depth - 1).remove(Character.valueOf(key.charAt(depth - 1)));
        }
    }

    /** Whether a dictionary word ends at the given node. */
    private static boolean isWordEnd(Map node) {
        return String.valueOf(EndType.IS_END.ordinal()).equals(node.get(KEY_IS_END));
    }

    /** Whether the node has at least one child, i.e. an entry keyed by a {@link Character}. */
    private static boolean hasChildren(Map node) {
        for (Object entryKey : node.keySet()) {
            if (entryKey instanceof Character) {
                return true;
            }
        }
        return false;
    }

}
// File: schisandra-cloud-storage-share/schisandra-cloud-storage-share-application/schisandra-cloud-storage-share-application-controller/src/main/java/com/schisandra/share/application/sensitive/WordFilter.java
package com.schisandra.share.application.sensitive;


import com.mybatisflex.core.util.CollectionUtil;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;

/**
 * Sensitive-word filter that scans text against the trie held by a {@link WordContext}.
 *
 * <p>All public methods treat a {@code null} or empty text as containing no sensitive words.
 * The {@code skip} parameter is the maximum number of consecutive non-matching characters
 * tolerated between two characters of the same word.
 *
 * @author minghu.zhang
 */
@SuppressWarnings("rawtypes")
public class WordFilter {

    /**
     * Root node of the sensitive-word trie.
     */
    private final Map wordMap;

    /**
     * Creates a filter backed by the dictionary of the given context.
     *
     * @param context dictionary context; its word map is referenced, not copied
     */
    public WordFilter(WordContext context) {
        this.wordMap = context.getWordMap();
    }

    /**
     * Replaces every black-listed word with {@code '*'} characters.
     *
     * @param text input text
     * @return the masked text, or {@code text} itself when it is {@code null} or empty
     */
    public String replace(final String text) {
        return replace(text, 0, '*');
    }

    /**
     * Replaces every black-listed word with the given symbol.
     *
     * @param text   input text
     * @param symbol replacement character
     * @return the masked text, or {@code text} itself when it is {@code null} or empty
     */
    public String replace(final String text, final char symbol) {
        return replace(text, 0, symbol);
    }

    /**
     * Replaces every black-listed word with the given symbol.
     *
     * @param text   input text
     * @param skip   tolerated gap between two characters of a word
     * @param symbol replacement character
     * @return the masked text, or {@code text} itself when it is {@code null} or empty
     */
    public String replace(final String text, final int skip, final char symbol) {
        if (text == null || text.isEmpty()) {
            return text;
        }
        char[] charset = text.toCharArray();
        for (int i = 0; i < charset.length; i++) {
            FlagIndex fi = getFlagIndex(charset, i, skip);
            if (!fi.isFlag()) {
                continue;
            }
            if (fi.isWhiteWord()) {
                // Jump over the white-listed word so that nothing inside it gets masked.
                i += fi.getIndex().size() - 1;
            } else {
                for (int j : fi.getIndex()) {
                    charset[j] = symbol;
                }
            }
        }
        return new String(charset);
    }

    /**
     * Tells whether the text contains a black-listed word.
     *
     * @param text input text
     * @return {@code true} if at least one black-listed word is found
     */
    public boolean include(final String text) {
        return include(text, 0);
    }

    /**
     * Tells whether the text contains a black-listed word.
     *
     * @param text input text
     * @param skip tolerated gap between two characters of a word
     * @return {@code true} if at least one black-listed word is found
     */
    public boolean include(final String text, final int skip) {
        if (text == null || text.isEmpty()) {
            return false;
        }
        char[] charset = text.toCharArray();
        for (int i = 0; i < charset.length; i++) {
            FlagIndex fi = getFlagIndex(charset, i, skip);
            if (!fi.isFlag()) {
                continue;
            }
            if (!fi.isWhiteWord()) {
                return true;
            }
            i += fi.getIndex().size() - 1;
        }
        return false;
    }

    /**
     * Counts the black-listed word matches in the text.
     *
     * @param text input text
     * @return number of matches
     */
    public int wordCount(final String text) {
        return wordCount(text, 0);
    }

    /**
     * Counts the black-listed word matches in the text.
     *
     * @param text input text
     * @param skip tolerated gap between two characters of a word
     * @return number of matches
     */
    public int wordCount(final String text, final int skip) {
        if (text == null || text.isEmpty()) {
            return 0;
        }
        int count = 0;
        char[] charset = text.toCharArray();
        for (int i = 0; i < charset.length; i++) {
            FlagIndex fi = getFlagIndex(charset, i, skip);
            if (!fi.isFlag()) {
                continue;
            }
            if (fi.isWhiteWord()) {
                i += fi.getIndex().size() - 1;
            } else {
                count++;
            }
        }
        return count;
    }

    /**
     * Rejects text that contains black-listed words.
     *
     * @param text input text
     * @throws IllegalArgumentException if at least one black-listed word is found; the message
     *                                  lists the matches
     */
    public void check(final String text) {
        List<String> wordList = wordList(text);
        if (CollectionUtil.isNotEmpty(wordList)) {
            throw new IllegalArgumentException(String.format("内容包含敏感词 【%s】", String.join("、", wordList)));
        }
    }

    /**
     * Lists the black-listed words found in the text.
     *
     * @param text input text
     * @return the matches in order of appearance; never {@code null}
     */
    public List<String> wordList(final String text) {
        return wordList(text, 0);
    }

    /**
     * Lists the black-listed words found in the text.
     *
     * @param text input text
     * @param skip tolerated gap between two characters of a word
     * @return the matches in order of appearance; never {@code null}
     */
    public List<String> wordList(final String text, final int skip) {
        List<String> wordList = new ArrayList<>();
        if (text == null || text.isEmpty()) {
            return wordList;
        }
        char[] charset = text.toCharArray();
        for (int i = 0; i < charset.length; i++) {
            FlagIndex fi = getFlagIndex(charset, i, skip);
            if (!fi.isFlag()) {
                continue;
            }
            if (fi.isWhiteWord()) {
                i += fi.getIndex().size() - 1;
            } else {
                StringBuilder builder = new StringBuilder();
                for (int j : fi.getIndex()) {
                    builder.append(charset[j]);
                }
                wordList.add(builder.toString());
            }
        }
        return wordList;
    }

    /**
     * Walks the trie from {@code begin} and reports the longest dictionary word starting
     * there.
     *
     * @param charset input text
     * @param begin   position at which the scan starts
     * @param skip    tolerated gap between two characters of a word
     * @return match result; when flagged, its index list holds exactly the positions of the
     *         characters that make up the matched word
     */
    private FlagIndex getFlagIndex(final char[] charset, final int begin, final int skip) {
        FlagIndex fi = new FlagIndex();

        Map current = wordMap;
        boolean flag = false;
        int count = 0;
        // Number of entries of 'index' that belong to the last complete word.
        int matchedSize = 0;
        List<Integer> index = new ArrayList<>();
        for (int i = begin; i < charset.length; i++) {
            char word = charset[i];
            Map mapTree = (Map) current.get(word);
            if (count > skip || (i == begin && Objects.isNull(mapTree))) {
                break;
            }
            if (Objects.nonNull(mapTree)) {
                current = mapTree;
                count = 0;
                index.add(i);
            } else {
                count++;
                if (flag && count > skip) {
                    break;
                }
            }
            if ("1".equals(current.get("isEnd"))) {
                flag = true;
                matchedSize = index.size();
            }
            if ("1".equals(current.get("isWhiteWord"))) {
                fi.setWhiteWord(true);
                break;
            }
        }

        // The scan may have walked past the last complete word into the prefix of a longer
        // entry that never completed; those trailing positions are not part of the match.
        if (flag && matchedSize < index.size()) {
            index = new ArrayList<>(index.subList(0, matchedSize));
        }

        fi.setFlag(flag);
        fi.setIndex(index);

        return fi;
    }
}