/**
 * End-type marker for an entry in the sensitive-word dictionary.
 *
 * <p>Declaration order is significant: the (currently commented-out)
 * {@code WordContext} in this package stores {@code ordinal()} as a string
 * under the {@code "isEnd"} key, and {@code WordFilter} compares that value
 * against {@code "1"}. Do not reorder or insert constants.
 *
 * @date 11:37 2020/11/11
 */
public enum EndType {

    /** Another character follows; the word does not end here (ordinal 0). */
    HAS_NEXT,

    /** The word ends at this character (ordinal 1). */
    IS_END
}
/**
 * Result of probing a text for a sensitive word starting at one position.
 *
 * <p>Plain mutable holder: it carries whether a dictionary word was matched,
 * whether that word is a whitelist entry, and the character positions that
 * made up the match. Instances are not thread-safe.
 */
public class FlagIndex {

    /**
     * Match result: {@code true} when a complete dictionary word was found.
     */
    private boolean flag;

    /**
     * Whether the matched word is a whitelist word (as opposed to a
     * blacklist word).
     */
    private boolean isWhiteWord;

    /**
     * Positions, in the scanned text, of the characters that were matched.
     * Remains {@code null} until {@link #setIndex(List)} is called.
     */
    private List<Integer> index;

    /**
     * @return {@code true} if a dictionary word was matched
     */
    public boolean isFlag() {
        return flag;
    }

    /**
     * @param flag {@code true} if a dictionary word was matched
     */
    public void setFlag(boolean flag) {
        this.flag = flag;
    }

    /**
     * @return the matched character positions, or {@code null} if none were
     *         ever set; the internal list is returned as-is (no copy)
     */
    public List<Integer> getIndex() {
        return index;
    }

    /**
     * @param index the matched character positions; stored as-is (no copy)
     */
    public void setIndex(List<Integer> index) {
        this.index = index;
    }

    /**
     * @return {@code true} if the matched word is a whitelist word
     */
    public boolean isWhiteWord() {
        return isWhiteWord;
    }

    /**
     * @param whiteWord {@code true} if the matched word is a whitelist word
     */
    public void setWhiteWord(boolean whiteWord) {
        isWhiteWord = whiteWord;
    }
}

+// * 初始化敏感词库,将敏感词加入到HashMap中,构建DFA算法模型 +// * +// * @author minghu.zhang +// */ +//@SuppressWarnings({"rawtypes", "unchecked"}) +//@Slf4j +//public class WordContext { +// +// /** +// * 敏感词字典 +// */ +// private final Map wordMap = new HashMap(1024); +// +// /** +// * 是否已初始化 +// */ +// private boolean init; +// +// private long addLastId; +// +// public WordContext(boolean autoLoad, SensitiveWordsService service) { +// clearDelData(service); +// Set black = new HashSet<>(); +// Set white = new HashSet<>(); +// List list = service.list(Wrappers.lambdaQuery().eq(SensitiveWords::getIsDeleted, IsDeletedFlagEnum.UN_DELETED.getCode())); +// for (SensitiveWords words : list) { +// if (words.getType() == 1) { +// black.add(words.getWords()); +// } else { +// white.add(words.getWords()); +// } +// } +// if (CollectionUtils.isNotEmpty(list)) { +// this.addLastId = list.get(list.size() - 1).getId(); +// } +// initKeyWord(black, white); +// if (autoLoad) { +// reloadWord(service); +// } +// } +// +// private void clearDelData(SensitiveWordsService service) { +// LambdaUpdateWrapper remove = Wrappers.lambdaUpdate().eq(SensitiveWords::getIsDeleted, IsDeletedFlagEnum.DELETED.getCode()); +// service.remove(remove); +// } +// +// private void reloadWord(SensitiveWordsService service) { +// +// // 创建一个单线程的定时线程池 +// ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor(); +// // 创建一个Runnable任务 +// Runnable task = () -> { +// try { +// addNewWords(service); +// removeDelWords(service); +// } catch (Exception e) { +// log.error("Sensitive words task error", e); +// } +// }; +// // 定时执行任务,初始延迟0,之后每分钟执行一次 +// scheduler.scheduleAtFixedRate(task, 0, 1, TimeUnit.MINUTES); +// +// } +// +// private void removeDelWords(SensitiveWordsService service) { +// LambdaUpdateWrapper query = Wrappers.lambdaUpdate() +// .eq(SensitiveWords::getIsDeleted, IsDeletedFlagEnum.DELETED.getCode()); +// List list = service.list(query); +// if (CollectionUtils.isNotEmpty(list)) { +// 
log.info("removeDelWords {}", JSON.toJSON(list)); +// Set black = new HashSet<>(); +// Set white = new HashSet<>(); +// for (SensitiveWords words : list) { +// if (words.getType() == 1) { +// black.add(words.getWords()); +// } else { +// white.add(words.getWords()); +// } +// } +// removeWord(black, WordType.BLACK); +// removeWord(white, WordType.WHITE); +// } +// } +// +// private void addNewWords(SensitiveWordsService service) { +// LambdaUpdateWrapper query = Wrappers.lambdaUpdate() +// .gt(SensitiveWords::getId, addLastId) +// .eq(SensitiveWords::getIsDeleted, IsDeletedFlagEnum.UN_DELETED.getCode()); +// List list = service.list(query); +// if (CollectionUtils.isNotEmpty(list)) { +// log.info("addNewWords {}", JSON.toJSON(list)); +// this.addLastId = list.get(list.size() - 1).getId(); +// Set black = new HashSet<>(); +// Set white = new HashSet<>(); +// for (SensitiveWords words : list) { +// if (words.getType() == 1) { +// black.add(words.getWords()); +// } else { +// white.add(words.getWords()); +// } +// } +// addWord(black, WordType.BLACK); +// addWord(white, WordType.WHITE); +// } +// } +// +// /** +// * 获取初始化的敏感词列表 +// * +// * @return 敏感词列表 +// */ +// public Map getWordMap() { +// return wordMap; +// } +// +// /** +// * 初始化 +// */ +// private synchronized void initKeyWord(Set black, Set white) { +// try { +// if (!init) { +// // 将敏感词库加入到HashMap中 +// addWord(black, WordType.BLACK); +// // 将非敏感词库也加入到HashMap中 +// addWord(white, WordType.WHITE); +// } +// init = true; +// } catch (Exception e) { +// throw new RuntimeException(e); +// } +// } +// +// /** +// * 读取敏感词库,将敏感词放入HashSet中,构建一个DFA算法模型:
+// * 中 = { isEnd = 0 国 = {
+// * isEnd = 1 人 = {isEnd = 0 民 = {isEnd = 1} } 男 = { isEnd = 0 人 = { isEnd = 1 } +// * } } } 五 = { isEnd = 0 星 = { isEnd = 0 红 = { isEnd = 0 旗 = { isEnd = 1 } } } } +// */ +// public void addWord(Collection wordList, WordType wordType) { +// if (CollectionUtils.isEmpty(wordList)) { +// return; +// } +// Map nowMap; +// Map newWorMap; +// // 迭代keyWordSet +// for (String key : wordList) { +// nowMap = wordMap; +// for (int i = 0; i < key.length(); i++) { +// // 转换成char型 +// char keyChar = key.charAt(i); +// // 获取 +// Object wordMap = nowMap.get(keyChar); +// // 如果存在该key,直接赋值 +// if (wordMap != null) { +// nowMap = (Map) wordMap; +// } else { +// // 不存在则构建一个map,同时将isEnd设置为0,因为他不是最后一个 +// newWorMap = new HashMap<>(4); +// // 不是最后一个 +// newWorMap.put("isEnd", String.valueOf(EndType.HAS_NEXT.ordinal())); +// nowMap.put(keyChar, newWorMap); +// nowMap = newWorMap; +// } +// +// if (i == key.length() - 1) { +// // 最后一个 +// nowMap.put("isEnd", String.valueOf(EndType.IS_END.ordinal())); +// nowMap.put("isWhiteWord", String.valueOf(wordType.ordinal())); +// } +// } +// } +// } +// +// /** +// * 在线删除敏感词 +// * +// * @param wordList 敏感词列表 +// * @param wordType 黑名单 BLACk,白名单WHITE +// */ +// public void removeWord(Collection wordList, WordType wordType) { +// if (CollectionUtils.isEmpty(wordList)) { +// return; +// } +// Map nowMap; +// for (String key : wordList) { +// List cacheList = new ArrayList<>(); +// nowMap = wordMap; +// for (int i = 0; i < key.length(); i++) { +// char keyChar = key.charAt(i); +// +// Object map = nowMap.get(keyChar); +// if (map != null) { +// nowMap = (Map) map; +// cacheList.add(nowMap); +// } else { +// return; +// } +// +// if (i == key.length() - 1) { +// char[] keys = key.toCharArray(); +// boolean cleanable = false; +// char lastChar = 0; +// for (int j = cacheList.size() - 1; j >= 0; j--) { +// Map cacheMap = cacheList.get(j); +// if (j == cacheList.size() - 1) { +// if 
(String.valueOf(WordType.BLACK.ordinal()).equals(cacheMap.get("isWhiteWord"))) { +// if (wordType == WordType.WHITE) { +// return; +// } +// } +// if (String.valueOf(WordType.WHITE.ordinal()).equals(cacheMap.get("isWhiteWord"))) { +// if (wordType == WordType.BLACK) { +// return; +// } +// } +// cacheMap.remove("isWhiteWord"); +// cacheMap.remove("isEnd"); +// if (cacheMap.size() == 0) { +// cleanable = true; +// continue; +// } +// } +// if (cleanable) { +// Object isEnd = cacheMap.get("isEnd"); +// if (String.valueOf(EndType.IS_END.ordinal()).equals(isEnd)) { +// cleanable = false; +// } +// cacheMap.remove(lastChar); +// } +// lastChar = keys[j]; +// } +// +// if (cleanable) { +// wordMap.remove(lastChar); +// } +// } +// } +// } +// } +// +//} diff --git a/schisandra-cloud-storage-share/schisandra-cloud-storage-share-application/schisandra-cloud-storage-share-application-controller/src/main/java/com/schisandra/share/application/sensitive/WordFilter.java b/schisandra-cloud-storage-share/schisandra-cloud-storage-share-application/schisandra-cloud-storage-share-application-controller/src/main/java/com/schisandra/share/application/sensitive/WordFilter.java new file mode 100644 index 0000000..a601481 --- /dev/null +++ b/schisandra-cloud-storage-share/schisandra-cloud-storage-share-application/schisandra-cloud-storage-share-application-controller/src/main/java/com/schisandra/share/application/sensitive/WordFilter.java @@ -0,0 +1,222 @@ +//package com.schisandra.share.application.sensitive; +// +// +//import java.util.ArrayList; +//import java.util.List; +//import java.util.Map; +//import java.util.Objects; +// +///** +// * 敏感词过滤器 +// * +// * @author minghu.zhang +// */ +//@SuppressWarnings("rawtypes") +//public class WordFilter { +// +// /** +// * 敏感词表 +// */ +// private final Map wordMap; +// +// /** +// * 构造函数 +// */ +// public WordFilter(WordContext context) { +// this.wordMap = context.getWordMap(); +// } +// +// /** +// * 替换敏感词 +// * +// * @param text 输入文本 +// 
*/ +// public String replace(final String text) { +// return replace(text, 0, '*'); +// } +// +// /** +// * 替换敏感词 +// * +// * @param text 输入文本 +// * @param symbol 替换符号 +// */ +// public String replace(final String text, final char symbol) { +// return replace(text, 0, symbol); +// } +// +// /** +// * 替换敏感词 +// * +// * @param text 输入文本 +// * @param skip 文本距离 +// * @param symbol 替换符号 +// */ +// public String replace(final String text, final int skip, final char symbol) { +// char[] charset = text.toCharArray(); +// for (int i = 0; i < charset.length; i++) { +// FlagIndex fi = getFlagIndex(charset, i, skip); +// if (fi.isFlag()) { +// if (!fi.isWhiteWord()) { +// for (int j : fi.getIndex()) { +// charset[j] = symbol; +// } +// } else { +// i += fi.getIndex().size() - 1; +// } +// } +// } +// return new String(charset); +// } +// +// /** +// * 是否包含敏感词 +// * +// * @param text 输入文本 +// */ +// public boolean include(final String text) { +// return include(text, 0); +// } +// +// /** +// * 是否包含敏感词 +// * +// * @param text 输入文本 +// * @param skip 文本距离 +// */ +// public boolean include(final String text, final int skip) { +// boolean include = false; +// char[] charset = text.toCharArray(); +// for (int i = 0; i < charset.length; i++) { +// FlagIndex fi = getFlagIndex(charset, i, skip); +// if (fi.isFlag()) { +// if (fi.isWhiteWord()) { +// i += fi.getIndex().size() - 1; +// } else { +// include = true; +// break; +// } +// } +// } +// return include; +// } +// +// /** +// * 获取敏感词数量 +// * +// * @param text 输入文本 +// */ +// public int wordCount(final String text) { +// return wordCount(text, 0); +// } +// +// /** +// * 获取敏感词数量 +// * +// * @param text 输入文本 +// * @param skip 文本距离 +// */ +// public int wordCount(final String text, final int skip) { +// int count = 0; +// char[] charset = text.toCharArray(); +// for (int i = 0; i < charset.length; i++) { +// FlagIndex fi = getFlagIndex(charset, i, skip); +// if (fi.isFlag()) { +// if (fi.isWhiteWord()) { +// i += 
fi.getIndex().size() - 1; +// } else { +// count++; +// } +// } +// } +// return count; +// } +// +// public void check(final String text) { +// List wordList = wordList(text); +// if (CollectionUtils.isNotEmpty(wordList)) { +// throw new IllegalArgumentException(String.format("内容包含敏感词 【%s】", String.join("、", wordList))); +// } +// } +// +// /** +// * 获取敏感词列表 +// * +// * @param text 输入文本 +// */ +// public List wordList(final String text) { +// return wordList(text, 0); +// } +// +// /** +// * 获取敏感词列表 +// * +// * @param text 输入文本 +// * @param skip 文本距离 +// */ +// public List wordList(final String text, final int skip) { +// List wordList = new ArrayList<>(); +// char[] charset = text.toCharArray(); +// for (int i = 0; i < charset.length; i++) { +// FlagIndex fi = getFlagIndex(charset, i, skip); +// if (fi.isFlag()) { +// if (fi.isWhiteWord()) { +// i += fi.getIndex().size() - 1; +// } else { +// StringBuilder builder = new StringBuilder(); +// for (int j : fi.getIndex()) { +// char word = text.charAt(j); +// builder.append(word); +// } +// wordList.add(builder.toString()); +// } +// } +// } +// return wordList; +// } +// +// /** +// * 获取标记索引 +// * +// * @param charset 输入文本 +// * @param begin 检测起始 +// * @param skip 文本距离 +// */ +// private FlagIndex getFlagIndex(final char[] charset, final int begin, final int skip) { +// FlagIndex fi = new FlagIndex(); +// +// Map current = wordMap; +// boolean flag = false; +// int count = 0; +// List index = new ArrayList<>(); +// for (int i = begin; i < charset.length; i++) { +// char word = charset[i]; +// Map mapTree = (Map) current.get(word); +// if (count > skip || (i == begin && Objects.isNull(mapTree))) { +// break; +// } +// if (Objects.nonNull(mapTree)) { +// current = mapTree; +// count = 0; +// index.add(i); +// } else { +// count++; +// if (flag && count > skip) { +// break; +// } +// } +// if ("1".equals(current.get("isEnd"))) { +// flag = true; +// } +// if ("1".equals(current.get("isWhiteWord"))) { +// 
/**
 * Word list category: blacklist or whitelist.
 *
 * <p>Declaration order is significant: the (currently commented-out)
 * {@code WordContext} in this package stores {@code ordinal()} as a string
 * under the {@code "isWhiteWord"} key, and {@code WordFilter} compares that
 * value against {@code "1"}. Do not reorder or insert constants.
 *
 * @author minghu.zhang
 * @date 11:37 2020/11/11
 */
public enum WordType {

    /** Blacklist word (ordinal 0). */
    BLACK,

    /** Whitelist word (ordinal 1). */
    WHITE
}
caffeine - 3.1.8 + 2.9.3