diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/algorithm/ac/ACTrie.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/algorithm/ac/ACTrie.java index f88b830..523e4b2 100644 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/algorithm/ac/ACTrie.java +++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/algorithm/ac/ACTrie.java @@ -2,6 +2,7 @@ package com.abin.mallchat.common.common.algorithm.ac; import com.google.common.collect.Lists; +import javax.annotation.concurrent.NotThreadSafe; import java.util.*; import java.util.stream.Collectors; @@ -9,6 +10,7 @@ import java.util.stream.Collectors; * aho-corasick算法(又称AC自动机算法) * Created by berg on 2023/6/18. */ +@NotThreadSafe public class ACTrie { // 根节点 @@ -20,7 +22,7 @@ public class ACTrie { for (String word : words) { addWord(word); } - initTrieFailover(); + initFailover(); } public void addWord(String word) { @@ -34,11 +36,14 @@ public class ACTrie { walkNode.setLeaf(true); } - public void initTrieFailover() { + /** + * 初始化节点中的回退指针 + */ + private void initFailover() { //第一层的fail指针指向root Queue queue = new LinkedList<>(); - Map childrens = root.getChildren(); - for (ACTrieNode node : childrens.values()) { + Map children = root.getChildren(); + for (ACTrieNode node : children.values()) { node.setFailover(root); queue.offer(node); } diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/SensitiveWordUtils0.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/SensitiveWordUtils0.java new file mode 100644 index 0000000..f67c694 --- /dev/null +++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/SensitiveWordUtils0.java @@ -0,0 +1,70 @@ +package com.abin.mallchat.common.common.utils; + +import com.abin.mallchat.common.common.algorithm.ac.ACTrie; +import com.abin.mallchat.common.common.algorithm.ac.MatchResult; +import org.HdrHistogram.ConcurrentHistogram; +import 
org.apache.commons.lang3.StringUtils;

import java.util.List;
import java.util.Objects;

/**
 * Sensitive-word filtering utility backed by an Aho-Corasick automaton ({@link ACTrie}).
 *
 * <p>Exposes {@code hasSensitiveWord} with the same shape as the existing sensitive-word
 * utility so that it can be used as a replacement for it.
 * NOTE(review): the original Javadoc linked to {@code ConcurrentHistogram}, which looks like
 * an accidental auto-import; presumably {@code SensitiveWordUtils} was meant - confirm.
 *
 * <p>{@link #loadWord(List)} must be called to install a word list. Until a list has been
 * loaded, text is treated as containing no sensitive words and is returned unchanged.
 *
 * Created by berg on 2023/6/18.
 */
public class SensitiveWordUtils0 {

    /** Character written over every position covered by a sensitive word. */
    private static final char MASK_CHAR = '*';

    /**
     * Automaton built from the most recently loaded word list; {@code null} until
     * {@link #loadWord(List)} has been called with a non-null list. Declared {@code volatile}
     * because the reference is replaced wholesale on reload and a reader on another thread
     * must observe the fully constructed replacement.
     */
    private static volatile ACTrie acTrie = null;

    /**
     * Tells whether the text contains at least one loaded sensitive word.
     *
     * <p>The match list is inspected directly rather than comparing the masked text with the
     * input, so a listed word that consists only of the mask character is still reported.
     *
     * @param text text to inspect; may be {@code null}
     * @return {@code true} if at least one sensitive word occurs in {@code text};
     *         {@code false} for blank text or when no word list has been loaded
     */
    public static boolean hasSensitiveWord(String text) {
        if (StringUtils.isBlank(text)) {
            return false;
        }
        // Read the shared reference once so the null check and the use see the same trie.
        ACTrie trie = acTrie;
        return trie != null && !trie.matches(text).isEmpty();
    }

    /**
     * Replaces every character covered by a sensitive word with the mask character.
     *
     * @param text text to filter; may be {@code null}
     * @return the masked text; {@code text} itself when it is blank, when no word list has
     *         been loaded, or when nothing matched
     */
    public static String filter(String text) {
        if (StringUtils.isBlank(text)) {
            return text;
        }
        // Read the shared reference once so the null check and the use see the same trie.
        ACTrie trie = acTrie;
        if (trie == null) {
            // No word list loaded yet: nothing to mask (previously a NullPointerException).
            return text;
        }
        List<MatchResult> matchResults = trie.matches(text);
        if (matchResults.isEmpty()) {
            return text;
        }
        // The buffer never leaves this method, so the unsynchronized builder is sufficient.
        StringBuilder result = new StringBuilder(text);
        // Per the original author's note, matches arrive ordered by start index. Tracking the
        // largest end index seen so far makes overlapping matches mask the union of their
        // ranges instead of leaving the tail of a longer, earlier match exposed.
        int endIndex = 0;
        for (MatchResult matchResult : matchResults) {
            endIndex = Math.max(endIndex, matchResult.getEndIndex());
            replaceBetween(result, matchResult.getStartIndex(), endIndex);
        }
        return result.toString();
    }

    /**
     * Overwrites the characters in {@code [startIndex, endIndex)} with the mask character.
     *
     * @param buffer     buffer to modify in place
     * @param startIndex first index to mask (inclusive)
     * @param endIndex   index at which masking stops (exclusive)
     */
    private static void replaceBetween(StringBuilder buffer, int startIndex, int endIndex) {
        for (int i = startIndex; i < endIndex; i++) {
            buffer.setCharAt(i, MASK_CHAR);
        }
    }

    /**
     * Loads the sensitive-word list, replacing any previously loaded list.
     *
     * <p>A {@code null} argument is ignored and leaves the current list in place.
     *
     * @param words sensitive words to match against
     */
    public static void loadWord(List<String> words) {
        if (words == null) {
            return;
        }
        // Build the new automaton completely before publishing it through the shared field.
        acTrie = new ACTrie(words);
    }

}