feat;resolve conflicts

This commit is contained in:
xiaocairush
2023-06-18 21:28:02 +08:00
parent 3943f44d19
commit 28116878d1
2 changed files with 79 additions and 4 deletions

View File

@@ -2,6 +2,7 @@ package com.abin.mallchat.common.common.algorithm.ac;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import javax.annotation.concurrent.NotThreadSafe;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@@ -9,6 +10,7 @@ import java.util.stream.Collectors;
* aho-corasick算法又称AC自动机算法 * aho-corasick算法又称AC自动机算法
* Created by berg on 2023/6/18. * Created by berg on 2023/6/18.
*/ */
@NotThreadSafe
public class ACTrie { public class ACTrie {
// 根节点 // 根节点
@@ -20,7 +22,7 @@ public class ACTrie {
for (String word : words) { for (String word : words) {
addWord(word); addWord(word);
} }
initTrieFailover(); initFailover();
} }
public void addWord(String word) { public void addWord(String word) {
@@ -34,11 +36,14 @@ public class ACTrie {
walkNode.setLeaf(true); walkNode.setLeaf(true);
} }
public void initTrieFailover() { /**
* 初始化节点中的回退指针
*/
private void initFailover() {
//第一层的fail指针指向root //第一层的fail指针指向root
Queue<ACTrieNode> queue = new LinkedList<>(); Queue<ACTrieNode> queue = new LinkedList<>();
Map<Character, ACTrieNode> childrens = root.getChildren(); Map<Character, ACTrieNode> children = root.getChildren();
for (ACTrieNode node : childrens.values()) { for (ACTrieNode node : children.values()) {
node.setFailover(root); node.setFailover(root);
queue.offer(node); queue.offer(node);
} }

View File

@@ -0,0 +1,70 @@
package com.abin.mallchat.common.common.utils;
import com.abin.mallchat.common.common.algorithm.ac.ACTrie;
import com.abin.mallchat.common.common.algorithm.ac.MatchResult;
import org.HdrHistogram.ConcurrentHistogram;
import org.apache.commons.lang3.StringUtils;
import java.util.List;
import java.util.Objects;
/**
 * Sensitive-word filtering utility backed by an Aho-Corasick automaton ({@link ACTrie}).
 * <p>
 * Per the original author's note, this class can be used to replace {@link ConcurrentHistogram}
 * and offers the same {@code hasSensitiveWord} API for compatibility.
 * NOTE(review): the link target above looks unintended (possibly an IDE auto-import);
 * confirm which class this one is meant to replace.
 * <p>
 * A dictionary must be installed with {@link #loadWord(List)} before any word can be detected;
 * until then {@link #filter(String)} returns its input unchanged.
 *
 * Created by berg on 2023/6/18.
 */
public class SensitiveWordUtils0 {
    /** Character written over every position covered by a sensitive word. */
    private static final char MASK_CHAR = '*';

    /** Automaton built from the most recently loaded word list; {@code null} until {@link #loadWord(List)} runs. */
    private static ACTrie acTrie = null;

    /**
     * Tells whether the text contains at least one sensitive word.
     * <p>
     * Implemented by comparing the text with its filtered form, so a text counts as
     * "sensitive" exactly when {@link #filter(String)} changes it.
     *
     * @param text text to inspect; may be {@code null}
     * @return {@code true} if filtering alters the text; {@code false} for blank text
     */
    public static boolean hasSensitiveWord(String text) {
        if (StringUtils.isBlank(text)) {
            return false;
        }
        return !Objects.equals(filter(text), text);
    }

    /**
     * Replaces every matched sensitive word in the text with mask characters.
     *
     * @param text text to filter; may be {@code null}
     * @return the masked text; the input itself when it is blank or when no
     *         dictionary has been loaded yet
     */
    public static String filter(String text) {
        if (StringUtils.isBlank(text)) {
            return text;
        }
        // Snapshot the field once so the null check and the lookup use the same trie.
        ACTrie trie = acTrie;
        if (trie == null) {
            // No dictionary loaded yet: nothing can match, so avoid a NullPointerException.
            return text;
        }
        List<MatchResult> matchResults = trie.matches(text);
        StringBuilder result = new StringBuilder(text);
        // Per the original author's note, matchResults is ordered by start index, so keeping
        // the running maximum end index lets overlapping matches be masked as one span.
        int endIndex = 0;
        for (MatchResult matchResult : matchResults) {
            endIndex = Math.max(endIndex, matchResult.getEndIndex());
            replaceBetween(result, matchResult.getStartIndex(), endIndex);
        }
        return result.toString();
    }

    /**
     * Overwrites the characters in {@code [startIndex, endIndex)} with the mask character.
     *
     * @param buffer     buffer to modify in place
     * @param startIndex first index to mask (inclusive)
     * @param endIndex   index at which masking stops (exclusive)
     */
    private static void replaceBetween(StringBuilder buffer, int startIndex, int endIndex) {
        for (int i = startIndex; i < endIndex; i++) {
            buffer.setCharAt(i, MASK_CHAR);
        }
    }

    /**
     * Loads the sensitive-word list, replacing any previously loaded dictionary.
     *
     * @param words sensitive words; a {@code null} list is ignored and the current
     *              dictionary is kept
     */
    public static void loadWord(List<String> words) {
        if (words == null) {
            return;
        }
        acTrie = new ACTrie(words);
    }
}