mirror of
https://github.com/zongzibinbin/MallChat.git
synced 2026-03-13 21:53:41 +08:00
feat;resolve conflicts
This commit is contained in:
@@ -2,6 +2,7 @@ package com.abin.mallchat.common.common.algorithm.ac;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import javax.annotation.concurrent.NotThreadSafe;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@@ -9,6 +10,7 @@ import java.util.stream.Collectors;
|
||||
* aho-corasick算法(又称AC自动机算法)
|
||||
* Created by berg on 2023/6/18.
|
||||
*/
|
||||
@NotThreadSafe
|
||||
public class ACTrie {
|
||||
|
||||
// 根节点
|
||||
@@ -20,7 +22,7 @@ public class ACTrie {
|
||||
for (String word : words) {
|
||||
addWord(word);
|
||||
}
|
||||
initTrieFailover();
|
||||
initFailover();
|
||||
}
|
||||
|
||||
public void addWord(String word) {
|
||||
@@ -34,11 +36,14 @@ public class ACTrie {
|
||||
walkNode.setLeaf(true);
|
||||
}
|
||||
|
||||
public void initTrieFailover() {
|
||||
/**
|
||||
* 初始化节点中的回退指针
|
||||
*/
|
||||
private void initFailover() {
|
||||
//第一层的fail指针指向root
|
||||
Queue<ACTrieNode> queue = new LinkedList<>();
|
||||
Map<Character, ACTrieNode> childrens = root.getChildren();
|
||||
for (ACTrieNode node : childrens.values()) {
|
||||
Map<Character, ACTrieNode> children = root.getChildren();
|
||||
for (ACTrieNode node : children.values()) {
|
||||
node.setFailover(root);
|
||||
queue.offer(node);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
package com.abin.mallchat.common.common.utils;
|
||||
|
||||
import com.abin.mallchat.common.common.algorithm.ac.ACTrie;
|
||||
import com.abin.mallchat.common.common.algorithm.ac.MatchResult;
|
||||
import org.HdrHistogram.ConcurrentHistogram;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* 基于ac自动机实现的敏感词过滤工具类
|
||||
* 可以用来替代{@link ConcurrentHistogram}
|
||||
* 为了兼容提供了相同的api接口 {@code hasSensitiveWord}
|
||||
*
|
||||
* Created by berg on 2023/6/18.
|
||||
*/
|
||||
public class SensitiveWordUtils0 {
|
||||
|
||||
private final static char mask_char = '*'; // 替代字符
|
||||
|
||||
private static ACTrie ac_trie = null;
|
||||
|
||||
/**
|
||||
* 有敏感词
|
||||
*
|
||||
* @param text 文本
|
||||
* @return boolean
|
||||
*/
|
||||
public static boolean hasSensitiveWord(String text) {
|
||||
if (StringUtils.isBlank(text)) return false;
|
||||
return !Objects.equals(filter(text), text);
|
||||
}
|
||||
|
||||
/**
|
||||
* 敏感词替换
|
||||
*
|
||||
* @param text 待替换文本
|
||||
* @return 替换后的文本
|
||||
*/
|
||||
public static String filter(String text) {
|
||||
if (StringUtils.isBlank(text)) return text;
|
||||
List<MatchResult> matchResults = ac_trie.matches(text);
|
||||
StringBuffer result = new StringBuffer(text);
|
||||
// matchResults是按照startIndex排序的,因此可以通过不断更新endIndex最大值的方式算出尚未被替代部分
|
||||
int endIndex = 0;
|
||||
for (MatchResult matchResult : matchResults) {
|
||||
endIndex = Math.max(endIndex, matchResult.getEndIndex());
|
||||
replaceBetween(result, matchResult.getStartIndex(), endIndex);
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
private static void replaceBetween(StringBuffer buffer, int startIndex, int endIndex) {
|
||||
for (int i = startIndex; i < endIndex; i++) {
|
||||
buffer.setCharAt(i, mask_char);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 加载敏感词列表
|
||||
*
|
||||
* @param words 敏感词数组
|
||||
*/
|
||||
public static void loadWord(List<String> words) {
|
||||
if (words == null) return;
|
||||
ac_trie = new ACTrie(words);
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user