mirror of
https://github.com/zongzibinbin/MallChat.git
synced 2026-03-13 21:53:41 +08:00
feat;resolve conflicts
This commit is contained in:
@@ -2,6 +2,7 @@ package com.abin.mallchat.common.common.algorithm.ac;
|
|||||||
|
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
|
import javax.annotation.concurrent.NotThreadSafe;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
@@ -9,6 +10,7 @@ import java.util.stream.Collectors;
|
|||||||
* aho-corasick算法(又称AC自动机算法)
|
* aho-corasick算法(又称AC自动机算法)
|
||||||
* Created by berg on 2023/6/18.
|
* Created by berg on 2023/6/18.
|
||||||
*/
|
*/
|
||||||
|
@NotThreadSafe
|
||||||
public class ACTrie {
|
public class ACTrie {
|
||||||
|
|
||||||
// 根节点
|
// 根节点
|
||||||
@@ -20,7 +22,7 @@ public class ACTrie {
|
|||||||
for (String word : words) {
|
for (String word : words) {
|
||||||
addWord(word);
|
addWord(word);
|
||||||
}
|
}
|
||||||
initTrieFailover();
|
initFailover();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addWord(String word) {
|
public void addWord(String word) {
|
||||||
@@ -34,11 +36,14 @@ public class ACTrie {
|
|||||||
walkNode.setLeaf(true);
|
walkNode.setLeaf(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void initTrieFailover() {
|
/**
|
||||||
|
* 初始化节点中的回退指针
|
||||||
|
*/
|
||||||
|
private void initFailover() {
|
||||||
//第一层的fail指针指向root
|
//第一层的fail指针指向root
|
||||||
Queue<ACTrieNode> queue = new LinkedList<>();
|
Queue<ACTrieNode> queue = new LinkedList<>();
|
||||||
Map<Character, ACTrieNode> childrens = root.getChildren();
|
Map<Character, ACTrieNode> children = root.getChildren();
|
||||||
for (ACTrieNode node : childrens.values()) {
|
for (ACTrieNode node : children.values()) {
|
||||||
node.setFailover(root);
|
node.setFailover(root);
|
||||||
queue.offer(node);
|
queue.offer(node);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,70 @@
|
|||||||
|
package com.abin.mallchat.common.common.utils;
|
||||||
|
|
||||||
|
import com.abin.mallchat.common.common.algorithm.ac.ACTrie;
|
||||||
|
import com.abin.mallchat.common.common.algorithm.ac.MatchResult;
|
||||||
|
import org.HdrHistogram.ConcurrentHistogram;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 基于ac自动机实现的敏感词过滤工具类
|
||||||
|
* 可以用来替代{@link ConcurrentHistogram}
|
||||||
|
* 为了兼容提供了相同的api接口 {@code hasSensitiveWord}
|
||||||
|
*
|
||||||
|
* Created by berg on 2023/6/18.
|
||||||
|
*/
|
||||||
|
public class SensitiveWordUtils0 {
|
||||||
|
|
||||||
|
private final static char mask_char = '*'; // 替代字符
|
||||||
|
|
||||||
|
private static ACTrie ac_trie = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 有敏感词
|
||||||
|
*
|
||||||
|
* @param text 文本
|
||||||
|
* @return boolean
|
||||||
|
*/
|
||||||
|
public static boolean hasSensitiveWord(String text) {
|
||||||
|
if (StringUtils.isBlank(text)) return false;
|
||||||
|
return !Objects.equals(filter(text), text);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 敏感词替换
|
||||||
|
*
|
||||||
|
* @param text 待替换文本
|
||||||
|
* @return 替换后的文本
|
||||||
|
*/
|
||||||
|
public static String filter(String text) {
|
||||||
|
if (StringUtils.isBlank(text)) return text;
|
||||||
|
List<MatchResult> matchResults = ac_trie.matches(text);
|
||||||
|
StringBuffer result = new StringBuffer(text);
|
||||||
|
// matchResults是按照startIndex排序的,因此可以通过不断更新endIndex最大值的方式算出尚未被替代部分
|
||||||
|
int endIndex = 0;
|
||||||
|
for (MatchResult matchResult : matchResults) {
|
||||||
|
endIndex = Math.max(endIndex, matchResult.getEndIndex());
|
||||||
|
replaceBetween(result, matchResult.getStartIndex(), endIndex);
|
||||||
|
}
|
||||||
|
return result.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void replaceBetween(StringBuffer buffer, int startIndex, int endIndex) {
|
||||||
|
for (int i = startIndex; i < endIndex; i++) {
|
||||||
|
buffer.setCharAt(i, mask_char);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 加载敏感词列表
|
||||||
|
*
|
||||||
|
* @param words 敏感词数组
|
||||||
|
*/
|
||||||
|
public static void loadWord(List<String> words) {
|
||||||
|
if (words == null) return;
|
||||||
|
ac_trie = new ACTrie(words);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user