mirror of
https://github.com/zongzibinbin/MallChat.git
synced 2026-03-13 21:53:41 +08:00
fix:优化AC自动机
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
package com.abin.mallchat.common.common.algorithm.sensitiveWord;
|
package com.abin.mallchat.common.common.algorithm.sensitiveWord;
|
||||||
|
|
||||||
import com.abin.mallchat.common.common.algorithm.ac.ACTrie;
|
import com.abin.mallchat.common.common.algorithm.sensitiveWord.ac.ACTrie;
|
||||||
import com.abin.mallchat.common.common.algorithm.ac.MatchResult;
|
import com.abin.mallchat.common.common.algorithm.sensitiveWord.ac.MatchResult;
|
||||||
import org.HdrHistogram.ConcurrentHistogram;
|
import org.HdrHistogram.ConcurrentHistogram;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
package com.abin.mallchat.common.common.algorithm.sensitiveWord;
|
package com.abin.mallchat.common.common.algorithm.sensitiveWord;
|
||||||
|
|
||||||
import com.abin.mallchat.common.common.algorithm.ac.ACTrie;
|
import com.abin.mallchat.common.common.algorithm.sensitiveWord.acpro.ACProTrie;
|
||||||
import com.abin.mallchat.common.common.algorithm.acpro.ACProTrie;
|
|
||||||
import io.micrometer.core.instrument.util.StringUtils;
|
import io.micrometer.core.instrument.util.StringUtils;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@@ -13,6 +12,7 @@ import java.util.Objects;
|
|||||||
*@description: 基于ACFilter的优化增强版本
|
*@description: 基于ACFilter的优化增强版本
|
||||||
*/
|
*/
|
||||||
public class ACProFilter implements SensitiveWordFilter{
|
public class ACProFilter implements SensitiveWordFilter{
|
||||||
|
|
||||||
private ACProTrie acProTrie;
|
private ACProTrie acProTrie;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
package com.abin.mallchat.common.common.algorithm.ac;
|
package com.abin.mallchat.common.common.algorithm.sensitiveWord.ac;
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package com.abin.mallchat.common.common.algorithm.ac;
|
package com.abin.mallchat.common.common.algorithm.sensitiveWord.ac;
|
||||||
|
|
||||||
import com.google.common.collect.Maps;
|
import com.google.common.collect.Maps;
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package com.abin.mallchat.common.common.algorithm.ac;
|
package com.abin.mallchat.common.common.algorithm.sensitiveWord.ac;
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package com.abin.mallchat.common.common.algorithm.acpro;
|
package com.abin.mallchat.common.common.algorithm.sensitiveWord.acpro;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
@@ -87,52 +87,42 @@ public class ACProTrie {
|
|||||||
// 匹配
|
// 匹配
|
||||||
public String match(String matchWord)
|
public String match(String matchWord)
|
||||||
{
|
{
|
||||||
Word walkNode=root;
|
Word walkNode = root;
|
||||||
char[] wordArray=matchWord.toCharArray();
|
char[] wordArray = matchWord.toCharArray();
|
||||||
for(int i=0;i<wordArray.length;i++)
|
for (int i = 0; i < wordArray.length; i++) {
|
||||||
{
|
// 失败"回溯"
|
||||||
// 失败回调状态
|
while (!walkNode.hasChild(wordArray[i]) && walkNode.failOver != null) {
|
||||||
while(!walkNode.hasChild(wordArray[i]) && walkNode.failOver!=null)
|
walkNode = walkNode.failOver;
|
||||||
{
|
|
||||||
walkNode=walkNode.failOver;
|
|
||||||
}
|
}
|
||||||
if(walkNode.hasChild(wordArray[i])) {
|
if (walkNode.hasChild(wordArray[i])) {
|
||||||
walkNode=walkNode.next.get(wordArray[i]);
|
walkNode = walkNode.next.get(wordArray[i]);
|
||||||
if(walkNode.end){
|
if (walkNode.end) {
|
||||||
// sentinelA和sentinelB作为哨兵节点,去后面探测是否仍存在end
|
// sentinelA和sentinelB作为哨兵节点,去后面探测是否仍存在end
|
||||||
Word sentinelA = walkNode; // 记录当前节点
|
Word sentinelA = walkNode; // 记录当前节点
|
||||||
Word sentinelB = walkNode; //记录end节点
|
Word sentinelB = walkNode; //记录end节点
|
||||||
int k = i+1;
|
int k = i + 1;
|
||||||
boolean flag=false;
|
boolean flag = false;
|
||||||
//判断end是不是最终end即敏感词是否存在包含关系(abc,abcd)
|
//判断end是不是最终end即敏感词是否存在包含关系(abc,abcd)
|
||||||
while(k < wordArray.length && sentinelA.hasChild(wordArray[k])) {
|
while (k < wordArray.length && sentinelA.hasChild(wordArray[k])) {
|
||||||
sentinelA = sentinelA.next.get(wordArray[k]);
|
sentinelA = sentinelA.next.get(wordArray[k]);
|
||||||
k++;
|
k++;
|
||||||
if(sentinelA.end)
|
if (sentinelA.end) {
|
||||||
{
|
sentinelB = sentinelA;
|
||||||
sentinelB=sentinelA;
|
flag = true;
|
||||||
flag=true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// 根据结果去替换*
|
// 根据结果去替换*
|
||||||
if(flag){
|
// 计算替换长度
|
||||||
int length=sentinelB.depth;
|
int len = flag ? sentinelB.depth : walkNode.depth;
|
||||||
while(length>0)
|
while (len > 0) {
|
||||||
{
|
len--;
|
||||||
length--;
|
int index = flag ? i - walkNode.depth + 1 + len : i - len;
|
||||||
wordArray[i+length]=MASK;
|
wordArray[index] = MASK;
|
||||||
}
|
|
||||||
// 直接跳到最后的end节点failOver
|
|
||||||
i=i+length;
|
|
||||||
walkNode = sentinelB.failOver;
|
|
||||||
}else{
|
|
||||||
int length=walkNode.depth;
|
|
||||||
while (length>0){
|
|
||||||
length--;
|
|
||||||
wordArray[i-length]=MASK;
|
|
||||||
}
|
|
||||||
walkNode = walkNode.failOver;
|
|
||||||
}
|
}
|
||||||
|
// 更新i
|
||||||
|
i += flag ? sentinelB.depth : 0;
|
||||||
|
// 更新node
|
||||||
|
walkNode = flag ? sentinelB.failOver : walkNode.failOver;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -4,9 +4,6 @@ import com.abin.mallchat.common.common.algorithm.sensitiveWord.ACFilter;
|
|||||||
import com.abin.mallchat.common.common.algorithm.sensitiveWord.ACProFilter;
|
import com.abin.mallchat.common.common.algorithm.sensitiveWord.ACProFilter;
|
||||||
import com.abin.mallchat.common.common.algorithm.sensitiveWord.DFAFilter;
|
import com.abin.mallchat.common.common.algorithm.sensitiveWord.DFAFilter;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.FileReader;
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -50,7 +47,7 @@ public class SensitiveTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void ACMulti() {
|
public void ACMulti() {
|
||||||
List<String> sensitiveList = Arrays.asList("白痴", "你是白痴", "白痴吗");
|
List<String> sensitiveList = Arrays.asList("你是白痴","你是");
|
||||||
ACFilter instance = new ACFilter();
|
ACFilter instance = new ACFilter();
|
||||||
instance.loadWord(sensitiveList);
|
instance.loadWord(sensitiveList);
|
||||||
System.out.println(instance.filter("你是白痴吗"));
|
System.out.println(instance.filter("你是白痴吗"));
|
||||||
|
|||||||
Reference in New Issue
Block a user