敏感词工具类封装

This commit is contained in:
zhaoyuhang
2023-07-10 20:38:01 +08:00
parent 911042745d
commit 6546b9fe16
12 changed files with 252 additions and 84 deletions

View File

@@ -0,0 +1,30 @@
package com.abin.mallchat.common.common.config;
import com.abin.mallchat.common.common.utils.sensitiveWord.DFAFilter;
import com.abin.mallchat.common.common.utils.sensitiveWord.SensitiveWordBs;
import com.abin.mallchat.common.sensitive.MyWordDeny;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration that publishes the sensitive-word bootstrap as a bean.
 */
@Configuration
public class SensitiveWordConfig {

    /** Word source handed to the bootstrap when the bean is built. */
    @Autowired
    private MyWordDeny myWordDeny;

    /**
     * Creates the {@link SensitiveWordBs} bean: a fresh bootstrap configured with the
     * DFA filter strategy and the injected word source, then initialized.
     *
     * @return the initialized bootstrap
     */
    @Bean
    public SensitiveWordBs sensitiveWordBs() {
        SensitiveWordBs bootstrap = SensitiveWordBs.newInstance();
        DFAFilter dfaFilter = DFAFilter.getInstance();
        SensitiveWordBs configured = bootstrap
                .filterStrategy(dfaFilter)
                .sensitiveWord(myWordDeny);
        return configured.init();
    }
}

View File

@@ -1,4 +1,4 @@
package com.abin.mallchat.common.common.utils;
package com.abin.mallchat.common.common.utils.sensitiveWord;
import com.abin.mallchat.common.common.algorithm.ac.ACTrie;
import com.abin.mallchat.common.common.algorithm.ac.MatchResult;
@@ -15,7 +15,7 @@ import java.util.Objects;
*
* Created by berg on 2023/6/18.
*/
public class SensitiveWordUtils0 {
public class ACFilter implements SensitiveWordFilter {
private final static char mask_char = '*'; // 替代字符
@@ -27,7 +27,7 @@ public class SensitiveWordUtils0 {
* @param text 文本
* @return boolean
*/
public static boolean hasSensitiveWord(String text) {
public boolean hasSensitiveWord(String text) {
if (StringUtils.isBlank(text)) return false;
return !Objects.equals(filter(text), text);
}
@@ -38,7 +38,7 @@ public class SensitiveWordUtils0 {
* @param text 待替换文本
* @return 替换后的文本
*/
public static String filter(String text) {
public String filter(String text) {
if (StringUtils.isBlank(text)) return text;
List<MatchResult> matchResults = ac_trie.matches(text);
StringBuffer result = new StringBuffer(text);
@@ -62,7 +62,7 @@ public class SensitiveWordUtils0 {
*
* @param words 敏感词数组
*/
public static void loadWord(List<String> words) {
public void loadWord(List<String> words) {
if (words == null) return;
ac_trie = new ACTrie(words);
}

View File

@@ -1,4 +1,4 @@
package com.abin.mallchat.common.common.utils;
package com.abin.mallchat.common.common.utils.sensitiveWord;
import org.apache.commons.lang3.StringUtils;
import org.springframework.util.CollectionUtils;
@@ -18,7 +18,10 @@ import java.util.*;
* @author zhaoyuhang
* @date 2023/06/19
*/
public final class SensitiveWordUtils {
public final class DFAFilter implements SensitiveWordFilter {
private DFAFilter() {
}
private static Word root = new Word(' '); // 敏感词字典的根节点
private final static char replace = '*'; // 替代字符
private final static String skipChars = " !*-+_=,.@;:;:。、??()【】[]《》<>“”\""; // 遇到这些字符就会跳过
@@ -30,6 +33,10 @@ public final class SensitiveWordUtils {
}
}
public static DFAFilter getInstance() {
return new DFAFilter();
}
/**
* 判断文本中是否存在敏感词
@@ -37,7 +44,7 @@ public final class SensitiveWordUtils {
* @param text 文本
* @return true: 存在敏感词, false: 不存在敏感词
*/
public static boolean hasSensitiveWord(String text) {
public boolean hasSensitiveWord(String text) {
if (StringUtils.isBlank(text)) return false;
return !Objects.equals(filter(text), text);
}
@@ -48,7 +55,7 @@ public final class SensitiveWordUtils {
* @param text 待替换文本
* @return 替换后的文本
*/
public static String filter(String text) {
public String filter(String text) {
StringBuilder result = new StringBuilder(text);
int index = 0;
while (index < result.length()) {
@@ -93,7 +100,7 @@ public final class SensitiveWordUtils {
*
* @param words 敏感词数组
*/
public static void loadWord(List<String> words) {
public void loadWord(List<String> words) {
if (!CollectionUtils.isEmpty(words)) {
Word newRoot = new Word(' ');
words.forEach(word -> loadWord(word, newRoot));
@@ -106,7 +113,7 @@ public final class SensitiveWordUtils {
*
* @param word
*/
public static void loadWord(String word, Word root) {
public void loadWord(String word, Word root) {
if (StringUtils.isBlank(word)) {
return;
}
@@ -136,7 +143,7 @@ public final class SensitiveWordUtils {
*
* @param path 文本文件的绝对路径
*/
public static void loadWordFromFile(String path) {
public void loadWordFromFile(String path) {
try (InputStream inputStream = Files.newInputStream(Paths.get(path))) {
loadWord(inputStream);
} catch (IOException e) {
@@ -150,7 +157,7 @@ public final class SensitiveWordUtils {
* @param inputStream 文本文件输入流
* @throws IOException IO异常
*/
public static void loadWord(InputStream inputStream) throws IOException {
public void loadWord(InputStream inputStream) throws IOException {
try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
String line;
ArrayList<String> list = new ArrayList<>();
@@ -167,7 +174,7 @@ public final class SensitiveWordUtils {
* @param c 待检测字符
* @return true: 需要跳过, false: 不需要跳过
*/
private static boolean skip(char c) {
private boolean skip(char c) {
return skipSet.contains(c);
}
@@ -186,17 +193,7 @@ public final class SensitiveWordUtils {
public Word(char c) {
this.c = c;
this.end = false;
this.next = new HashMap<>();
}
}
public static void main(String[] args) {
String text = "白日,梦";
String filter = filter(text);
System.out.println(filter);
}
}

View File

@@ -0,0 +1,18 @@
package com.abin.mallchat.common.common.utils.sensitiveWord;
import java.util.List;
/**
 * Source of sensitive words.
 *
 * @author zhaoyuhang
 * @date 2023/07/09
 */
public interface IWordDeny {
/**
 * Returns the list of sensitive words supplied by this source.
 *
 * @return the sensitive words
 * @since 0.0.13
 */
List<String> deny();
}

View File

@@ -0,0 +1,102 @@
package com.abin.mallchat.common.common.utils.sensitiveWord;
import java.util.List;
/**
 * Fluent bootstrap for sensitive-word filtering.
 *
 * <p>Obtain an instance with {@link #newInstance()}, configure it with
 * {@link #filterStrategy(SensitiveWordFilter)} and {@link #sensitiveWord(IWordDeny)},
 * then call {@link #init()} to load the words into the filter. The word source is
 * mandatory; the filter strategy defaults to {@link DFAFilter#getInstance()}.
 *
 * @author zhaoyuhang
 * @date 2023/07/08
 */
public class SensitiveWordBs {

    /** Filter strategy that performs the matching; defaults to the DFA filter. */
    private SensitiveWordFilter sensitiveWordFilter = DFAFilter.getInstance();

    /** Source of the sensitive-word list; must be set before {@link #init()}. */
    private IWordDeny wordDeny;

    /** Private constructor: instances are created through {@link #newInstance()}. */
    private SensitiveWordBs() {
    }

    /**
     * Creates a new, not yet initialized bootstrap.
     *
     * @return a new bootstrap instance
     */
    public static SensitiveWordBs newInstance() {
        return new SensitiveWordBs();
    }

    /**
     * Loads the words provided by the configured word source into the filter.
     *
     * @return this
     * @throws IllegalStateException if no word source was configured via
     *                               {@link #sensitiveWord(IWordDeny)}
     */
    public SensitiveWordBs init() {
        // Fail with an actionable message instead of a bare NullPointerException
        // when the mandatory builder step was skipped.
        if (wordDeny == null) {
            throw new IllegalStateException(
                    "wordDeny must be configured via sensitiveWord(...) before init()");
        }
        List<String> words = wordDeny.deny();
        loadWord(words);
        return this;
    }

    /**
     * Sets the filter strategy.
     *
     * @param filter the filter to delegate to
     * @return this
     * @throws IllegalArgumentException if {@code filter} is null
     */
    public SensitiveWordBs filterStrategy(SensitiveWordFilter filter) {
        if (filter == null) {
            throw new IllegalArgumentException("filter can not be null");
        }
        this.sensitiveWordFilter = filter;
        return this;
    }

    /**
     * Sets the source of the sensitive-word list.
     *
     * @param wordDeny the word source
     * @return this
     * @throws IllegalArgumentException if {@code wordDeny} is null
     */
    public SensitiveWordBs sensitiveWord(IWordDeny wordDeny) {
        if (wordDeny == null) {
            throw new IllegalArgumentException("wordDeny can not be null");
        }
        this.wordDeny = wordDeny;
        return this;
    }

    /**
     * Checks whether the text contains a sensitive word, delegating to the filter.
     *
     * @param text the text to check
     * @return the filter's verdict
     */
    public boolean hasSensitiveWord(String text) {
        return sensitiveWordFilter.hasSensitiveWord(text);
    }

    /**
     * Filters the text, delegating to the configured filter.
     *
     * @param text the text to filter
     * @return the filtered text as produced by the filter
     */
    public String filter(String text) {
        return sensitiveWordFilter.filter(text);
    }

    /**
     * Hands the word list to the configured filter.
     *
     * @param words the sensitive words to load
     */
    private void loadWord(List<String> words) {
        sensitiveWordFilter.loadWord(words);
    }
}

View File

@@ -0,0 +1,37 @@
package com.abin.mallchat.common.common.utils.sensitiveWord;
import java.util.List;
/**
 * Strategy interface for sensitive-word filtering.
 *
 * @author zhaoyuhang
 * @date 2023/07/08
 */
public interface SensitiveWordFilter {
/**
 * Checks whether the text contains a sensitive word.
 *
 * @param text the text to check
 * @return whether a sensitive word was found
 */
boolean hasSensitiveWord(String text);
/**
 * Filters the text.
 *
 * @param text the text to filter
 * @return the filtered text
 */
String filter(String text);
/**
 * Loads the sensitive-word list.
 *
 * @param words the sensitive words
 */
void loadWord(List<String> words);
}

View File

@@ -0,0 +1,24 @@
package com.abin.mallchat.common.sensitive;
import com.abin.mallchat.common.common.utils.sensitiveWord.IWordDeny;
import com.abin.mallchat.common.sensitive.dao.SensitiveWordDao;
import com.abin.mallchat.common.sensitive.domain.SensitiveWord;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.stream.Collectors;
@Component
public class MyWordDeny implements IWordDeny {
@Autowired
private SensitiveWordDao sensitiveWordDao;
@Override
public List<String> deny() {
return sensitiveWordDao.list()
.stream()
.map(SensitiveWord::getWord)
.collect(Collectors.toList());
}
}

View File

@@ -1,5 +0,0 @@
package com.abin.mallchat.common.sensitive.service;
/**
 * Sensitive-word service contract. It currently declares no methods.
 */
public interface ISensitiveWordService {
}

View File

@@ -1,39 +0,0 @@
package com.abin.mallchat.common.sensitive.service.impl;
import com.abin.mallchat.common.common.utils.SensitiveWordUtils;
import com.abin.mallchat.common.sensitive.dao.SensitiveWordDao;
import com.abin.mallchat.common.sensitive.domain.SensitiveWord;
import com.abin.mallchat.common.sensitive.service.ISensitiveWordService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import javax.annotation.PostConstruct;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Loads the sensitive words into {@link SensitiveWordUtils} after the bean is constructed.
 */
@Service
@Slf4j
public class SensitiveWordServiceImpl implements ISensitiveWordService {

    @Autowired
    private SensitiveWordDao sensitiveWordDao;

    @Autowired
    private ThreadPoolTaskExecutor threadPoolTaskExecutor;

    /**
     * Submits a task to the executor that lists the words through the DAO and, when
     * any exist, loads them into {@link SensitiveWordUtils}. The work runs on the
     * executor, so this method returns without waiting for the load to finish.
     */
    @PostConstruct
    public void initSensitiveWord() {
        threadPoolTaskExecutor.execute(() -> {
            log.info("[initSensitiveWord] start");
            List<SensitiveWord> list = sensitiveWordDao.list();
            // Track the count separately: the emptiness check below tolerates a null
            // list, so calling list.size() unconditionally could throw here.
            int loadedCount = 0;
            if (!CollectionUtils.isEmpty(list)) {
                List<String> wordList = list.stream()
                        .map(SensitiveWord::getWord)
                        .collect(Collectors.toList());
                SensitiveWordUtils.loadWord(wordList);
                loadedCount = list.size();
            }
            log.info("[initSensitiveWord] end; loading sensitiveWords num:{}", loadedCount);
        });
    }
}