Merge pull request #105 from 1045078399/sensitive

feat:敏感词封装
This commit is contained in:
zongzibinbin
2023-07-16 01:40:56 +08:00
committed by GitHub
12 changed files with 253 additions and 85 deletions

View File

@@ -0,0 +1,30 @@
package com.abin.mallchat.common.common.config;
import com.abin.mallchat.common.common.utils.sensitiveWord.DFAFilter;
import com.abin.mallchat.common.common.utils.sensitiveWord.SensitiveWordBs;
import com.abin.mallchat.common.sensitive.MyWordDeny;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
public class SensitiveWordConfig {

    /** Word source handed to the bootstrap object as its deny list provider. */
    @Autowired
    private MyWordDeny myWordDeny;

    /**
     * Creates the sensitive-word bootstrap bean.
     *
     * <p>The bootstrap is given a {@link DFAFilter} as its filter strategy and
     * {@code myWordDeny} as its word source, and is initialised before it is returned.
     *
     * @return the initialised bootstrap object
     * @since 1.0.0
     */
    @Bean
    public SensitiveWordBs sensitiveWordBs() {
        SensitiveWordBs bootstrap = SensitiveWordBs.newInstance();
        // Each builder method returns the same instance, so the calls can be issued one by one.
        bootstrap.filterStrategy(DFAFilter.getInstance());
        bootstrap.sensitiveWord(myWordDeny);
        return bootstrap.init();
    }
}

View File

@@ -1,4 +1,4 @@
package com.abin.mallchat.common.common.utils; package com.abin.mallchat.common.common.utils.sensitiveWord;
import com.abin.mallchat.common.common.algorithm.ac.ACTrie; import com.abin.mallchat.common.common.algorithm.ac.ACTrie;
import com.abin.mallchat.common.common.algorithm.ac.MatchResult; import com.abin.mallchat.common.common.algorithm.ac.MatchResult;
@@ -15,7 +15,7 @@ import java.util.Objects;
* *
* Created by berg on 2023/6/18. * Created by berg on 2023/6/18.
*/ */
public class SensitiveWordUtils0 { public class ACFilter implements SensitiveWordFilter {
private final static char mask_char = '*'; // 替代字符 private final static char mask_char = '*'; // 替代字符
@@ -27,7 +27,7 @@ public class SensitiveWordUtils0 {
* @param text 文本 * @param text 文本
* @return boolean * @return boolean
*/ */
public static boolean hasSensitiveWord(String text) { public boolean hasSensitiveWord(String text) {
if (StringUtils.isBlank(text)) return false; if (StringUtils.isBlank(text)) return false;
return !Objects.equals(filter(text), text); return !Objects.equals(filter(text), text);
} }
@@ -38,7 +38,7 @@ public class SensitiveWordUtils0 {
* @param text 待替换文本 * @param text 待替换文本
* @return 替换后的文本 * @return 替换后的文本
*/ */
public static String filter(String text) { public String filter(String text) {
if (StringUtils.isBlank(text)) return text; if (StringUtils.isBlank(text)) return text;
List<MatchResult> matchResults = ac_trie.matches(text); List<MatchResult> matchResults = ac_trie.matches(text);
StringBuffer result = new StringBuffer(text); StringBuffer result = new StringBuffer(text);
@@ -62,7 +62,7 @@ public class SensitiveWordUtils0 {
* *
* @param words 敏感词数组 * @param words 敏感词数组
*/ */
public static void loadWord(List<String> words) { public void loadWord(List<String> words) {
if (words == null) return; if (words == null) return;
ac_trie = new ACTrie(words); ac_trie = new ACTrie(words);
} }

View File

@@ -1,4 +1,4 @@
package com.abin.mallchat.common.common.utils; package com.abin.mallchat.common.common.utils.sensitiveWord;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.springframework.util.CollectionUtils; import org.springframework.util.CollectionUtils;
@@ -18,7 +18,10 @@ import java.util.*;
* @author zhaoyuhang * @author zhaoyuhang
* @date 2023/06/19 * @date 2023/06/19
*/ */
public final class SensitiveWordUtils { public final class DFAFilter implements SensitiveWordFilter {
private DFAFilter() {
}
private static Word root = new Word(' '); // 敏感词字典的根节点 private static Word root = new Word(' '); // 敏感词字典的根节点
private final static char replace = '*'; // 替代字符 private final static char replace = '*'; // 替代字符
private final static String skipChars = " !*-+_=,.@;:;:。、??()【】[]《》<>“”\""; // 遇到这些字符就会跳过 private final static String skipChars = " !*-+_=,.@;:;:。、??()【】[]《》<>“”\""; // 遇到这些字符就会跳过
@@ -30,6 +33,10 @@ public final class SensitiveWordUtils {
} }
} }
public static DFAFilter getInstance() {
return new DFAFilter();
}
/** /**
* 判断文本中是否存在敏感词 * 判断文本中是否存在敏感词
@@ -37,7 +44,7 @@ public final class SensitiveWordUtils {
* @param text 文本 * @param text 文本
* @return true: 存在敏感词, false: 不存在敏感词 * @return true: 存在敏感词, false: 不存在敏感词
*/ */
public static boolean hasSensitiveWord(String text) { public boolean hasSensitiveWord(String text) {
if (StringUtils.isBlank(text)) return false; if (StringUtils.isBlank(text)) return false;
return !Objects.equals(filter(text), text); return !Objects.equals(filter(text), text);
} }
@@ -48,7 +55,7 @@ public final class SensitiveWordUtils {
* @param text 待替换文本 * @param text 待替换文本
* @return 替换后的文本 * @return 替换后的文本
*/ */
public static String filter(String text) { public String filter(String text) {
StringBuilder result = new StringBuilder(text); StringBuilder result = new StringBuilder(text);
int index = 0; int index = 0;
while (index < result.length()) { while (index < result.length()) {
@@ -93,7 +100,7 @@ public final class SensitiveWordUtils {
* *
* @param words 敏感词数组 * @param words 敏感词数组
*/ */
public static void loadWord(List<String> words) { public void loadWord(List<String> words) {
if (!CollectionUtils.isEmpty(words)) { if (!CollectionUtils.isEmpty(words)) {
Word newRoot = new Word(' '); Word newRoot = new Word(' ');
words.forEach(word -> loadWord(word, newRoot)); words.forEach(word -> loadWord(word, newRoot));
@@ -106,7 +113,7 @@ public final class SensitiveWordUtils {
* *
* @param word * @param word
*/ */
public static void loadWord(String word, Word root) { public void loadWord(String word, Word root) {
if (StringUtils.isBlank(word)) { if (StringUtils.isBlank(word)) {
return; return;
} }
@@ -136,7 +143,7 @@ public final class SensitiveWordUtils {
* *
* @param path 文本文件的绝对路径 * @param path 文本文件的绝对路径
*/ */
public static void loadWordFromFile(String path) { public void loadWordFromFile(String path) {
try (InputStream inputStream = Files.newInputStream(Paths.get(path))) { try (InputStream inputStream = Files.newInputStream(Paths.get(path))) {
loadWord(inputStream); loadWord(inputStream);
} catch (IOException e) { } catch (IOException e) {
@@ -150,7 +157,7 @@ public final class SensitiveWordUtils {
* @param inputStream 文本文件输入流 * @param inputStream 文本文件输入流
* @throws IOException IO异常 * @throws IOException IO异常
*/ */
public static void loadWord(InputStream inputStream) throws IOException { public void loadWord(InputStream inputStream) throws IOException {
try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) { try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
String line; String line;
ArrayList<String> list = new ArrayList<>(); ArrayList<String> list = new ArrayList<>();
@@ -167,7 +174,7 @@ public final class SensitiveWordUtils {
* @param c 待检测字符 * @param c 待检测字符
* @return true: 需要跳过, false: 不需要跳过 * @return true: 需要跳过, false: 不需要跳过
*/ */
private static boolean skip(char c) { private boolean skip(char c) {
return skipSet.contains(c); return skipSet.contains(c);
} }
@@ -186,17 +193,7 @@ public final class SensitiveWordUtils {
public Word(char c) { public Word(char c) {
this.c = c; this.c = c;
this.end = false;
this.next = new HashMap<>(); this.next = new HashMap<>();
} }
} }
public static void main(String[] args) {
String text = "白日,梦";
String filter = filter(text);
System.out.println(filter);
}
} }

View File

@@ -0,0 +1,18 @@
package com.abin.mallchat.common.common.utils.sensitiveWord;
import java.util.List;
/**
* Supplier of sensitive (denied) words.
*
* @author zhaoyuhang
* @date 2023/07/09
*/
public interface IWordDeny {
/**
* Returns the words that make up the deny list.
*
* @return the list of denied words
* @since 0.0.13
*/
List<String> deny();
}

View File

@@ -0,0 +1,102 @@
package com.abin.mallchat.common.common.utils.sensitiveWord;
import java.util.List;
/**
 * Bootstrap class for sensitive-word handling.
 *
 * <p>An instance is obtained through {@link #newInstance()}, configured with
 * {@link #filterStrategy(SensitiveWordFilter)} and {@link #sensitiveWord(IWordDeny)},
 * and made ready with {@link #init()}. Afterwards {@link #hasSensitiveWord(String)} and
 * {@link #filter(String)} delegate to the configured filter.
 *
 * @author zhaoyuhang
 * @date 2023/07/08
 */
public class SensitiveWordBs {

    /**
     * Filter strategy that all checks are delegated to; defaults to the DFA implementation.
     */
    private SensitiveWordFilter sensitiveWordFilter = DFAFilter.getInstance();

    /**
     * Source of the sensitive words; stays {@code null} until
     * {@link #sensitiveWord(IWordDeny)} is called.
     */
    private IWordDeny wordDeny;

    /**
     * Private constructor; use {@link #newInstance()}.
     */
    private SensitiveWordBs() {
    }

    /**
     * Creates a new, not yet initialised bootstrap object.
     *
     * @return a fresh instance
     */
    public static SensitiveWordBs newInstance() {
        return new SensitiveWordBs();
    }

    /**
     * Initialises the bootstrap: fetches the words from the configured word source and
     * loads them into the current filter. The original author notes that this step is
     * comparatively expensive.
     *
     * @return this
     * @throws IllegalStateException if no word source was configured via
     *                               {@link #sensitiveWord(IWordDeny)}
     * @since 0.0.13
     */
    public SensitiveWordBs init() {
        // Fail fast with a clear message instead of an anonymous NullPointerException.
        if (wordDeny == null) {
            throw new IllegalStateException(
                    "wordDeny is not configured; call sensitiveWord(...) before init()");
        }
        List<String> words = wordDeny.deny();
        loadWord(words);
        return this;
    }

    /**
     * Sets the filter strategy.
     *
     * @param filter the filter to delegate to
     * @return this
     * @throws IllegalArgumentException if {@code filter} is {@code null}
     * @since 0.7.0
     */
    public SensitiveWordBs filterStrategy(SensitiveWordFilter filter) {
        if (filter == null) {
            throw new IllegalArgumentException("filter can not be null");
        }
        this.sensitiveWordFilter = filter;
        return this;
    }

    /**
     * Sets the source the sensitive words are read from during {@link #init()}.
     *
     * @param wordDeny the word source
     * @return this
     * @throws IllegalArgumentException if {@code wordDeny} is {@code null}
     */
    public SensitiveWordBs sensitiveWord(IWordDeny wordDeny) {
        if (wordDeny == null) {
            throw new IllegalArgumentException("wordDeny can not be null");
        }
        this.wordDeny = wordDeny;
        return this;
    }

    /**
     * Checks whether the text contains a sensitive word, as decided by the current filter.
     *
     * @param text the text to check
     * @return the filter's verdict
     */
    public boolean hasSensitiveWord(String text) {
        return sensitiveWordFilter.hasSensitiveWord(text);
    }

    /**
     * Filters the text with the current filter.
     *
     * @param text the text to filter
     * @return the text as returned by the filter
     */
    public String filter(String text) {
        return sensitiveWordFilter.filter(text);
    }

    /**
     * Loads the given words into the current filter.
     *
     * @param words the sensitive words
     */
    private void loadWord(List<String> words) {
        sensitiveWordFilter.loadWord(words);
    }
}

View File

@@ -0,0 +1,37 @@
package com.abin.mallchat.common.common.utils.sensitiveWord;
import java.util.List;
/**
* Sensitive-word filter strategy.
*
* @author zhaoyuhang
* @date 2023/07/08
*/
public interface SensitiveWordFilter {
/**
* Checks whether the text contains a sensitive word.
*
* @param text the text to check
* @return {@code true} if a sensitive word is present, {@code false} otherwise
*/
boolean hasSensitiveWord(String text);
/**
* Filters the text.
*
* @param text the text to filter
* @return the filtered text
*/
String filter(String text);
/**
* Loads the list of sensitive words.
*
* @param words the sensitive words
*/
void loadWord(List<String> words);
}

View File

@@ -0,0 +1,24 @@
package com.abin.mallchat.common.sensitive;
import com.abin.mallchat.common.common.utils.sensitiveWord.IWordDeny;
import com.abin.mallchat.common.sensitive.dao.SensitiveWordDao;
import com.abin.mallchat.common.sensitive.domain.SensitiveWord;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.stream.Collectors;
@Component
public class MyWordDeny implements IWordDeny {
@Autowired
private SensitiveWordDao sensitiveWordDao;
@Override
public List<String> deny() {
return sensitiveWordDao.list()
.stream()
.map(SensitiveWord::getWord)
.collect(Collectors.toList());
}
}

View File

@@ -1,5 +0,0 @@
package com.abin.mallchat.common.sensitive.service;
/**
* Service interface for sensitive words; it declares no methods.
*/
public interface ISensitiveWordService {
}

View File

@@ -1,39 +0,0 @@
package com.abin.mallchat.common.sensitive.service.impl;
import com.abin.mallchat.common.common.utils.SensitiveWordUtils;
import com.abin.mallchat.common.sensitive.dao.SensitiveWordDao;
import com.abin.mallchat.common.sensitive.domain.SensitiveWord;
import com.abin.mallchat.common.sensitive.service.ISensitiveWordService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import javax.annotation.PostConstruct;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Loads the sensitive words from the DAO into {@code SensitiveWordUtils} once the bean
 * has been constructed.
 */
@Service
@Slf4j
public class SensitiveWordServiceImpl implements ISensitiveWordService {

    @Autowired
    private SensitiveWordDao sensitiveWordDao;

    @Autowired
    private ThreadPoolTaskExecutor threadPoolTaskExecutor;

    /**
     * Submits a task to the executor that reads all sensitive-word records and, when there
     * are any, hands their words to {@code SensitiveWordUtils.loadWord}. The number of
     * records is logged when the task finishes.
     */
    @PostConstruct
    public void initSensitiveWord() {
        threadPoolTaskExecutor.execute(() -> {
            log.info("[initSensitiveWord] start");
            List<SensitiveWord> list = sensitiveWordDao.list();
            // Counted separately so the final log line never dereferences a null list;
            // the guard below already treats null and empty alike.
            int loadedCount = 0;
            if (!CollectionUtils.isEmpty(list)) {
                List<String> wordList = list.stream()
                        .map(SensitiveWord::getWord)
                        .collect(Collectors.toList());
                SensitiveWordUtils.loadWord(wordList);
                loadedCount = list.size();
            }
            log.info("[initSensitiveWord] end; loading sensitiveWords num:{}", loadedCount);
        });
    }
}

View File

@@ -10,9 +10,9 @@ import com.abin.mallchat.common.chat.domain.enums.MessageTypeEnum;
import com.abin.mallchat.common.chat.service.cache.MsgCache; import com.abin.mallchat.common.chat.service.cache.MsgCache;
import com.abin.mallchat.common.common.domain.enums.YesOrNoEnum; import com.abin.mallchat.common.common.domain.enums.YesOrNoEnum;
import com.abin.mallchat.common.common.utils.AssertUtil; import com.abin.mallchat.common.common.utils.AssertUtil;
import com.abin.mallchat.common.common.utils.SensitiveWordUtils;
import com.abin.mallchat.common.common.utils.discover.PrioritizedUrlDiscover; import com.abin.mallchat.common.common.utils.discover.PrioritizedUrlDiscover;
import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo; import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
import com.abin.mallchat.common.common.utils.sensitiveWord.SensitiveWordBs;
import com.abin.mallchat.common.user.domain.entity.User; import com.abin.mallchat.common.user.domain.entity.User;
import com.abin.mallchat.common.user.domain.enums.RoleEnum; import com.abin.mallchat.common.user.domain.enums.RoleEnum;
import com.abin.mallchat.common.user.service.IRoleService; import com.abin.mallchat.common.user.service.IRoleService;
@@ -47,6 +47,8 @@ public class TextMsgHandler extends AbstractMsgHandler {
private UserInfoCache userInfoCache; private UserInfoCache userInfoCache;
@Autowired @Autowired
private IRoleService iRoleService; private IRoleService iRoleService;
@Autowired
private SensitiveWordBs sensitiveWordBs;
private static final PrioritizedUrlDiscover URL_TITLE_DISCOVER = new PrioritizedUrlDiscover(); private static final PrioritizedUrlDiscover URL_TITLE_DISCOVER = new PrioritizedUrlDiscover();
@@ -82,7 +84,7 @@ public class TextMsgHandler extends AbstractMsgHandler {
MessageExtra extra = Optional.ofNullable(msg.getExtra()).orElse(new MessageExtra()); MessageExtra extra = Optional.ofNullable(msg.getExtra()).orElse(new MessageExtra());
Message update = new Message(); Message update = new Message();
update.setId(msg.getId()); update.setId(msg.getId());
update.setContent(SensitiveWordUtils.filter(body.getContent())); update.setContent(sensitiveWordBs.filter(body.getContent()));
update.setExtra(extra); update.setExtra(extra);
//如果有回复消息 //如果有回复消息
if (Objects.nonNull(body.getReplyMsgId())) { if (Objects.nonNull(body.getReplyMsgId())) {

View File

@@ -36,6 +36,7 @@ public class GPTChatAIHandler extends AbstractChatAIHandler {
@Override @Override
protected void init() { protected void init() {
super.init(); super.init();
if (isUse()) {
UserInfoResp userInfo = userService.getUserInfo(chatGPTProperties.getAIUserId()); UserInfoResp userInfo = userService.getUserInfo(chatGPTProperties.getAIUserId());
if (userInfo == null) { if (userInfo == null) {
log.error("根据AIUserId:{} 找不到用户信息", chatGPTProperties.getAIUserId()); log.error("根据AIUserId:{} 找不到用户信息", chatGPTProperties.getAIUserId());
@@ -47,6 +48,7 @@ public class GPTChatAIHandler extends AbstractChatAIHandler {
} }
AI_NAME = userInfo.getName(); AI_NAME = userInfo.getName();
} }
}
@Override @Override
protected boolean isUse() { protected boolean isUse() {
@@ -90,14 +92,12 @@ public class GPTChatAIHandler extends AbstractChatAIHandler {
text = ChatGPTUtils.parseText(response); text = ChatGPTUtils.parseText(response);
} catch (Exception e) { } catch (Exception e) {
log.warn("gpt doChat warn:", e); log.warn("gpt doChat warn:", e);
text= "我累了,明天再聊吧"; text = "我累了,明天再聊吧";
} }
return text; return text;
} }
@Override @Override
protected boolean supports(Message message) { protected boolean supports(Message message) {
if (!chatGPTProperties.isUse()) { if (!chatGPTProperties.isUse()) {

View File

@@ -4,7 +4,7 @@ import cn.hutool.core.util.StrUtil;
import com.abin.mallchat.common.common.event.UserBlackEvent; import com.abin.mallchat.common.common.event.UserBlackEvent;
import com.abin.mallchat.common.common.event.UserRegisterEvent; import com.abin.mallchat.common.common.event.UserRegisterEvent;
import com.abin.mallchat.common.common.utils.AssertUtil; import com.abin.mallchat.common.common.utils.AssertUtil;
import com.abin.mallchat.common.common.utils.SensitiveWordUtils; import com.abin.mallchat.common.common.utils.sensitiveWord.SensitiveWordBs;
import com.abin.mallchat.common.user.dao.BlackDao; import com.abin.mallchat.common.user.dao.BlackDao;
import com.abin.mallchat.common.user.dao.ItemConfigDao; import com.abin.mallchat.common.user.dao.ItemConfigDao;
import com.abin.mallchat.common.user.dao.UserBackpackDao; import com.abin.mallchat.common.user.dao.UserBackpackDao;
@@ -63,6 +63,8 @@ public class UserServiceImpl implements UserService {
private BlackDao blackDao; private BlackDao blackDao;
@Autowired @Autowired
private UserSummaryCache userSummaryCache; private UserSummaryCache userSummaryCache;
@Autowired
private SensitiveWordBs sensitiveWordBs;
@Override @Override
public UserInfoResp getUserInfo(Long uid) { public UserInfoResp getUserInfo(Long uid) {
@@ -76,7 +78,7 @@ public class UserServiceImpl implements UserService {
public void modifyName(Long uid, ModifyNameReq req) { public void modifyName(Long uid, ModifyNameReq req) {
//判断名字是不是重复 //判断名字是不是重复
String newName = req.getName(); String newName = req.getName();
AssertUtil.isFalse(SensitiveWordUtils.hasSensitiveWord(newName), "名字中包含敏感词,请重新输入"); // 判断名字中有没有敏感词 AssertUtil.isFalse(sensitiveWordBs.hasSensitiveWord(newName), "名字中包含敏感词,请重新输入"); // 判断名字中有没有敏感词
User oldUser = userDao.getByName(newName); User oldUser = userDao.getByName(newName);
AssertUtil.isEmpty(oldUser, "名字已经被抢占了,请换一个哦~~"); AssertUtil.isEmpty(oldUser, "名字已经被抢占了,请换一个哦~~");
//判断改名卡够不够 //判断改名卡够不够