1 Preface

Sensitive word filtering means that when certain words are entered in your project (for example, text related to xxoo), the system has to detect them. Many projects have a sensitive word management module: you add sensitive words there, and the input content is then filtered against those words and each match is handled accordingly, whether by showing a prompt, highlighting it, or replacing it with other text or symbols.

There are many ways to implement sensitive word filtering. The most common ones are:

1. Query the sensitive words from the database, loop over every word, and search the input text from start to end for it; if it is found, handle it. In plain terms: find one, handle one. (A minimal sketch of this approach follows after this list.)
Pros: very easy; implementing it in Java takes almost no effort.
Cons: the efficiency is terrible. With English it gets absurd: if the letter "a" is a sensitive word and the input is an entire English document, how many passes does the program have to make?

2. The well-known DFA algorithm (a finite state machine), which is what I want to share here, since it is fairly general. Please look up the details of the algorithm yourself; I will not explain it at length here.
Pros: at least more efficient than the naive approach above.
Cons: not hard for those who have studied algorithms, and still usable even if you have not, but it takes a little effort to understand; the matching efficiency is still not great, and it is memory-hungry: the more sensitive words there are, the more memory it uses.
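To make approach 1 concrete, here is a minimal sketch of the brute-force scan, assuming the word list has already been read from the database. The class name, word list, and method are made up for illustration and are not part of the project code below:

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class NaiveSensitiveFilter {

    // Illustrative word list; in a real project it would be loaded from the sensitive_word table
    private static final List<String> WORDS = Arrays.asList("吴名氏", "xxoo");

    // Scan the text once per sensitive word and collect every word that occurs in it
    public static Set<String> filter(String text) {
        Set<String> hits = new HashSet<>();
        for (String word : WORDS) {
            if (text.contains(word)) {
                hits.add(word);
            }
        }
        return hits;
    }

}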
2 Code implementation

2.1 Initializing the sensitive word dictionary

At application startup, read the data from the database and load the sensitive words into a Map. The concrete implementation is as follows.

Table definition:

CREATE TABLE sensitive_word (
  id int(11) NOT NULL AUTO_INCREMENT COMMENT 'primary key',
  content varchar(50) NOT NULL COMMENT 'keyword',
  create_time datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'creation time',
  update_time datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
  PRIMARY KEY (id)
) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=utf8mb4;

INSERT INTO fuying.sensitive_word (id, content, create_time, update_time) VALUES (1, '吴名氏', '2023-03-02 14:21:36', '2023-03-02 14:21:36');

Entity class SensitiveWord.java:

package com.wkf.workrecord.tools.dfa.entity;

import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;

import java.io.Serializable;
import java.util.Date;

/**
 * @author wuKeFan
 * @date 2023-03-02 13:48:58
 */
@Data
@TableName("sensitive_word")
public class SensitiveWord implements Serializable {

    private static final long serialVersionUID = 1L;

    @TableId(value = "id", type = IdType.AUTO)
    private Integer id;

    private String content;

    private Date createTime;

    private Date updateTime;

}
Persistence (mapper) class SensitiveWordMapper.java:

package com.wkf.workrecord.tools.dfa.mapper;

import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.wkf.workrecord.tools.dfa.entity.SensitiveWord;

/**
 * @author wuKeFan
 * @date 2023-03-02 13:50:16
 */
public interface SensitiveWordMapper extends BaseMapper<SensitiveWord> {
}
Service interface SensitiveWordService.java and its implementation SensitiveWordServiceImpl.java:

package com.wkf.workrecord.tools.dfa.service;

import com.baomidou.mybatisplus.extension.service.IService;
import com.wkf.workrecord.tools.dfa.entity.SensitiveWord;

import java.util.Set;

/**
 * Sensitive word filtering service
 * @author wuKeFan
 * @date 2023-03-02 13:47:04
 */
public interface SensitiveWordService extends IService<SensitiveWord> {

    Set<String> sensitiveWordFiltering(String text);

}
package com.wkf.workrecord.tools.dfa.service;

import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.wkf.workrecord.tools.dfa.mapper.SensitiveWordMapper;
import com.wkf.workrecord.tools.dfa.SensitiveWordUtils;
import com.wkf.workrecord.tools.dfa.entity.SensitiveWord;
import org.springframework.stereotype.Service;
import java.util.Set;

/**
 * @author wuKeFan
 * @date 2023-03-02 13:48:04
 */
@Service
public class SensitiveWordServiceImpl extends ServiceImpl<SensitiveWordMapper, SensitiveWord> implements SensitiveWordService {

    @Override
    public Set<String> sensitiveWordFiltering(String text) {
        // Get the sensitive words contained in the text; passing 2 means match all sensitive words
        return SensitiveWordUtils.getSensitiveWord(text, 2);
    }
}
Sensitive word filtering utility class SensitiveWordUtils.java:

package com.wkf.workrecord.tools.dfa;

import com.wkf.workrecord.tools.dfa.entity.SensitiveWord;
import lombok.extern.slf4j.Slf4j;

import java.util.*;

/**
 * Sensitive word filtering utility class
 * @author wuKeFan
 * @date 2023-03-02 13:45:19
 */
@Slf4j
@SuppressWarnings("unused")
public class SensitiveWordUtils {

    /**
     * Sensitive word dictionary
     */
    public static final Map<Object, Object> sensitiveWordMap = new HashMap<>();

    /**
     * Match only the shortest sensitive word
     */
    public static int minMatchTYpe = 1;

    /**
     * Match all (longest) sensitive words
     */
    public static int maxMatchType = 2;

    /**
     * Initialize the sensitive words
     */
    public static void initKeyWord(List<SensitiveWord> sensitiveWords) {
        try {
            // Take the words out of the entity list and collect them into a Set
            Set<String> keyWordSet = new HashSet<>();
            for (SensitiveWord s : sensitiveWords) {
                keyWordSet.add(s.getContent().trim());
            }
            // Load the word set into the HashMap-based dictionary
            addSensitiveWordToHashMap(keyWordSet);
        } catch (Exception e) {
            log.error("Failed to initialize the sensitive words", e);
        }
    }

    /**
     * Build the sensitive word dictionary
     *
     * @param keyWordSet the set of sensitive words
     */
    private static void addSensitiveWordToHashMap(Set<String> keyWordSet) {
        // Current sensitive word
        String key;
        // Node used to store the dictionary data in the required nested format
        Map<Object, Object> nowMap;
        // Helper map used while building the dictionary
        Map<Object, Object> newWorMap;
        // Iterate over the sensitive word set
        for (String s : keyWordSet) {
            key = s;
            // nowMap and sensitiveWordMap share the same reference, so changes to nowMap also change sensitiveWordMap
            nowMap = sensitiveWordMap;
            for (int i = 0; i < key.length(); i++) {
                // Each character of a sensitive word becomes a key of a nested HashMap
                char keyChar = key.charAt(i);
                // Check whether this character already exists at the current level of the dictionary
                Object wordMap = nowMap.get(keyChar);
                if (wordMap != null) {
                    nowMap = (Map<Object, Object>) wordMap;
                } else {
                    newWorMap = new HashMap<>();
                    newWorMap.put("isEnd", "0");
                    nowMap.put(keyChar, newWorMap);
                    nowMap = newWorMap;
                }
                // If this is the last character of the word, mark the node as an end node
                if (i == key.length() - 1) {
                    nowMap.put("isEnd", "1");
                }
                log.info("Building the sensitive word dictionary: {}", sensitiveWordMap);
            }
            log.info("Sensitive word dictionary: {}", sensitiveWordMap);
        }
    }

    /**
     * Number of root entries in the sensitive word dictionary
     *
     * @return the size
     */
    public static int getWordSize() {
        return SensitiveWordUtils.sensitiveWordMap.size();
    }

    /**
     * Check whether the text contains any sensitive word
     *
     * @param txt       the text to check
     * @param matchType the match type
     * @return true if at least one sensitive word is found
     */
    public static boolean isContainSensitiveWord(String txt, int matchType) {
        boolean flag = false;
        for (int i = 0; i < txt.length(); i++) {
            int matchFlag = checkSensitiveWord(txt, i, matchType);
            if (matchFlag > 0) {
                flag = true;
            }
        }
        return flag;
    }

    /**
     * Get the sensitive words contained in the text
     *
     * @param txt       the text to check
     * @param matchType the match type
     * @return the matched sensitive words
     */
    public static Set<String> getSensitiveWord(String txt, int matchType) {
        Set<String> sensitiveWordList = new HashSet<>();
        for (int i = 0; i < txt.length(); i++) {
            int length = checkSensitiveWord(txt, i, matchType);
            if (length > 0) {
                // Save the matched sensitive word and skip past it
                sensitiveWordList.add(txt.substring(i, i + length));
                i = i + length - 1;
            }
        }
        return sensitiveWordList;
    }

    /**
     * Replace the sensitive words in the text
     *
     * @param txt         the text to check
     * @param matchType   the match type
     * @param replaceChar the replacement character
     * @return the text with sensitive words replaced
     */
    public static String replaceSensitiveWord(String txt, int matchType, String replaceChar) {
        String resultTxt = txt;
        Set<String> set = getSensitiveWord(txt, matchType);
        Iterator<String> iterator = set.iterator();
        String word;
        String replaceString;
        while (iterator.hasNext()) {
            word = iterator.next();
            replaceString = getReplaceChars(replaceChar, word.length());
            resultTxt = resultTxt.replaceAll(word, replaceString);
        }
        return resultTxt;
    }

    /**
     * Build the replacement string
     *
     * @param replaceChar the replacement character
     * @param length      the length to cover
     * @return the replacement string
     */
    private static String getReplaceChars(String replaceChar, int length) {
        StringBuilder resultReplace = new StringBuilder(replaceChar);
        for (int i = 1; i < length; i++) {
            resultReplace.append(replaceChar);
        }
        return resultReplace.toString();
    }

    /**
     * Check whether a sensitive word starts at the given index
     *
     * @param txt        the text to check
     * @param beginIndex the start index
     * @param matchType  the match type
     * @return the length of the matched sensitive word, or 0 if there is no match
     */
    public static int checkSensitiveWord(String txt, int beginIndex, int matchType) {
        boolean flag = false;
        // Number of matched characters
        int matchFlag = 0;
        char word;
        Map<Object, Object> nowMap = SensitiveWordUtils.sensitiveWordMap;
        for (int i = beginIndex; i < txt.length(); i++) {
            word = txt.charAt(i);
            // Check whether this character exists at the current level of the dictionary
            nowMap = (Map<Object, Object>) nowMap.get(word);
            if (nowMap != null) {
                matchFlag++;
                // If this character ends a sensitive word, decide whether to keep matching
                if ("1".equals(nowMap.get("isEnd"))) {
                    flag = true;
                    // For minimum matching stop here, otherwise keep looking for a longer match
                    if (SensitiveWordUtils.minMatchTYpe == matchType) {
                        break;
                    }
                }
            } else {
                break;
            }
        }
        if (!flag) {
            matchFlag = 0;
        }
        return matchFlag;
    }

}
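To see the dictionary and the matching logic in action without Spring or a database, the utility class can be exercised directly. The following is a minimal sketch under that assumption (the second word 吴名 and the class SensitiveWordDemo are made up for illustration); it also shows the difference between the two match types: minMatchTYpe stops at the first end flag, while maxMatchType keeps going and returns the longer word.

package com.wkf.workrecord.tools.dfa;

import com.wkf.workrecord.tools.dfa.entity.SensitiveWord;

import java.util.Arrays;
import java.util.List;

public class SensitiveWordDemo {

    public static void main(String[] args) {
        // Build a few entities by hand instead of loading them from the database
        SensitiveWord w1 = new SensitiveWord();
        w1.setContent("吴名氏");
        SensitiveWord w2 = new SensitiveWord();
        w2.setContent("吴名");
        List<SensitiveWord> words = Arrays.asList(w1, w2);

        // Build the nested-map dictionary; it ends up looking like {吴={isEnd=0, 名={isEnd=1, 氏={isEnd=1}}}}
        SensitiveWordUtils.initKeyWord(words);

        String text = "吴名氏到此一游";
        // maxMatchType (2) keeps matching after the first end flag, so it finds 吴名氏
        System.out.println(SensitiveWordUtils.getSensitiveWord(text, SensitiveWordUtils.maxMatchType));
        // minMatchTYpe (1) stops at the first end flag, so it finds 吴名
        System.out.println(SensitiveWordUtils.getSensitiveWord(text, SensitiveWordUtils.minMatchTYpe));
        // Replace each matched word with asterisks of the same length: ***到此一游
        System.out.println(SensitiveWordUtils.replaceSensitiveWord(text, SensitiveWordUtils.maxMatchType, "*"));
    }

}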
After the application has started, initialize the sensitive keywords. StartInit.java:

package com.wkf.workrecord.tools.dfa;

import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.wkf.workrecord.tools.dfa.entity.SensitiveWord;
import com.wkf.workrecord.tools.dfa.mapper.SensitiveWordMapper;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
import javax.annotation.Resource;
import java.util.List;

/**
 * Initialize the sensitive keywords
 * @author wuKeFan
 * @date 2023-03-02 13:57:45
 */
@Component
public class StartInit {

    @Resource
    private SensitiveWordMapper sensitiveWordMapper;

    @PostConstruct
    public void init() {
        // Load the sensitive word entities from the database through the mapper
        List<SensitiveWord> sensitiveWords = sensitiveWordMapper.selectList(new QueryWrapper<>());
        // Build the sensitive word dictionary
        SensitiveWordUtils.initKeyWord(sensitiveWords);
    }

}
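One thing to note: initKeyWord only adds entries, so if the sensitive word management module changes the table at runtime, the static dictionary goes stale until the application restarts. A possible refresh method for StartInit is sketched below (a hypothetical addition, not part of the original code; it simply clears the map and rebuilds it):

    // Hypothetical helper: call this after the sensitive_word table has changed
    public void refresh() {
        List<SensitiveWord> sensitiveWords = sensitiveWordMapper.selectList(new QueryWrapper<>());
        SensitiveWordUtils.sensitiveWordMap.clear();
        SensitiveWordUtils.initKeyWord(sensitiveWords);
    }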
2.2 Writing a test

Write a test to check the result. The code is as follows:

    @Test
    public void sensitiveWordTest() {
        Set<String> set = sensitiveWordService.sensitiveWordFiltering("吴名氏到此一游");
        for (String string : set) {
            System.out.println(string);
        }
    }

The result of running it: 吴名氏 is a sensitive word, and it is matched successfully.
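The replacement path can be exercised in the same way. Below is a sketch of such a test, assuming the dictionary has already been built by StartInit and calling the utility class directly:

    @Test
    public void replaceSensitiveWordTest() {
        // Replace every matched word with asterisks; expected output: ***到此一游
        String result = SensitiveWordUtils.replaceSensitiveWord("吴名氏到此一游", 2, "*");
        System.out.println(result);
    }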