Jsoup概述
jsoup(入口类为 org.jsoup.Jsoup)是一款强大的Java HTML解析库,它能够:
🚀 从URL/文件/字符串加载HTML文档
🔍 使用DOM遍历提取数据(类似jQuery语法)
✂️ 清理用户输入防止XSS攻击
🛠️ 操作HTML元素实现动态修改
📦 零依赖轻量级设计(仅280KB)
添加依赖(Maven)
<!-- jsoup: provides the Jsoup and Safelist classes used by XssCleaner to sanitize HTML input -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.16.2</version>
</dependency>
创建清理工具类
import org.jsoup.Jsoup;
import org.jsoup.safety.Safelist;
public class XssCleaner {
// 基本HTML标签白名单
private static final Safelist BASIC_WHITELIST = Safelist.basic()
.addTags("div", "span", "img")
.addAttributes("img", "src", "alt", "title")
.addProtocols("img", "src", "http", "https");
public static String clean(String dirty) {
return dirty == null ? null : Jsoup.clean(dirty, BASIC_WHITELIST);
}
}
创建可重复读取的请求包装器
import org.springframework.util.StreamUtils;
import javax.servlet.ReadListener;
import javax.servlet.ServletInputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;
import java.io.*;
/**
 * Request wrapper that reads the whole body once, keeps it in memory, and serves
 * a fresh stream over the cached bytes on every {@link #getInputStream()} /
 * {@link #getReader()} call, so the body can be read more than once.
 */
public class CachedBodyHttpServletRequest extends HttpServletRequestWrapper {

    /** Complete request body as read from the wrapped request at construction time. */
    private final byte[] cachedBody;

    /**
     * Wraps {@code request} and eagerly copies its body into memory.
     *
     * @param request the request whose body should be cached
     * @throws IOException if reading the underlying input stream fails
     */
    public CachedBodyHttpServletRequest(HttpServletRequest request) throws IOException {
        super(request);
        this.cachedBody = StreamUtils.copyToByteArray(request.getInputStream());
    }

    /** Returns a new stream positioned at the start of the cached body. */
    @Override
    public ServletInputStream getInputStream() {
        return new CachedBodyServletInputStream(cachedBody);
    }

    /**
     * Returns a reader over the cached body, decoded with the request's declared
     * character encoding. When the request declares none, UTF-8 is used instead of
     * the platform default so that decoding does not depend on the host machine.
     */
    @Override
    public BufferedReader getReader() {
        String encoding = getCharacterEncoding();
        java.nio.charset.Charset charset = encoding != null
                ? java.nio.charset.Charset.forName(encoding)
                : java.nio.charset.StandardCharsets.UTF_8;
        return new BufferedReader(new InputStreamReader(this.getInputStream(), charset));
    }

    /** {@link ServletInputStream} backed by an in-memory byte array. */
    public static class CachedBodyServletInputStream extends ServletInputStream {

        private final ByteArrayInputStream buffer;

        /**
         * @param contents bytes to expose through this stream
         */
        public CachedBodyServletInputStream(byte[] contents) {
            this.buffer = new ByteArrayInputStream(contents);
        }

        @Override
        public synchronized void reset() {
            buffer.reset();
        }

        @Override
        public int read() {
            return buffer.read();
        }

        // Bulk read delegates to the buffer instead of falling back to the
        // byte-at-a-time loop inherited from InputStream.
        @Override
        public int read(byte[] b, int off, int len) {
            return buffer.read(b, off, len);
        }

        // Report the real number of remaining bytes (InputStream's default is 0).
        @Override
        public int available() {
            return buffer.available();
        }

        @Override
        public boolean isFinished() {
            return buffer.available() == 0;
        }

        // Data is already in memory, so a read never blocks.
        @Override
        public boolean isReady() {
            return true;
        }

        // Non-blocking (listener-based) reads are not supported by this stream.
        @Override
        public void setReadListener(ReadListener listener) {
            throw new UnsupportedOperationException();
        }
    }
}
创建自定义参数过滤器
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.thsware.framework.util.XssCleaner;
import org.springframework.core.Ordered;
import org.springframework.core.annotation.Order;
import org.springframework.http.MediaType;
import org.springframework.util.StringUtils;
import org.springframework.web.filter.OncePerRequestFilter;
import javax.servlet.FilterChain;
import javax.servlet.ServletException;
import javax.servlet.ServletInputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;
import javax.servlet.http.HttpServletResponse;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
@Order(Ordered.HIGHEST_PRECEDENCE)
public class XssFilter extends OncePerRequestFilter {
private static final ObjectMapper objectMapper = new ObjectMapper();
@Override
protected void doFilterInternal(HttpServletRequest req,
HttpServletResponse res,
FilterChain chain) throws IOException, ServletException {
// 处理json请求
if(StringUtils.startsWithIgnoreCase(req.getContentType(), MediaType.APPLICATION_JSON_VALUE)){
CachedBodyHttpServletRequest requestWrapper = new CachedBodyHttpServletRequest(req);
// 获取原始JSON
JsonNode rootNode = objectMapper.readTree(requestWrapper.getInputStream());
// 递归清理所有字符串值
cleanJsonNode(rootNode);
// 将清理后的JSON写回请求
byte[] cleanedJson = objectMapper.writeValueAsBytes(rootNode);
requestWrapper = new CachedBodyHttpServletRequest(req) {
@Override
public ServletInputStream getInputStream() {
return new CachedBodyServletInputStream(cleanedJson);
}
};
chain.doFilter(requestWrapper, res);
return;
}
// 处理非json请求
chain.doFilter(new XssRequestWrapper(req), res);
}
private void cleanJsonNode(JsonNode node) {
if (node.isObject()) {
ObjectNode objectNode = (ObjectNode) node;
objectNode.fields().forEachRemaining(entry -> {
if (entry.getValue().isTextual()) {
// 清理文本值
String cleaned = XssCleaner.clean(entry.getValue().asText());
objectNode.put(entry.getKey(), cleaned);
} else if (entry.getValue().isObject() || entry.getValue().isArray()) {
// 递归处理嵌套对象/数组
cleanJsonNode(entry.getValue());
}
});
} else if (node.isArray()) {
node.elements().forEachRemaining(this::cleanJsonNode);
}
}
}
/**
 * Request wrapper that passes every request parameter value through
 * {@link XssCleaner#clean(String)} regardless of which accessor is used to read it.
 */
class XssRequestWrapper extends HttpServletRequestWrapper {

    public XssRequestWrapper(HttpServletRequest request) {
        super(request);
    }

    /**
     * @return the sanitized first value of {@code name}, or {@code null} if absent
     */
    @Override
    public String getParameter(String name) {
        return XssCleaner.clean(super.getParameter(name));
    }

    /**
     * @return sanitized copies of all values of {@code name}, or {@code null} if absent
     */
    @Override
    public String[] getParameterValues(String name) {
        return cleanAll(super.getParameterValues(name));
    }

    /**
     * @return an unmodifiable map of parameter names to sanitized values, in the
     *         iteration order of the wrapped request's map
     */
    @Override
    public Map<String, String[]> getParameterMap() {
        Map<String, String[]> cleaned = new LinkedHashMap<>();
        super.getParameterMap().forEach((name, values) -> cleaned.put(name, cleanAll(values)));
        return Collections.unmodifiableMap(cleaned);
    }

    /** Sanitizes each element into a new array; {@code null} stays {@code null}. */
    private static String[] cleanAll(String[] values) {
        if (values == null) {
            return null;
        }
        return Arrays.stream(values)
                .map(XssCleaner::clean)
                .toArray(String[]::new);
    }
}
注册过滤器
/**
 * Registers {@link XssFilter} with the servlet container.
 */
@Configuration
public class FilterConfig {

    /**
     * Maps the XSS filter to every URL and gives it the highest precedence.
     *
     * @return the registration bean for {@link XssFilter}
     */
    @Bean
    public FilterRegistrationBean<XssFilter> jsonXssFilter() {
        FilterRegistrationBean<XssFilter> registration = new FilterRegistrationBean<>();
        registration.setFilter(new XssFilter());
        registration.addUrlPatterns("/*");
        registration.setOrder(Ordered.HIGHEST_PRECEDENCE); // run with the highest precedence
        return registration;
    }
}