Jsoup概述

jsoup(核心入口类为 org.jsoup.Jsoup)是一款强大的 Java HTML 解析库,它能够:

🚀 从URL/文件/字符串加载HTML文档
🔍 使用DOM遍历或CSS选择器提取数据(类似jQuery语法)
✂️ 清理用户输入防止XSS攻击
🛠️ 操作HTML元素实现动态修改
📦 零依赖轻量级设计(仅280KB)

添加依赖(Maven)

<!-- jsoup: supplies org.jsoup.Jsoup and org.jsoup.safety.Safelist used by the sanitizer below -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.16.2</version>
</dependency>

创建清理工具类

import org.jsoup.Jsoup;
import org.jsoup.safety.Safelist;

/**
 * Static helper that strips disallowed markup from untrusted HTML fragments
 * using jsoup.
 */
public class XssCleaner {
    /**
     * Markup that survives cleaning: jsoup's basic safelist plus div, span
     * and img. An img may keep src/alt/title, and its src must use http or
     * https.
     */
    private static final Safelist ALLOWED_MARKUP = buildSafelist();

    /** Assembles the safelist step by step instead of via one fluent chain. */
    private static Safelist buildSafelist() {
        Safelist safelist = Safelist.basic();
        safelist.addTags("div", "span", "img");
        safelist.addAttributes("img", "src", "alt", "title");
        safelist.addProtocols("img", "src", "http", "https");
        return safelist;
    }

    /**
     * Sanitizes the given HTML fragment.
     *
     * @param dirty untrusted input, may be {@code null}
     * @return the cleaned fragment, or {@code null} when the input is {@code null}
     */
    public static String clean(String dirty) {
        if (dirty == null) {
            return null;
        }
        return Jsoup.clean(dirty, ALLOWED_MARKUP);
    }
}

创建可重复读取的请求包装器

import org.springframework.util.StreamUtils;

import javax.servlet.ReadListener;
import javax.servlet.ServletInputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;
import java.io.*;

/**
 * Request wrapper that reads the whole request body once, at construction
 * time, and afterwards serves it from memory so that it can be read any
 * number of times.
 */
public class CachedBodyHttpServletRequest extends HttpServletRequestWrapper {

    /** Charset name used by {@link #getReader()} when the request declares none. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    private final byte[] cachedBody;

    /**
     * Wraps the request and copies its body into memory.
     *
     * @param request the request whose body is to be cached
     * @throws IOException if reading the body fails
     */
    public CachedBodyHttpServletRequest(HttpServletRequest request) throws IOException {
        super(request);
        InputStream inputStream = request.getInputStream();
        this.cachedBody = StreamUtils.copyToByteArray(inputStream);
    }

    /**
     * @return a fresh stream positioned at the start of the cached body
     */
    @Override
    public ServletInputStream getInputStream() {
        return new CachedBodyServletInputStream(cachedBody);
    }

    /**
     * Returns a reader over the body served by {@link #getInputStream()}.
     * The bytes are decoded with the request's declared character encoding,
     * falling back to UTF-8, rather than with the JVM's platform default.
     *
     * @return a reader positioned at the start of the body
     * @throws UncheckedIOException if the declared encoding is not supported
     */
    @Override
    public BufferedReader getReader() {
        String encoding = getCharacterEncoding();
        if (encoding == null) {
            encoding = DEFAULT_ENCODING;
        }
        try {
            return new BufferedReader(new InputStreamReader(this.getInputStream(), encoding));
        } catch (UnsupportedEncodingException e) {
            // Keep the original throws-free signature; surface the cause unchecked.
            throw new UncheckedIOException("Unsupported request character encoding: " + encoding, e);
        }
    }

    /** {@link ServletInputStream} backed by an in-memory byte array. */
    public static class CachedBodyServletInputStream extends ServletInputStream {
        private final ByteArrayInputStream buffer;

        public CachedBodyServletInputStream(byte[] contents) {
            this.buffer = new ByteArrayInputStream(contents);
        }

        @Override
        public synchronized void reset() {
            buffer.reset();
        }

        @Override
        public int read() {
            return buffer.read();
        }

        /** Bulk read delegated to the buffer instead of looping over {@link #read()}. */
        @Override
        public int read(byte[] b, int off, int len) {
            return buffer.read(b, off, len);
        }

        /** @return the number of bytes still unread in the buffer */
        @Override
        public int available() {
            return buffer.available();
        }

        @Override
        public boolean isFinished() {
            return buffer.available() == 0;
        }

        @Override
        public boolean isReady() {
            // The data is already in memory, so a read never blocks.
            return true;
        }

        @Override
        public void setReadListener(ReadListener listener) {
            throw new UnsupportedOperationException();
        }
    }
}

创建自定义参数过滤器

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.thsware.framework.util.XssCleaner;
import org.springframework.core.Ordered;
import org.springframework.core.annotation.Order;
import org.springframework.http.MediaType;
import org.springframework.util.StringUtils;
import org.springframework.web.filter.OncePerRequestFilter;

import javax.servlet.FilterChain;
import javax.servlet.ServletException;
import javax.servlet.ServletInputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;

/**
 * Filter that sanitizes incoming request data with {@link XssCleaner}.
 *
 * <p>For requests whose content type starts with {@code application/json}
 * the body is parsed, every string value in the tree is cleaned, and the
 * re-serialized JSON is handed to the rest of the chain. Every request,
 * JSON or not, is also wrapped in {@link XssRequestWrapper} so that its
 * parameters are cleaned.
 */
@Order(Ordered.HIGHEST_PRECEDENCE)
public class XssFilter extends OncePerRequestFilter {
    private static final ObjectMapper objectMapper = new ObjectMapper();

    /**
     * @throws IOException      if the body cannot be read or is not valid JSON
     * @throws ServletException propagated from the rest of the chain
     */
    @Override
    protected void doFilterInternal(HttpServletRequest req,
                                    HttpServletResponse res,
                                    FilterChain chain) throws IOException, ServletException {
        // Non-JSON requests: only the parameters need cleaning.
        if (!StringUtils.startsWithIgnoreCase(req.getContentType(), MediaType.APPLICATION_JSON_VALUE)) {
            chain.doFilter(new XssRequestWrapper(req), res);
            return;
        }

        // JSON requests: buffer the body so it can be parsed here and still be read downstream.
        CachedBodyHttpServletRequest cachedRequest = new CachedBodyHttpServletRequest(req);
        JsonNode rootNode = objectMapper.readTree(cachedRequest.getInputStream());

        // An empty body parses to null or a "missing" node depending on the Jackson
        // version; pass it through untouched instead of failing or rewriting it.
        if (rootNode == null || rootNode.isMissingNode()) {
            chain.doFilter(new XssRequestWrapper(cachedRequest), res);
            return;
        }

        byte[] cleanedJson = objectMapper.writeValueAsBytes(cleanJsonNode(rootNode));

        // Wrap the buffered request rather than the raw one: the raw stream has
        // already been consumed. getReader() goes through getInputStream(), so
        // both accessors see the cleaned JSON.
        HttpServletRequest cleanedRequest = new CachedBodyHttpServletRequest(cachedRequest) {
            @Override
            public ServletInputStream getInputStream() {
                return new CachedBodyServletInputStream(cleanedJson);
            }
        };

        chain.doFilter(new XssRequestWrapper(cleanedRequest), res);
    }

    /**
     * Recursively cleans every string value reachable from {@code node}.
     * Objects and arrays are modified in place; a text node is immutable, so
     * a cleaned replacement is returned and the caller stores it.
     *
     * @param node the node to clean, may be {@code null}
     * @return the node that should take the place of {@code node}
     */
    private JsonNode cleanJsonNode(JsonNode node) {
        if (node == null) {
            return null;
        }
        if (node.isTextual()) {
            String cleaned = XssCleaner.clean(node.asText());
            return objectMapper.getNodeFactory().textNode(cleaned);
        }
        if (node.isObject()) {
            // Replacing a value through the entry does not change the set of
            // keys, so it is safe while iterating.
            node.fields().forEachRemaining(
                    entry -> entry.setValue(cleanJsonNode(entry.getValue())));
        } else if (node.isArray()) {
            ArrayNode array = (ArrayNode) node;
            for (int i = 0; i < array.size(); i++) {
                array.set(i, cleanJsonNode(array.get(i)));
            }
        }
        // Numbers, booleans and nulls carry no markup and are returned as they are.
        return node;
    }
}

/**
 * Request wrapper that runs every parameter value through
 * {@link XssCleaner#clean(String)}, whichever accessor is used to read it.
 */
class XssRequestWrapper extends HttpServletRequestWrapper {
    public XssRequestWrapper(HttpServletRequest request) {
        super(request);
    }

    /**
     * @return the cleaned first value of the parameter, or {@code null} if it is absent
     */
    @Override
    public String getParameter(String name) {
        return XssCleaner.clean(super.getParameter(name));
    }

    /**
     * @return the cleaned values of the parameter, or {@code null} if it is absent
     */
    @Override
    public String[] getParameterValues(String name) {
        return cleanAll(super.getParameterValues(name));
    }

    /**
     * @return an unmodifiable copy of the parameter map with every value cleaned
     */
    @Override
    public Map<String, String[]> getParameterMap() {
        Map<String, String[]> raw = super.getParameterMap();
        if (raw == null) {
            return Collections.emptyMap();
        }
        Map<String, String[]> cleaned = new LinkedHashMap<>();
        for (Map.Entry<String, String[]> entry : raw.entrySet()) {
            cleaned.put(entry.getKey(), cleanAll(entry.getValue()));
        }
        return Collections.unmodifiableMap(cleaned);
    }

    /** Cleans each element of {@code values}; a {@code null} array stays {@code null}. */
    private static String[] cleanAll(String[] values) {
        if (values == null) {
            return null;
        }
        return Arrays.stream(values)
            .map(XssCleaner::clean)
            .toArray(String[]::new);
    }
}

注册过滤器

/**
 * Registers {@link XssFilter} with the servlet container for every URL.
 */
@Configuration
public class FilterConfig {
    /**
     * Builds the registration for {@link XssFilter}, mapped to {@code /*} and
     * given the highest precedence.
     *
     * @return the filter registration bean
     */
    @Bean
    public FilterRegistrationBean<XssFilter> jsonXssFilter() {
        FilterRegistrationBean<XssFilter> registration = new FilterRegistrationBean<>();
        registration.setFilter(new XssFilter());
        registration.addUrlPatterns("/*");
        registration.setOrder(Ordered.HIGHEST_PRECEDENCE); // highest precedence
        return registration;
    }
}
最后修改:2025 年 06 月 18 日
如果觉得我的文章对你有用,请随意赞赏