Ver Fonte

add:爬虫逻辑更新v1

zhiqiang.lv há 1 mês atrás
pai
commit
194643a963

+ 5 - 0
pom.xml

@@ -103,6 +103,11 @@
             <artifactId>jsoup</artifactId>
             <version>1.14.3</version>
         </dependency>
+        <dependency>
+            <groupId>com.microsoft.playwright</groupId>
+            <artifactId>playwright</artifactId>
+            <version>1.49.0</version>
+        </dependency>
 
         <dependency>
             <groupId>org.springframework.boot</groupId>

+ 20 - 17
src/main/java/top/lvzhiqiang/job/JavJob.java

@@ -1,15 +1,16 @@
 package top.lvzhiqiang.job;
 
-import com.xxl.job.core.biz.model.ReturnT;
+import javax.annotation.Resource;
+
+import org.springframework.stereotype.Component;
+
 import com.xxl.job.core.context.XxlJobHelper;
 import com.xxl.job.core.handler.annotation.XxlJob;
+
 import lombok.extern.slf4j.Slf4j;
-import org.springframework.stereotype.Component;
 import top.lvzhiqiang.service.Crawler4JavbusService;
 import top.lvzhiqiang.service.Crawler4JavdbService;
 
-import javax.annotation.Resource;
-
 /**
  * javdb更新任务
  *
@@ -34,16 +35,17 @@ public class JavJob {
      * 2025/10/15 17:58
      */
     @XxlJob("monitorJavdbFavoritesJobHandler")
-    public ReturnT<String> monitorJavdbFavoritesJobHandler() {
+    public void monitorJavdbFavoritesJobHandler() {
         try {
             crawler4JavdbService.monitorJavdbFavorites();
+
+            XxlJobHelper.handleSuccess();
         } catch (Exception e) {
             log.error("monitorJavdbFavoritesJobHandler exception", e);
             XxlJobHelper.log(e);
-            return ReturnT.FAIL;
-        }
 
-        return ReturnT.SUCCESS;
+            XxlJobHelper.handleFail("monitorJavdbFavoritesJobHandler异常: " + e.getMessage());
+        }
     }
 
     /**
@@ -54,16 +56,17 @@ public class JavJob {
      * 2025/10/15 17:58
      */
     @XxlJob("monitorJavdbActorsJobHandler")
-    public ReturnT<String> monitorJavdbActorsJobHandler() {
+    public void monitorJavdbActorsJobHandler() {
         try {
             crawler4JavdbService.monitorJavdbActors();
+
+            XxlJobHelper.handleSuccess();
         } catch (Exception e) {
             log.error("monitorJavdbActorsJobHandler exception", e);
             XxlJobHelper.log(e);
-            return ReturnT.FAIL;
-        }
 
-        return ReturnT.SUCCESS;
+            XxlJobHelper.handleFail("monitorJavdbActorsJobHandler异常: " + e.getMessage());
+        }
     }
 
     /**
@@ -75,16 +78,16 @@ public class JavJob {
      * 2025/10/19 17:14
      */
     @XxlJob("checkJavbusVideoSiteJobHandler")
-    public ReturnT<String> checkJavbusVideoSiteJobHandler() {
+    public void checkJavbusVideoSiteJobHandler() {
         try {
             crawler4JavbusService.checkJavbusVideoSite();
+
+            XxlJobHelper.handleSuccess();
         } catch (Exception e) {
             log.error("checkJavbusVideoSiteJobHandler exception", e);
             XxlJobHelper.log(e);
-            return ReturnT.FAIL;
-        }
 
-        return ReturnT.SUCCESS;
+            XxlJobHelper.handleFail("checkJavbusVideoSiteJobHandler异常: " + e.getMessage());
+        }
     }
-
 }

+ 4 - 0
src/main/java/top/lvzhiqiang/mapper/DicCodeMapper.java

@@ -47,6 +47,10 @@ public interface DicCodeMapper {
     @MapKey("codeKey")
     Map<String, DicCode> findAllMap();
 
+    /**
+     * Loads the dic_code rows whose {@code env} column contains the given value as a substring
+     * and whose {@code delete_flag} equals 1, keyed by each row's {@code codeKey} property.
+     * <p>
+     * NOTE(review): the substring match means "dev" would also match an env value such as
+     * "dev2" - confirm this is intended.
+     *
+     * @param env environment name to match against the {@code env} column
+     * @return the matching entries keyed by {@code codeKey}
+     */
+    @Select("SELECT * FROM dic_code WHERE delete_flag = 1 AND env like concat('%',#{env},'%')")
+    @MapKey("codeKey")
+    Map<String, DicCode> findAllMapByEnv(String env);
+
     /**
      * 根据codeDesc模糊查询
      */

+ 31 - 0
src/main/java/top/lvzhiqiang/service/ScraperService.java

@@ -0,0 +1,31 @@
+package top.lvzhiqiang.service;
+
+import org.jsoup.nodes.Document;
+
+/**
+ * Common contract for site scrapers. To support another site (e.g. JavBus or JavLibrary),
+ * add a new implementation of this interface.
+ *
+ * @author: lvzhiqiang
+ * @date: 2026/2/11 13:54
+ */
+public interface ScraperService {
+
+    /**
+     * Returns the site type this scraper supports, e.g. {@code javdb}.
+     *
+     * @return the supported type identifier
+     * @author: lvzhiqiang
+     * @date: 2026/2/11 15:45
+     */
+    String getServiceType();
+
+    /**
+     * Core method: fetches the page source with a session check.
+     * All scraping code should go through this method rather than calling
+     * {@code page.navigate} directly.
+     *
+     * @param targetUrl URL of the page to fetch
+     * @return the fetched page as a Jsoup document
+     * @author: lvzhiqiang
+     * @date: 2026/2/11 15:45
+     */
+    Document getPageDocumentSecurely(String targetUrl);
+}

+ 307 - 0
src/main/java/top/lvzhiqiang/service/impl/AbstractPlaywrightService.java

@@ -0,0 +1,307 @@
+package top.lvzhiqiang.service.impl;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.InetSocketAddress;
+import java.net.Proxy;
+import java.net.URL;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.Base64;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import javax.annotation.PostConstruct;
+import javax.annotation.PreDestroy;
+import javax.annotation.Resource;
+
+import org.jsoup.Connection;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
+
+import com.alibaba.fastjson.JSONObject;
+import com.microsoft.playwright.Browser;
+import com.microsoft.playwright.BrowserContext;
+import com.microsoft.playwright.BrowserType;
+import com.microsoft.playwright.Page;
+import com.microsoft.playwright.Playwright;
+
+import lombok.extern.slf4j.Slf4j;
+import top.lvzhiqiang.entity.DicCode;
+import top.lvzhiqiang.exception.BusinessException;
+import top.lvzhiqiang.mapper.DicCodeMapper;
+import top.lvzhiqiang.service.ScraperService;
+import top.lvzhiqiang.util.JsoupUtil;
+
+/**
+ * 抽象基类:负责 Playwright 的生命周期管理、防反爬配置、Session 自动维护
+ *
+ * @author: lvzhiqiang
+ * @date: 2026/2/11 13:54
+ */
+@Slf4j
+@Component
+public abstract class AbstractPlaywrightService implements ScraperService {
+    // Playwright 对象池 (复用)
+    protected Playwright playwright;
+    protected Browser browser;
+    protected BrowserContext context;
+    protected Page page;
+    protected Proxy proxy;
+    protected Map<String, DicCode> codeConstantMap;
+
+    @Value("${spring.profiles.active}")
+    private String env;
+
+    @Resource
+    private DicCodeMapper dicCodeMapper;
+
+    /**
+     * Supplied by the subclass: the URL of the site's login page.
+     * {@link #login()} navigates to this URL before each login attempt.
+     *
+     * @return the login page URL
+     */
+    protected abstract String getLoginUrl();
+
+    /**
+     * Supplied by the subclass: the selector that {@link #getPageDocumentSecurely(String)}
+     * waits for before reading the page content.
+     *
+     * @return the selector marking the page's core content
+     */
+    protected abstract String getPageDocumentSelector();
+
+    /**
+     * Supplied by the subclass: the concrete login steps (filling the form, solving the captcha).
+     * Called by {@link #login()} once the login page has been opened.
+     *
+     * @return {@code true} when the login attempt succeeded
+     * @throws Exception when the attempt fails; {@link #login()} catches it and retries
+     */
+    protected abstract boolean doLoginAction() throws Exception;
+
+    /**
+     * Supplied by the subclass: decides whether the currently loaded page belongs to a
+     * logged-in session (for example by checking for an avatar in the top-right corner).
+     *
+     * @return {@code true} when the current page indicates a logged-in session
+     */
+    protected abstract boolean isLoginSuccess();
+
+    /**
+     * Initializes the shared Playwright browser, context and page once the bean is constructed
+     * (lazy loading would be an alternative).
+     * <p>
+     * The browser lives as long as this bean does. After a service restart a new browser is
+     * created without cookies, so the first call to {@link #getPageDocumentSecurely(String)}
+     * is expected to detect the missing session, run the login flow and then continue scraping.
+     * <p>
+     * If any step fails, everything created so far is released through {@link #cleanup()}
+     * before the exception is rethrown, so a failed start-up does not leave browser processes
+     * behind.
+     */
+    @PostConstruct
+    public void init() {
+        log.info("正在初始化 Playwright 浏览器内核...");
+        try {
+            // 1. Start the Playwright driver.
+            playwright = Playwright.create();
+
+            // Headed mode for debugging in the dev profile, headless everywhere else.
+            boolean isDev = "dev".equals(env);
+
+            // Chromium flags that keep the browser stable on a server.
+            List<String> launchArgs = Arrays.asList(
+                    "--no-sandbox", // allow running as root
+                    "--disable-setuid-sandbox", // avoid being blocked by kernel security mechanisms
+                    // Important: use /tmp instead of the small default /dev/shm so that
+                    // image-heavy pages do not crash the browser for lack of shared memory.
+                    "--disable-dev-shm-usage",
+                    "--disable-gpu", // servers have no GPU
+                    "--disable-blink-features=AutomationControlled" // hide the automation flag
+            );
+
+            // 2. Launch the browser.
+            log.info("正在启动浏览器 (模式: {} )...", isDev ? "有头调试" : "无头生产");
+            browser = playwright.chromium().launch(new BrowserType.LaunchOptions()
+                    .setHeadless(!isDev)
+                    .setChannel("chrome") // prefer the locally installed Chrome
+                    .setArgs(launchArgs));
+
+            // 3. Create the context with a user agent from JsoupUtil and a desktop viewport.
+            context = browser.newContext(new Browser.NewContextOptions()
+                    .setUserAgent(JsoupUtil.getUserAgent())
+                    .setViewportSize(1920, 1080));
+
+            // 60 seconds by default so slow Cloudflare challenge pages can finish loading.
+            context.setDefaultTimeout(60000);
+            page = context.newPage();
+
+            // 4. Proxy used for the plain HTTP helper requests (local proxy in dev only).
+            proxy = isDev
+                    ? new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 7897))
+                    : Proxy.NO_PROXY;
+
+            // 5. Dictionary values for the active environment.
+            codeConstantMap = dicCodeMapper.findAllMapByEnv(env);
+        } catch (RuntimeException e) {
+            log.error("Playwright initialization failed, releasing browser resources", e);
+            try {
+                cleanup();
+            } catch (RuntimeException cleanupError) {
+                // Keep the original failure as the primary exception.
+                e.addSuppressed(cleanupError);
+            }
+            throw e;
+        }
+
+        log.info("浏览器初始化完成。");
+    }
+
+    /**
+     * Releases the browser and the Playwright driver when the bean is destroyed.
+     * The driver is closed even if closing the browser throws.
+     */
+    @PreDestroy
+    public void cleanup() {
+        try {
+            if (browser != null) {
+                browser.close();
+            }
+        } finally {
+            if (playwright != null) {
+                playwright.close();
+            }
+        }
+    }
+
+    /**
+     * Core login routine with retries.
+     * <p>
+     * Each attempt opens the login page and delegates to {@link #doLoginAction()}. A failed
+     * attempt is logged, a screenshot is saved for diagnosis and the page is reloaded before
+     * the next attempt. Problems while taking the screenshot or reloading never abort the
+     * retry loop.
+     *
+     * @return {@code true} as soon as one attempt succeeds, {@code false} after all attempts fail
+     */
+    public boolean login() {
+        final int maxRetries = 3;
+        for (int i = 0; i < maxRetries; i++) {
+            try {
+                // Open the login page first.
+                page.navigate(getLoginUrl());
+                log.info("尝试登录{}... 第 {} 次", getLoginUrl(), (i + 1));
+                // Run the subclass-specific login steps.
+                if (doLoginAction()) {
+                    return true;
+                }
+            } catch (Exception e) {
+                log.error("登录超时,可能原因:验证码错误或账号问题。当前页面 URL: {}", page.url(), e);
+
+                // Keep a screenshot of the failed state for troubleshooting.
+                try {
+                    page.screenshot(new Page.ScreenshotOptions().setPath(Paths.get("login_error_" + i + ".png")));
+                } catch (Exception ignored) {
+                    // Best effort only: a failed screenshot must not stop the retries.
+                }
+            }
+
+            // Refresh before the next attempt; nothing to refresh for after the last one.
+            if (i < maxRetries - 1) {
+                try {
+                    page.reload();
+                } catch (Exception e) {
+                    // The next attempt navigates to the login page again anyway.
+                    log.warn("Reloading the page after a failed login attempt failed: {}", e.getMessage());
+                }
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Core method: loads {@code targetUrl} in the shared page and returns its HTML as a Jsoup
+     * document, logging in first when the session is missing or expired.
+     * All scraping code should call this method instead of using {@code page.navigate} directly.
+     *
+     * @param targetUrl URL of the page to fetch
+     * @return the page content parsed by Jsoup
+     * @throws BusinessException when the automatic login fails
+     */
+    @Override
+    public Document getPageDocumentSecurely(String targetUrl) {
+        log.info("准备访问: {}", targetUrl);
+
+        // First visit.
+        page.navigate(targetUrl);
+
+        // Being sent to the login page, or a page that does not look logged in, means the
+        // session has to be (re)established.
+        boolean redirectedToLogin = page.url().contains("/login");
+        if (redirectedToLogin || !isLoginSuccess()) {
+            log.warn("检测到 Session 过期或未登录,触发自动登录流程...");
+
+            if (!login()) {
+                throw new BusinessException("自动登录失败,无法抓取: " + targetUrl);
+            }
+
+            // Retry the original URL now that fresh cookies are available.
+            log.info("登录成功,重新访问目标页面...{}", targetUrl);
+            page.navigate(targetUrl);
+        }
+
+        // Wait for the subclass-defined core element; a timeout is logged and whatever has
+        // loaded so far is still returned.
+        Page.WaitForSelectorOptions waitOptions = new Page.WaitForSelectorOptions().setTimeout(30000);
+        try {
+            page.waitForSelector(getPageDocumentSelector(), waitOptions);
+        } catch (Exception e) {
+            log.error("页面加载超时或核心元素未找到: {}", e.getMessage());
+        }
+
+        // Hand the HTML string to Jsoup for parsing; Jsoup does no network access here.
+        String html = page.content();
+        return Jsoup.parse(html);
+    }
+
+    /**
+     * Downloads the captcha image from {@code captchaImgUrl} and recognizes its text.
+     *
+     * @param captchaImgUrl URL of the captcha image
+     * @return the text recognized in the image
+     * @throws Exception when the download or the OCR request fails
+     */
+    protected String getCode(String captchaImgUrl) throws Exception {
+        Map<String, String> headerParams = new HashMap<>();
+        headerParams.put("Content-Type", "application/x-www-form-urlencoded");
+
+        Connection.Response imgResponse = JsoupUtil.requestBody(captchaImgUrl, JsoupUtil.HTTP_GET, proxy, null,
+                headerParams, null);
+        // Recognition itself is shared with the byte[] overload.
+        return getCode(imgResponse.bodyAsBytes());
+    }
+
+    /**
+     * Sends the captcha image to the OCR endpoint configured under {@code bd_ocr_url} and
+     * returns the first recognized line of text.
+     *
+     * @param captchaImgBytes raw bytes of the captcha image
+     * @return the text of the first entry in the OCR {@code words_result}
+     * @throws BusinessException when the image is empty, no access token could be obtained,
+     *                           or the OCR response contains no recognized text
+     * @throws Exception         when the OCR request fails
+     */
+    protected String getCode(byte[] captchaImgBytes) throws Exception {
+        if (captchaImgBytes == null || captchaImgBytes.length == 0) {
+            throw new BusinessException("captcha image is empty, nothing to recognize");
+        }
+
+        String ocrAccurateBasicUrl = codeConstantMap.get("bd_ocr_url").getCodeValue();
+        String accessToken = getAuth(codeConstantMap.get("bd_ak").getCodeValue(),
+                codeConstantMap.get("bd_sk").getCodeValue());
+        if (accessToken == null) {
+            // getAuth returns null on failure; String.concat(null) would otherwise throw a bare NPE.
+            throw new BusinessException("failed to obtain the OCR access token");
+        }
+
+        Map<String, String> headerParams = new HashMap<>();
+        headerParams.put("Content-Type", "application/x-www-form-urlencoded");
+        Map<String, String> params = new HashMap<>();
+        params.put("image", Base64.getEncoder().encodeToString(captchaImgBytes));
+
+        Connection.Response ocrResponse = JsoupUtil.requestBody(
+                ocrAccurateBasicUrl.concat("?access_token=").concat(accessToken),
+                JsoupUtil.HTTP_POST, Proxy.NO_PROXY, headerParams, params);
+
+        JSONObject ocrResult = JSONObject.parseObject(ocrResponse.body());
+        com.alibaba.fastjson.JSONArray wordsResult = ocrResult == null ? null : ocrResult.getJSONArray("words_result");
+        if (wordsResult == null || wordsResult.isEmpty()) {
+            throw new BusinessException("OCR response contains no recognized text: " + ocrResponse.body());
+        }
+        return wordsResult.getJSONObject(0).getString("words");
+    }
+
+    /**
+     * Requests an API access token from the auth endpoint configured under
+     * {@code bd_authhost_url}.
+     * The token is only valid for a limited time and has to be requested again once it expires.
+     * <p>
+     * Neither the request URL nor the response body is logged, because both contain
+     * credentials.
+     *
+     * @param ak the API key obtained from the Baidu Cloud console
+     * @param sk the secret key obtained from the Baidu Cloud console
+     * @return the access token, e.g.
+     *         "24.460da4889caad24cccdb1fea17221975.2592000.1491995545.282335-1234567",
+     *         or {@code null} when the request fails or the response carries no token
+     */
+    private String getAuth(String ak, String sk) {
+        String authHost = codeConstantMap.get("bd_authhost_url").getCodeValue();
+        String getAccessTokenUrl = authHost
+                // 1. grant_type is a fixed parameter
+                + "grant_type=client_credentials"
+                // 2. API key
+                + "&client_id=" + ak
+                // 3. secret key
+                + "&client_secret=" + sk;
+
+        HttpURLConnection connection = null;
+        try {
+            connection = (HttpURLConnection) new URL(getAccessTokenUrl).openConnection();
+            connection.setRequestMethod("GET");
+            // Bound the wait so a stalled auth server cannot block the login flow forever.
+            connection.setConnectTimeout(10000);
+            connection.setReadTimeout(30000);
+            connection.connect();
+
+            StringBuilder result = new StringBuilder();
+            try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(),
+                    java.nio.charset.StandardCharsets.UTF_8))) {
+                String line;
+                while ((line = in.readLine()) != null) {
+                    result.append(line);
+                }
+            }
+
+            JSONObject jsonObject = JSONObject.parseObject(result.toString());
+            return jsonObject == null ? null : jsonObject.getString("access_token");
+        } catch (Exception e) {
+            log.error("获取token失败!", e);
+        } finally {
+            if (connection != null) {
+                connection.disconnect();
+            }
+        }
+        return null;
+    }
+}

+ 20 - 3
src/main/java/top/lvzhiqiang/service/impl/Crawler4JavdbServiceImpl.java

@@ -18,6 +18,7 @@ import top.lvzhiqiang.mapper.IcodePoolMapper;
 import top.lvzhiqiang.mapper.VideoMonitorActorsMapper;
 import top.lvzhiqiang.service.BgService;
 import top.lvzhiqiang.service.Crawler4JavdbService;
+import top.lvzhiqiang.service.ScraperService;
 import top.lvzhiqiang.util.JsoupUtil;
 import top.lvzhiqiang.util.StringUtils;
 
@@ -47,6 +48,8 @@ public class Crawler4JavdbServiceImpl implements Crawler4JavdbService {
     private IcodePoolMapper icodePoolMapper;
     @Resource
     private BgService bgService;
+    @Resource
+    private ScraperServiceFactory scraperServiceFactory;
 
     @Value("${spring.profiles.active}")
     private String env;
@@ -96,9 +99,17 @@ public class Crawler4JavdbServiceImpl implements Crawler4JavdbService {
                 List<String> insertCodeList = new ArrayList<>();
                 List<String> codeList = videoMonitorActorsMapper.findAllLogListBySymbol(videoMonitorActors.getSymbol(), 1);
 
-                Map<String, String> headerMap = new HashMap<>();
-                headerMap.put("referer", videoMonitorActors.getHttpUrl());
-                document = JsoupUtil.requestDocument(videoMonitorActors.getHttpUrl(), JsoupUtil.HTTP_GET, proxy, null, headerMap, null);
+                ScraperService scraperService = scraperServiceFactory.getScraperService(website);
+                try {
+                    document = scraperService.getPageDocumentSecurely(videoMonitorActors.getHttpUrl());
+                } catch (Exception e) {
+                    if (e.getMessage().contains("自动登录失败,无法抓取")) {
+                        XxlJobHelper.log("getPageDocumentSecurely exception,symbol={},actorsRemark={}", videoMonitorActors.getSymbol(), videoMonitorActors.getActorsRemark());
+                        return;
+                    }
+                    continue;
+                }
+
                 itembSelects = document.select("div.movie-list").select("div.item");
 
                 for (Element itembSelect : itembSelects) {
@@ -148,6 +159,12 @@ public class Crawler4JavdbServiceImpl implements Crawler4JavdbService {
                 XxlJobHelper.log("videoMonitorActors exception,symbol={},actorsRemark={}", videoMonitorActors.getSymbol(), videoMonitorActors.getActorsRemark());
                 XxlJobHelper.log(e);
             }
+
+            // 稍微休眠一下,防止过快被封 IP
+            try {
+                Thread.sleep(5000L);
+            } catch (InterruptedException e) {
+            }
         }
 
         XxlJobHelper.log("monitorActors结束==============================findSize={}", findSize);

+ 148 - 0
src/main/java/top/lvzhiqiang/service/impl/JavdbPlaywrightServiceImpl.java

@@ -0,0 +1,148 @@
+package top.lvzhiqiang.service.impl;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.springframework.stereotype.Service;
+
+import com.microsoft.playwright.Locator;
+import com.microsoft.playwright.Response;
+
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * 实现 JavDB 业务类
+ *
+ * @author: lvzhiqiang
+ * @date: 2026/2/11 13:54
+ */
+@Service
+@Slf4j
+public class JavdbPlaywrightServiceImpl extends AbstractPlaywrightService {
+
+    /**
+     * Returns the site type handled by this scraper: {@code javdb}.
+     *
+     * @return the literal {@code "javdb"}
+     * @author: lvzhiqiang
+     * @date: 2026/2/11 15:45
+     */
+    @Override
+    public String getServiceType() {
+        return "javdb";
+    }
+
+    /**
+     * Returns the URL of the JavDB login page.
+     *
+     * @return the login page URL
+     */
+    @Override
+    public String getLoginUrl() {
+        return "https://javdb.com/login";
+    }
+
+    /**
+     * Returns the selector of the element that marks a loaded listing page.
+     *
+     * @return the {@code .movie-list} selector
+     */
+    @Override
+    public String getPageDocumentSelector() {
+        return ".movie-list";
+    }
+
+    /**
+     * Reports whether the page currently loaded in the browser looks like a logged-in session.
+     *
+     * @return {@code true} when the page contains a profile link, or when its text does not
+     *         contain the "登入" (log in) label
+     */
+    @Override
+    protected boolean isLoginSuccess() {
+        // Inspect a Jsoup snapshot of the current page content.
+        Document currentPage = Jsoup.parse(page.content());
+
+        // A link to the user profile is only expected for a signed-in account.
+        if (!currentPage.select("a[href='/users/profile']").isEmpty()) {
+            return true;
+        }
+
+        // Otherwise the absence of the log-in label counts as logged in.
+        return !currentPage.text().contains("登入");
+    }
+
+    /**
+     * Performs a single login attempt on the already opened login page.
+     * <p>
+     * Steps: dismiss the age-confirmation dialog if shown, wait for the login form, solve the
+     * captcha through OCR when one is displayed, submit the credentials and wait until the
+     * browser leaves the login page.
+     *
+     * @return {@code true} when the browser navigated away from the login page
+     * @throws Exception when the form does not appear, the captcha cannot be recognized, or the
+     *                   page does not leave the login URL within 15 seconds (typically a wrong
+     *                   captcha); the caller is expected to retry
+     */
+    @Override
+    protected boolean doLoginAction() throws Exception {
+        // 1. Dismiss the age-confirmation dialog when it is present.
+        try {
+            if (page.isVisible("text=是,我已滿18歲")) {
+                log.info("检测到18岁验证弹窗,正在点击...");
+                page.click("text=是,我已滿18歲");
+
+                // Give the dialog animation a second to finish so it cannot cover the form.
+                page.waitForTimeout(1000);
+            }
+        } catch (Exception ignored) {
+            // The dialog is optional; a failure here must not interrupt the login flow.
+            log.info("未检测到18岁验证弹窗或已自动跳过");
+        }
+
+        // 2. Wait for the form. No explicit timeout: the context default applies, which leaves
+        // room for the Cloudflare challenge to complete.
+        log.info("等待登录表单加载...");
+        page.waitForSelector("input[name='email']");
+        log.info("已绕过 Cloudflare,进入登录页!");
+
+        // 3. Captcha handling. The site does not always show one.
+        Locator captchaImg = page.locator("img[alt='圖形驗證碼']");
+        if (captchaImg.isVisible()) {
+            log.info("检测到验证码图片,准备识别...");
+
+            // Clicking the image requests a fresh captcha; capturing that response keeps the
+            // recognized image in sync with what the page currently shows.
+            Response captchaResponse = page.waitForResponse(
+                    res -> res.url().contains("/rucaptcha/") && res.status() == 200,
+                    captchaImg::click);
+
+            byte[] imgBytes = captchaResponse.body();
+            log.info("成功捕获验证码图片,大小: {} bytes", imgBytes.length);
+
+            // Keep a copy of the image for troubleshooting; failing to write it is not fatal.
+            try {
+                Files.write(Paths.get("output.png"), imgBytes);
+                log.debug("Captcha image written to output.png");
+            } catch (IOException e) {
+                log.warn("Failed to write captcha image to output.png", e);
+            }
+
+            // Recognize the captcha through the OCR service.
+            String captchaCode = getCode(imgBytes);
+            log.info("验证码识别完成,结果: {}", captchaCode);
+            if (captchaCode == null || captchaCode.trim().isEmpty()) {
+                // Submitting an empty captcha can only fail; let the caller retry right away.
+                throw new IllegalStateException("captcha OCR returned no text");
+            }
+            page.fill("input[name='_rucaptcha']", captchaCode);
+        }
+
+        // 4. Fill in the credentials and submit.
+        page.fill("input[name='email']", codeConstantMap.get("javdb_username").getCodeValue());
+        page.fill("input[name='password']", codeConstantMap.get("javdb_password").getCodeValue());
+        // Tick "remember me" when the checkbox is shown.
+        if (page.isVisible("input[name='remember']")) {
+            page.check("input[name='remember']");
+        }
+
+        page.click("input[name='commit']");
+
+        // 5. Wait until the URL is no longer a login/sessions URL. A timeout propagates to the
+        // caller and usually means the captcha was wrong.
+        page.waitForURL(url -> !url.contains("/login") && !url.contains("sessions"),
+                new com.microsoft.playwright.Page.WaitForURLOptions().setTimeout(15000));
+
+        log.info("登录成功!跳转到了: {}", page.url());
+        return true;
+    }
+}

+ 53 - 0
src/main/java/top/lvzhiqiang/service/impl/ScraperServiceFactory.java

@@ -0,0 +1,53 @@
+package top.lvzhiqiang.service.impl;
+
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.springframework.stereotype.Component;
+
+import top.lvzhiqiang.service.ScraperService;
+
+/**
+ * Strategy factory that resolves a {@link ScraperService} by its service type.
+ *
+ * @author: lvzhiqiang
+ * @date: 2026/2/11 15:45
+ */
+@Component
+public class ScraperServiceFactory {
+
+    /**
+     * Registered scrapers keyed by {@link ScraperService#getServiceType()}, e.g. "javdb".
+     */
+    private final Map<String, ScraperService> scraperServiceMap = new ConcurrentHashMap<>();
+
+    /**
+     * Constructor injection: Spring passes in every {@link ScraperService} implementation.
+     *
+     * @param scraperServiceList all scraper implementations to register
+     * @throws IllegalStateException when an implementation reports no type, or two
+     *                               implementations report the same type
+     * @author: lvzhiqiang
+     * @date: 2026/2/11 15:45
+     */
+    public ScraperServiceFactory(List<ScraperService> scraperServiceList) {
+        for (ScraperService scraperService : scraperServiceList) {
+            String serviceType = scraperService.getServiceType();
+            if (serviceType == null) {
+                // ConcurrentHashMap rejects null keys; fail with a message naming the culprit.
+                throw new IllegalStateException(
+                        "ScraperService returned a null service type: " + scraperService.getClass().getName());
+            }
+            ScraperService previous = scraperServiceMap.putIfAbsent(serviceType, scraperService);
+            if (previous != null) {
+                // Silently replacing one scraper with another would make lookups order-dependent.
+                throw new IllegalStateException("Duplicate ScraperService type '" + serviceType + "': "
+                        + previous.getClass().getName() + " and " + scraperService.getClass().getName());
+            }
+        }
+    }
+
+    /**
+     * Returns the scraper registered for the given type.
+     *
+     * @param serviceType the type identifier, e.g. "javdb"
+     * @return the matching scraper
+     * @throws IllegalArgumentException when {@code serviceType} is {@code null} or unknown
+     * @author: lvzhiqiang
+     * @date: 2026/2/11 15:45
+     */
+    public ScraperService getScraperService(String serviceType) {
+        // A null key would make ConcurrentHashMap.get throw a NullPointerException.
+        ScraperService scraperService = serviceType == null ? null : scraperServiceMap.get(serviceType);
+        if (scraperService == null) {
+            throw new IllegalArgumentException("未找到该类型的ScraperService: " + serviceType);
+        }
+        return scraperService;
+    }
+}

+ 1 - 1
src/main/java/top/lvzhiqiang/util/JsoupUtil.java

@@ -96,7 +96,7 @@ public class JsoupUtil {
         return requestBody(url, httpMethod, proxy, null, headers, data);
     }
 
-    private static String getUserAgent() {
+    public static String getUserAgent() {
         Random r = new Random();
         String[] ua = {"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
                 "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36 OPR/37.0.2178.32",

+ 9 - 0
src/test/java/top/lvzhiqiang/CrawlerLoveFootTest.java

@@ -5,6 +5,8 @@ import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.springframework.boot.test.context.SpringBootTest;
 import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
+
+import top.lvzhiqiang.service.Crawler4JavdbService;
 import top.lvzhiqiang.service.Crawler4LoveFootService;
 
 import javax.annotation.Resource;
@@ -26,9 +28,16 @@ public class CrawlerLoveFootTest {
 
     @Resource
     private Crawler4LoveFootService crawler4LoveFootService;
+    @Resource
+    private Crawler4JavdbService crawler4JavdbService;
 
     @Test
     public void testJsoupLoveFoot4feetpassion() throws Exception {
         crawler4LoveFootService.jsoupLoveFoot4feetpassion(null, 2, 2);
     }
+
+    /**
+     * Runs the JavDB actor monitoring by calling the service method directly rather than
+     * going through the job handler. The test makes no assertions; it only fails when the
+     * call throws.
+     */
+    @Test
+    public void testMonitorJavdbActorsJobHandler() throws Exception {
+        crawler4JavdbService.monitorJavdbActors();
+    }
 }