|
|
@@ -1,42 +1,36 @@
|
|
|
package top.lvzhiqiang.service.impl;
|
|
|
|
|
|
-import java.io.BufferedReader;
|
|
|
-import java.io.InputStreamReader;
|
|
|
-import java.net.HttpURLConnection;
|
|
|
-import java.net.InetSocketAddress;
|
|
|
-import java.net.Proxy;
|
|
|
-import java.net.URL;
|
|
|
-import java.nio.file.Paths;
|
|
|
-import java.util.Arrays;
|
|
|
-import java.util.Base64;
|
|
|
-import java.util.HashMap;
|
|
|
-import java.util.List;
|
|
|
-import java.util.Map;
|
|
|
-
|
|
|
-import javax.annotation.PostConstruct;
|
|
|
-import javax.annotation.PreDestroy;
|
|
|
-import javax.annotation.Resource;
|
|
|
-
|
|
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
+import com.microsoft.playwright.*;
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
import org.jsoup.Connection;
|
|
|
import org.jsoup.Jsoup;
|
|
|
import org.jsoup.nodes.Document;
|
|
|
import org.springframework.beans.factory.annotation.Value;
|
|
|
+import org.springframework.boot.system.ApplicationHome;
|
|
|
import org.springframework.stereotype.Component;
|
|
|
-
|
|
|
-import com.alibaba.fastjson.JSONObject;
|
|
|
-import com.microsoft.playwright.Browser;
|
|
|
-import com.microsoft.playwright.BrowserContext;
|
|
|
-import com.microsoft.playwright.BrowserType;
|
|
|
-import com.microsoft.playwright.Page;
|
|
|
-import com.microsoft.playwright.Playwright;
|
|
|
-
|
|
|
-import lombok.extern.slf4j.Slf4j;
|
|
|
import top.lvzhiqiang.entity.DicCode;
|
|
|
import top.lvzhiqiang.exception.BusinessException;
|
|
|
import top.lvzhiqiang.mapper.DicCodeMapper;
|
|
|
import top.lvzhiqiang.service.ScraperService;
|
|
|
import top.lvzhiqiang.util.JsoupUtil;
|
|
|
|
|
|
+import javax.annotation.PostConstruct;
|
|
|
+import javax.annotation.PreDestroy;
|
|
|
+import javax.annotation.Resource;
|
|
|
+import java.io.BufferedReader;
|
|
|
+import java.io.InputStreamReader;
|
|
|
+import java.net.HttpURLConnection;
|
|
|
+import java.net.InetSocketAddress;
|
|
|
+import java.net.Proxy;
|
|
|
+import java.net.URL;
|
|
|
+import java.nio.file.Files;
|
|
|
+import java.nio.file.Path;
|
|
|
+import java.nio.file.Paths;
|
|
|
+import java.time.LocalDateTime;
|
|
|
+import java.time.format.DateTimeFormatter;
|
|
|
+import java.util.*;
|
|
|
+
|
|
|
/**
|
|
|
* 抽象基类:负责 Playwright 的生命周期管理、防反爬配置、Session 自动维护
|
|
|
*
|
|
|
@@ -117,7 +111,11 @@ public abstract class AbstractPlaywrightService implements ScraperService {
|
|
|
log.info("正在启动浏览器 (模式: {} )...", isDev ? "有头调试" : "无头生产");
|
|
|
browser = playwright.chromium().launch(new BrowserType.LaunchOptions()
|
|
|
.setHeadless(!isDev) // 生产环境 true (无头),开发环境 false (有头)
|
|
|
- .setChannel("chrome") // 尽量使用本机 Chrome,抗指纹能力更强
|
|
|
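+ // setChannel("chrome") tells Playwright not to use its bundled Chromium and instead launch a Google Chrome that is already installed on the system (on Linux usually /opt/google/chrome/chrome).
|
|
|
+ // The CentOS server has no Google Chrome installed: the RPM package was never installed, and installing Chrome on CentOS 7 is painful because of its dependencies.
|
|
|
+ // Playwright must therefore use the Chromium it downloads itself (a self-contained build under ~/.cache/ms-playwright) rather than the system Chrome.
|
|
|
+ // For that reason the .setChannel("chrome") line below is commented out.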
|
|
|
+ //.setChannel("chrome") // 尽量使用本机 Chrome,抗指纹能力更强
|
|
|
.setArgs(launchArgs) // 使用 Arrays.asList 生成的参数
|
|
|
);
|
|
|
|
|
|
@@ -155,11 +153,11 @@ public abstract class AbstractPlaywrightService implements ScraperService {
|
|
|
* 核心登录方法(带重试机制),返回是否成功
|
|
|
*/
|
|
|
public boolean login() {
|
|
|
- int maxRetries = 3;
|
|
|
+ int maxRetries = 5;
|
|
|
+ // 先去登录页
|
|
|
+ page.navigate(getLoginUrl());
|
|
|
for (int i = 0; i < maxRetries; i++) {
|
|
|
try {
|
|
|
- // 先去登录页
|
|
|
- page.navigate(getLoginUrl());
|
|
|
log.info("尝试登录{}... 第 {} 次", getLoginUrl(), (i + 1));
|
|
|
// 执行子类的动作
|
|
|
if (doLoginAction()) {
|
|
|
@@ -170,7 +168,17 @@ public abstract class AbstractPlaywrightService implements ScraperService {
|
|
|
|
|
|
// 截图保留现场,方便排查
|
|
|
try {
|
|
|
- page.screenshot(new Page.ScreenshotOptions().setPath(Paths.get("login_error_" + i + ".png")));
|
|
|
+ // 1. 获取 JAR 包所在的绝对路径 (Spring Boot 专属神器)
|
|
|
+ String jarPath = new ApplicationHome(getClass()).getSource().getParent();
|
|
|
+ // 2. Build the sub-directory path (e.g. /usr/program/jav/files/playwright)
|
|
|
+ Path dirPath = Paths.get(jarPath, "files/playwright"); // 假设子文件夹叫 images
|
|
|
+ // 3. 关键一步:如果文件夹不存在,必须先创建!
|
|
|
+ if (!Files.exists(dirPath)) Files.createDirectories(dirPath);
|
|
|
+ // 4. 拼接完整的文件路径
|
|
|
+ String timeStr = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"));
|
|
|
+ Path fullPath = dirPath.resolve("login_error_" + timeStr + "_" + i + ".png");
|
|
|
+
|
|
|
+ page.screenshot(new Page.ScreenshotOptions().setPath(fullPath));
|
|
|
} catch (Exception ignored) {
|
|
|
}
|
|
|
}
|