Просмотр исходного кода

update:javbus站点防屏蔽地址抓取v2

tujidelv 3 года назад
Родитель
Коммит
3bfea19548
Изменено файлов: 2; добавлено строк: 50, удалено строк: 5
  1. 30 5
      src/main/java/top/lvzhiqiang/config/MyJobs.java
  2. 20 0
      src/test/java/Test5.java

+ 30 - 5
src/main/java/top/lvzhiqiang/config/MyJobs.java

@@ -43,6 +43,7 @@ public class MyJobs {
     @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
     public void checkVideoSite() {
         log.info("checkVideoSite开始==============================");
+
         // 获取javbus官方地址
         DicCode dicCode = WebAppConfig.dicCodeList.stream().filter(x -> 2 == x.getType() && "javbus".equals(x.getCodeKey())).findFirst().get();
         if (dicCode == null) {
@@ -50,23 +51,47 @@ public class MyJobs {
             return;
         }
 
+        // 获取javbusUrlList
+        List<String> javbusUrlList = videoSitePoolMapper.findUrlByType(1);
+
         // 获取javbusNewUrlList
         Set<String> javbusNewUrlList = new HashSet<>();
         try {
-            Document document = Jsoup.connect(dicCode.getCodeValue()).timeout(50000).ignoreContentType(true).get();
+            Document document = Jsoup.connect(dicCode.getCodeValue()).timeout(50000).ignoreContentType(true)
+                    .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36")
+                    .header("referer", "https://www.javbus.com/").get();
 
             Elements ahrefList = document.select("strong:contains(防屏蔽地址)").next("a");
             for (Element element : ahrefList) {
                 String text = element.text();
-                log.info("javbus防屏蔽地址:{}", text);
+                log.info("Jsoup获取{}防屏蔽地址:{}", dicCode.getCodeValue(), text);
                 javbusNewUrlList.add(text);
             }
         } catch (Exception e) {
-            log.error("Jsoup抓取javbus防屏蔽地址异常", e);
+            log.error("Jsoup获取{}防屏蔽地址异常", dicCode.getCodeValue(), e);
         }
 
-        // 获取javbusUrlList
-        List<String> javbusUrlList = videoSitePoolMapper.findUrlByType(1);
+        if (javbusNewUrlList.size() == 0 && javbusUrlList.size() > 0) {
+            for (String javbusUrl : javbusUrlList) {
+                try {
+                    Document document = Jsoup.connect(javbusUrl).timeout(50000).ignoreContentType(true)
+                            .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36")
+                            .header("referer", "https://www.javbus.com/").get();
+
+                    Elements ahrefList = document.select("strong:contains(防屏蔽地址)").next("a");
+                    for (Element element : ahrefList) {
+                        String text = element.text();
+                        log.info("Jsoup获取{}防屏蔽地址:{}", javbusUrl, text);
+                        javbusNewUrlList.add(text);
+                    }
+                    if (javbusNewUrlList.size() > 0) {
+                        break;
+                    }
+                } catch (Exception e) {
+                    log.error("Jsoup获取{}防屏蔽地址异常", javbusUrl, e);
+                }
+            }
+        }
 
         if (javbusNewUrlList.size() == 0) {
             log.warn("javbusNewUrlList为空");

+ 20 - 0
src/test/java/Test5.java

@@ -0,0 +1,20 @@
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+
+public class Test5 {
+    // Ad-hoc runner: fetches one hard-coded page and prints the text of each <a> found right after a matching <strong>.
+    public static void main(String[] args) throws IOException {
+        Document page = Jsoup.connect("https://www.javsee.men").timeout(50000).ignoreContentType(true)
+                .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36")
+                .header("referer", "https://www.javbus.com/").get();
+        // next("a") keeps only the immediate next sibling of each matched <strong>, and only when that sibling is an <a>.
+        Elements links = page.select("strong:contains(防屏蔽地址)").next("a");
+        for (Element link : links) {
+            System.out.println(link.text());
+        }
+    }
+}