Prechádzať zdrojové kódy

add:骑兵数据jsoupVideoInfo

tujidelv 3 rokov pred
rodič
commit
5fceea8d51

+ 17 - 0
src/main/java/top/lvzhiqiang/controller/BgController.java

@@ -134,6 +134,23 @@ public class BgController {
     }
 
     /**
+     * Jsoup VideoInfo: starts the scrape of VideoInfo records through
+     * {@code bgService.jsoupVideoInfo} and answers with {@code R.ok()}.
+     *
+     * @param status status of the records to process; 1 is used when it is absent
+     * @return {@code R.ok()}
+     * @author lvzhiqiang
+     * 2022/5/5 18:21
+     */
+    @RequestMapping("/jsoupVideoInfo")
+    @ResponseBody
+    public R jsoupVideoInfo(Integer status) {
+        // No status supplied means status 1.
+        Integer effectiveStatus = (status != null) ? status : Integer.valueOf(1);
+
+        bgService.jsoupVideoInfo(effectiveStatus);
+        return R.ok();
+    }
+
+    /**
      * findDicCode
      *
      * @author lvzhiqiang

+ 5 - 0
src/main/java/top/lvzhiqiang/entity/VideoInfo.java

@@ -75,6 +75,11 @@ public class VideoInfo implements Serializable {
     private Integer type;
 
     /**
+     * Crawl status (1: waiting to be crawled, 2: crawl succeeded, 3: crawl failed)
+     */
+    private Integer status;
+
+    /**
      * 主体是谁
      */
     private String mainWho;

+ 20 - 0
src/main/java/top/lvzhiqiang/mapper/VideoInfoMapper.java

@@ -2,6 +2,7 @@ package top.lvzhiqiang.mapper;
 
 import org.apache.ibatis.annotations.*;
 import top.lvzhiqiang.entity.VideoInfo;
+import top.lvzhiqiang.entity.VideoInfoUncensored;
 
 import java.util.List;
 import java.util.Map;
@@ -59,6 +60,12 @@ public interface VideoInfoMapper {
     List<String> findAllIcode();
 
     /**
+     * Queries identification codes by status.
+     * Only rows of {@code video_info} with {@code delete_flag = 1} are selected.
+     *
+     * @param status value compared with the {@code status} column
+     * @return the {@code identification_code} values of the matching rows
+     */
+    @Select("SELECT identification_code FROM video_info WHERE delete_flag = 1 and status = #{status}")
+    List<String> findIcodeByStatus(Integer status);
+
+    /**
      * 根据条件查询
      */
     List<VideoInfo> getVideoInfoList(Map<String, Object> params);
@@ -84,4 +91,17 @@ public interface VideoInfoMapper {
 
     @Update("update video_info set delete_flag = 2,modify_time = now() where identification_code = #{identificationCode}")
     void delByCode(String identificationCode);
+
+    /**
+     * Writes the scraped columns (length, director, maker, issuer, issue_date) and the
+     * status of the row whose {@code identification_code} matches, and sets
+     * {@code modify_time} to {@code now()}.
+     * NOTE(review): every listed column is assigned, so properties left unset on
+     * {@code videoInfo} presumably overwrite the column with NULL - confirm.
+     *
+     * @param videoInfo supplies the values bound in the statement, including the identification code
+     */
+    @Update("update video_info set length = #{length}, director = #{director}, maker = #{maker}, issuer = #{issuer}, issue_date = #{issueDate}, status = #{status}, modify_time = now() where identification_code = #{identificationCode}")
+    void updateJsoupInfoByCode(VideoInfo videoInfo);
+
+    /**
+     * Updates the status of the row whose {@code identification_code} matches and sets
+     * {@code modify_time} to {@code now()}.
+     * <p>
+     * Both parameters carry an explicit {@code @Param} name so that the
+     * {@code #{...}} placeholders bind regardless of whether the code was compiled
+     * with the {@code -parameters} flag.
+     *
+     * @param identificationCode identification code of the row to update
+     * @param status             new status value
+     * @return the update count reported by MyBatis for this statement
+     */
+    @Update("update video_info set status = #{status},modify_time = now() where identification_code = #{identificationCode}")
+    int updateStatus(@Param("identificationCode") String identificationCode, @Param("status") Integer status);
 }

+ 8 - 0
src/main/java/top/lvzhiqiang/service/BgService.java

@@ -92,4 +92,12 @@ public interface BgService {
      * 2022/5/4 19:27
      */
     void jsoupVideoInfoUncensored(Integer status);
+
+    /**
+     * Jsoup VideoInfo: scrapes details for the VideoInfo records that have the given status.
+     *
+     * @param status status of the records to process
+     * @author lvzhiqiang
+     * 2022/5/5 18:21
+     */
+    void jsoupVideoInfo(Integer status);
 }

+ 113 - 0
src/main/java/top/lvzhiqiang/service/impl/BgServiceImpl.java

@@ -261,6 +261,7 @@ public class BgServiceImpl implements BgService {
      * @author lvzhiqiang
      * 2022/5/4 19:27
      */
+    @Async
     @Override
     public void jsoupVideoInfoUncensored(Integer status) {
         log.info("jsoupVideoInfoUncensored 开始:status={}", status);
@@ -329,6 +330,77 @@ public class BgServiceImpl implements BgService {
     }
 
     /**
+     * Jsoup VideoInfo.
+     * <p>
+     * Loads the identification codes whose row has the given status and, for each one,
+     * fetches its page from a randomly picked javbus address, parses it and stores the
+     * result with status 2. A code gets at most four attempts; after the fourth failure
+     * its status is set to 3. A summary line with the total and the number of successes
+     * is logged at the end.
+     *
+     * @param status status of the rows to process
+     * @author lvzhiqiang
+     * 2022/5/5 18:21
+     */
+    @Async
+    @Override
+    public void jsoupVideoInfo(Integer status) {
+        log.info("jsoupVideoInfo 开始:status={}", status);
+
+        // Codes waiting to be scraped
+        List<String> codes = videoInfoMapper.findIcodeByStatus(status);
+        if (codes.isEmpty()) {
+            log.warn("status={}的icodeList为空", status);
+            return;
+        }
+        // javbus addresses used to get around blocking
+        List<String> mirrors = videoSitePoolMapper.findUrlByTypeAndDeleteFlag(1, 1);
+        if (mirrors.isEmpty()) {
+            log.warn("javbusUrlList为空");
+            return;
+        }
+
+        final int maxAttempts = 4;
+        int successCount = 0;
+        for (int i = 0; i < codes.size(); i++) {
+            final long start = System.currentTimeMillis();
+            final String identificationCode = codes.get(i);
+
+            int failures = 0;
+            while (failures < maxAttempts) {
+                // Every attempt picks a fresh random address.
+                String javbusUrl = mirrors.get((int) (Math.random() * mirrors.size()));
+                try {
+                    Document document = Jsoup.connect(javbusUrl.concat("/").concat(identificationCode))
+                            .timeout(50000)
+                            .ignoreContentType(true)
+                            .userAgent(getUserAgent())
+                            .header("referer", "https://www.javbus.com/".concat(identificationCode))
+                            .get();
+
+                    VideoInfo videoInfo = new VideoInfo();
+                    videoInfo.setIdentificationCode(identificationCode);
+                    parseDocument4QiBing(document, identificationCode, videoInfo);
+                    videoInfo.setStatus(2);
+                    videoInfoMapper.updateJsoupInfoByCode(videoInfo);
+
+                    successCount++;
+
+                    log.info("jsoupVideoInfo success:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode);
+                    break;
+                } catch (Exception e) {
+                    failures++;
+
+                    if (failures == maxAttempts) {
+                        // Out of attempts: mark the row as failed.
+                        videoInfoMapper.updateStatus(identificationCode, 3);
+                        log.error("jsoupVideoInfo error:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode, e);
+                    } else {
+                        log.error("jsoupVideoInfo error重试:i={},retryCount={},time={},identificationCode={}", i, failures, System.currentTimeMillis() - start, identificationCode, e);
+                    }
+                }
+            }
+        }
+        log.info("jsoupVideoInfo 统计:total={},success={}", codes.size(), successCount);
+    }
+
+    /**
      * 初始化骑兵数据
      */
     @Override
@@ -699,6 +771,47 @@ public class BgServiceImpl implements BgService {
         }
     }
 
+    /**
+     * Copies the details shown on one detail page into {@code videoInfo}.
+     * <p>
+     * The first three {@code div.info > p} paragraphs are read by position:
+     * identification code, issue date, length. Director, maker and issuer are looked up
+     * by their label text and are only set when the labelled paragraph contains a link.
+     *
+     * @param document           parsed detail page
+     * @param identificationCode code the page is expected to show (compared ignoring case)
+     * @param videoInfo          receives issue date, length and, when present, director, maker and issuer
+     * @throws Exception when the page has no {@code div.container}, has fewer than three
+     *                   info paragraphs, shows a different (or no) identification code,
+     *                   or an info paragraph has no value after its ':'
+     */
+    private void parseDocument4QiBing(Document document, String identificationCode, VideoInfo videoInfo) throws Exception {
+        Elements container = document.select("div.container");
+        if (container.size() == 0) {
+            throw new Exception("番号无效!");
+        }
+
+        Elements pEles = container.select("div.info > p");
+        // Code, issue date and length are read by position, so all three paragraphs must exist.
+        if (pEles.size() < 3) {
+            throw new Exception("Unexpected page layout for " + identificationCode
+                    + ": expected at least 3 info paragraphs but found " + pEles.size());
+        }
+
+        // Identification code
+        Element codeSpan = pEles.get(0).select("span[style]").first();
+        if (codeSpan == null || !identificationCode.equalsIgnoreCase(codeSpan.text())) {
+            throw new Exception("番号与站点不一致");
+        }
+        // Issue date
+        String issueDate = infoValue4QiBing(pEles.get(1));
+        videoInfo.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
+        // Length
+        videoInfo.setLength(infoValue4QiBing(pEles.get(2)));
+
+        Elements info = container.select("div.info");
+        // Director
+        String director = linkText4QiBing(info, "導演");
+        if (director != null) {
+            videoInfo.setDirector(director);
+        }
+        // Maker
+        String maker = linkText4QiBing(info, "製作商");
+        if (maker != null) {
+            videoInfo.setMaker(maker);
+        }
+        // Issuer
+        String issuer = linkText4QiBing(info, "發行商");
+        if (issuer != null) {
+            videoInfo.setIssuer(issuer);
+        }
+    }
+
+    /**
+     * Returns the second ':'-separated segment of the paragraph text, with double quotes
+     * removed and surrounding whitespace trimmed.
+     *
+     * @throws Exception when the text has no segment after a ':'
+     */
+    private String infoValue4QiBing(Element paragraph) throws Exception {
+        String text = paragraph.text();
+        String[] parts = text.split(":");
+        if (parts.length < 2) {
+            throw new Exception("Unexpected info line (no value after ':'): " + text);
+        }
+        return parts[1].replace("\"", "").trim();
+    }
+
+    /**
+     * Returns the text of the first link inside the first paragraph containing
+     * {@code label}, or {@code null} when there is no such paragraph or it holds no link.
+     */
+    private String linkText4QiBing(Elements info, String label) {
+        Element paragraph = info.select("p:contains(" + label + ")").first();
+        if (paragraph == null) {
+            return null;
+        }
+        Element link = paragraph.select("a[href]").first();
+        return link == null ? null : link.text();
+    }
+
     /**
      * 保存文件到本地
      *

+ 9 - 0
src/main/resources/static/bg.html

@@ -156,6 +156,15 @@
     </div>
     <br/>
     <div style="margin-right:20px;">
+        <!-- Form that posts an optional status to bg/jsoupVideoInfo.
+             Per the placeholder text: 1 = not yet crawled, 3 = crawl failed; the default is not yet crawled. -->
+        <span class="font">jsoupVideoInfo</span>
+        <form method="post" action="bg/jsoupVideoInfo">
+            <span>status</span>
+            <input type="text" name="status" placeholder="1:未爬取,3:爬取失败。默认是未爬取" style="width: 300px;"/>
+            <input type="submit" value="提交">
+        </form>
+    </div>
+    <br/>
+    <div style="margin-right:20px;">
         <span class="font">insertOrUpdateScoreOrComment</span>
         <form method="post" action="bg/insertOrUpdateScoreOrComment">
             <span>identificationCode</span>