Selaa lähdekoodia

update:jsoupIcodePool支持javdb方式v1

tujidelv 2 vuotta sitten
vanhempi
commit
a239147d21

+ 2 - 2
src/main/java/top/lvzhiqiang/config/MyJobs.java

@@ -157,7 +157,7 @@ public class MyJobs {
     public void jsoupIcodePool4CrawingNo() {
         log.warn("jsoupIcodePool4CrawingNo开始==============================");
 
-        bgService.jsoupIcodePool(1, 2, 2);
+        bgService.jsoupIcodePool("javbus", 1, 2, 2);
     }
 
     /**
@@ -167,7 +167,7 @@ public class MyJobs {
     public void jsoupIcodePool4CrawingFail() {
         log.warn("jsoupIcodePool4CrawingFail开始==============================");
 
-        bgService.jsoupIcodePool(3, 2, 2);
+        bgService.jsoupIcodePool("javdb", 3, 2, 2);
     }
 
     /**

+ 8 - 5
src/main/java/top/lvzhiqiang/controller/BgController.java

@@ -95,12 +95,16 @@ public class BgController {
      */
     @RequestMapping("/single4IdentificationCode")
     @ResponseBody
-    public R single4IdentificationCode(String identificationCode) {
+    public R single4IdentificationCode(String identificationCode, String url) {
         if (StringUtils.isEmpty(identificationCode)) {
             throw new ParameterException("identificationCode为空!");
         }
 
-        bgService.single4IdentificationCode(identificationCode);
+        if (StringUtils.isNotEmpty(url) && !url.contains("javdb.com")) {
+            url = "https://javdb.com/v/" + url;
+        }
+
+        bgService.single4IdentificationCode(identificationCode, url);
         return R.ok();
     }
 
@@ -112,7 +116,7 @@ public class BgController {
      */
     @RequestMapping("/jsoupIcodePool")
     @ResponseBody
-    public R jsoupIcodePool(Integer status, Integer isDel, Integer ignoreRetryCount) {
+    public R jsoupIcodePool(String website, Integer status, Integer isDel, Integer ignoreRetryCount) {
         if (null == status) {
             status = 1;
         }
@@ -123,8 +127,7 @@ public class BgController {
             ignoreRetryCount = 2;
         }
 
-        bgService.jsoupIcodePool(status, isDel, ignoreRetryCount);
-        return R.ok();
+        return R.ok().data(bgService.jsoupIcodePool(website, status, isDel, ignoreRetryCount));
     }
 
     /**

+ 1 - 0
src/main/java/top/lvzhiqiang/entity/IcodePool.java

@@ -24,6 +24,7 @@ public class IcodePool implements Serializable {
      * 识别码
      */
     private String identificationCode;
+    private String url;
 
     /**
      * 状态(1:待爬取,2:爬取成功,3:爬取失败,4:主表已存在)

+ 9 - 5
src/main/java/top/lvzhiqiang/mapper/IcodePoolMapper.java

@@ -41,17 +41,17 @@ public interface IcodePoolMapper {
      *
      * @param identificationCode
      */
-    @Insert("INSERT INTO icode_pool(identification_code, create_time, modify_time) " +
-            "VALUES (#{identificationCode}, now(), now())")
-    int insert(String identificationCode);
+    @Insert("INSERT INTO icode_pool(identification_code, url, create_time, modify_time) " +
+            "VALUES (#{identificationCode}, #{url}, now(), now())")
+    int insert(String identificationCode, String url);
 
     /**
      * 根据状态查询识别码
      */
-    @Select("SELECT identification_code FROM icode_pool WHERE delete_flag = 1 and status = #{status} and retry_count < 3")
+    @Select("SELECT concat(identification_code, '|', url) FROM icode_pool WHERE delete_flag = 1 and status = #{status} and retry_count < 3")
     List<String> findIcodeByStatus(Integer status);
 
-    @Select("SELECT identification_code FROM icode_pool WHERE delete_flag = 1 and status = #{status}")
+    @Select("SELECT concat(identification_code, '|', url) FROM icode_pool WHERE delete_flag = 1 and status = #{status}")
     List<String> findIcodeByStatus4IgnoreRetryCount(Integer status);
 
     /**
@@ -60,6 +60,9 @@ public interface IcodePoolMapper {
     @Select("SELECT identification_code FROM icode_pool WHERE delete_flag = 1")
     List<String> findIcode();
 
+    @Select("SELECT identification_code FROM icode_pool WHERE identification_code = #{identificationCode}")
+    List<String> findIcodeByCode(String identificationCode);
+
     /**
      * 更新状态
      *
@@ -71,6 +74,7 @@ public interface IcodePoolMapper {
 
     /**
      * 根据识别码和状态查询
+     *
      * @return
      */
     @Select({"<script>" +

+ 3 - 2
src/main/java/top/lvzhiqiang/service/BgService.java

@@ -30,14 +30,14 @@ public interface BgService {
      *
      * @param status
      */
-    void jsoupIcodePool(Integer status, Integer isDel, Integer ignoreRetryCount);
+    String jsoupIcodePool(String website, Integer status, Integer isDel, Integer ignoreRetryCount);
 
     /**
      * 单个识别码
      *
      * @param identificationCode
      */
-    void single4IdentificationCode(String identificationCode);
+    void single4IdentificationCode(String identificationCode, String url);
 
     /**
      * findDicCode
@@ -110,6 +110,7 @@ public interface BgService {
      * 2022/5/5 18:21
      */
     void jsoupVideoInfo(Integer status, String url, String identificationCode);
+
     void jsoupVideoInfo4javdb(Integer status, String url, String identificationCode);
 
     /**

+ 132 - 27
src/main/java/top/lvzhiqiang/service/impl/BgServiceImpl.java

@@ -515,6 +515,7 @@ public class BgServiceImpl implements BgService {
         }
         log.warn("jsoupVideoInfo 统计:total={},success={}", icodeList.size(), j);
     }
+
     @Async
     @Override
     public void jsoupVideoInfo4javdb(Integer status, String url, String identificationCodeP) {
@@ -910,15 +911,15 @@ public class BgServiceImpl implements BgService {
      * @param identificationCode
      */
     @Override
-    public void single4IdentificationCode(String identificationCode) {
-        List<String> icodePoolList = icodePoolMapper.findIcode();
-
+    public void single4IdentificationCode(String identificationCode, String url) {
         identificationCode = identificationCode.trim().toUpperCase();
-        if (icodePoolList.contains(identificationCode)) {
+
+        List<String> icodePoolList = icodePoolMapper.findIcodeByCode(identificationCode);
+        if (icodePoolList.size() > 0) {
             throw new BusinessException(ResultCodeEnum.PARAM_DATA_EXIST_ERROR);
         }
 
-        icodePoolMapper.insert(identificationCode);
+        icodePoolMapper.insert(identificationCode, url);
     }
 
     /**
@@ -928,13 +929,12 @@ public class BgServiceImpl implements BgService {
      */
     @Async
     @Override
-    public void jsoupIcodePool(Integer status, Integer isDel, Integer ignoreRetryCount) {
-        log.warn("jsoupIcodePool 开始:status={},isDel={},ignoreRetryCount={}", status, isDel, ignoreRetryCount);
+    public String jsoupIcodePool(String website, Integer status, Integer isDel, Integer ignoreRetryCount) {
+        log.warn("jsoupIcodePool 开始:website={},status={},isDel={},ignoreRetryCount={}", website, status, isDel, ignoreRetryCount);
         if (isDel == 1) {
             videoInfoPoolMapper.deleteAll();
         }
 
-
         // 获取待抓取码列表
         List<String> icodePoolList;
         if (1 == ignoreRetryCount) {
@@ -945,13 +945,13 @@ public class BgServiceImpl implements BgService {
 
         if (icodePoolList.size() == 0) {
             log.warn("status={}的icodePoolList为空", status);
-            return;
+            return "size:0";
         }
         // 获取javbus防屏蔽地址
         List<String> javbusUrlList = videoSitePoolMapper.findUrlByTypeAndDeleteFlag(1, 1);
         if (javbusUrlList.size() == 0) {
             log.warn("javbusUrlList为空");
-            return;
+            return "javbusUrlList为空";
         }
         // 获取主表所有识别码
         List<String> allIcode = videoInfoMapper.findAllIcode();
@@ -966,15 +966,15 @@ public class BgServiceImpl implements BgService {
         int j = 0;
         for (int i = 0; i < icodePoolList.size(); i++) {
             long start = System.currentTimeMillis();
-            String identificationCode = icodePoolList.get(i);
+            String[] identificationCodeArr = icodePoolList.get(i).split("\\|");
 
             IcodePool icodePool = new IcodePool();
-            icodePool.setIdentificationCode(identificationCode);
-            if (allIcode.contains(identificationCode)) {
+            icodePool.setIdentificationCode(identificationCodeArr[0]);
+            if (allIcode.contains(identificationCodeArr[0])) {
                 icodePool.setStatus(4);
                 icodePool.setRetryCount(0);
                 icodePoolMapper.updateStatus(icodePool);
-                log.warn("jsoupIcodePool exists:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode);
+                log.warn("jsoupIcodePool exists:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCodeArr[0]);
                 continue;
             }
 
@@ -982,17 +982,27 @@ public class BgServiceImpl implements BgService {
             while (retryCount <= 3) {
                 javbusUrl = javbusUrlList.get((int) (0 + Math.random() * (javbusUrlList.size())));
                 try {
-                    document = Jsoup.connect(javbusUrl.concat("/").concat(identificationCode))
-                            .timeout(50000)
-                            //.proxy()
-                            //.data()
-                            .ignoreContentType(true)
-                            .userAgent(getUserAgent())
-                            .header("referer", javbusUrl.concat("/").concat(identificationCode))
-                            .get();
-
                     videoInfoPool = new VideoInfoPool();
-                    long picTime = parseDocument(document, identificationCode, machiPath, videoInfoPool);
+                    long picTime = 0;
+
+                    if ("javbus".equalsIgnoreCase(website)) {
+                        document = Jsoup.connect(javbusUrl.concat("/").concat(identificationCodeArr[0]))
+                                .timeout(50000)
+                                //.proxy()
+                                //.data()
+                                .ignoreContentType(true)
+                                .userAgent(getUserAgent())
+                                .header("referer", javbusUrl.concat("/").concat(identificationCodeArr[0]))
+                                .get();
+                        picTime = parseDocument(document, identificationCodeArr[0], machiPath, videoInfoPool);
+                    } else if ("javdb".equalsIgnoreCase(website) && StringUtils.isNotEmpty(identificationCodeArr[1])) {
+                        Map<String, String> headerMap = new HashMap<>();
+                        headerMap.put("referer", identificationCodeArr[1]);
+                        beforeProxy();
+                        document = JsoupUtil.requestDocument(identificationCodeArr[1], JsoupUtil.HTTP_GET, proxy, null, headerMap, null);
+                        picTime = parseDocument4Javdb(document, identificationCodeArr[0], machiPath, videoInfoPool);
+                    }
+
                     if (videoInfoPool != null) {
                         icodePool.setStatus(2);
                         icodePool.setRetryCount(retryCount);
@@ -1002,24 +1012,25 @@ public class BgServiceImpl implements BgService {
                         j++;
                     }
 
-                    log.warn("jsoupIcodePool success:i={},picTime={},time={},identificationCode={}", i, picTime, System.currentTimeMillis() - start, identificationCode);
+                    log.warn("jsoupIcodePool success:i={},picTime={},time={},identificationCode={}", i, picTime, System.currentTimeMillis() - start, identificationCodeArr[0]);
                     break;
                 } catch (Exception e) {
                     ++retryCount;
 
                     if (retryCount < 4) {
-                        log.error("jsoupIcodePool error重试:i={},retryCount={},time={},identificationCode={},javbusUrl={}", i, retryCount, System.currentTimeMillis() - start, identificationCode, javbusUrl, e);
+                        log.error("jsoupIcodePool error重试:i={},retryCount={},time={},identificationCode={},javbusUrl={}", i, retryCount, System.currentTimeMillis() - start, identificationCodeArr[0], javbusUrl, e);
                     } else if (retryCount == 4) {
                         icodePool.setStatus(3);
                         icodePool.setRetryCount(retryCount - 1);
                         icodePool.setFailureCause(e.getMessage().length() <= 200 ? e.getMessage() : e.getMessage().substring(0, 200));
                         icodePoolMapper.updateStatus(icodePool);
-                        log.error("jsoupIcodePool error:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode, e);
+                        log.error("jsoupIcodePool error:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCodeArr[0], e);
                     }
                 }
             }
         }
         log.warn("jsoupIcodePool 统计:total={},success={}", icodePoolList.size(), j);
+        return "total=".concat(String.valueOf(icodePoolList.size())).concat(",success=").concat(String.valueOf(j));
     }
 
     private String getUserAgent() {
@@ -1150,6 +1161,99 @@ public class BgServiceImpl implements BgService {
         return end - start;
     }
 
+    private long parseDocument4Javdb(Document javdbCodeDocument, String identificationCode, String machiPath, VideoInfoPool videoInfoPool) throws Exception { // Fills videoInfoPool from a javdb detail page, saves the cover image under machiPath, and returns the milliseconds spent downloading and saving that image.
+        Elements container = javdbCodeDocument.select("section.section > div.container");
+        if (container.size() == 0) { // no detail container matched: the page is treated as an invalid code
+            throw new BusinessException(30000, "番号无效!");
+        }
+
+        Elements videoDetail = container.select("div.video-detail");
+        // Name (title text of the detail page)
+        videoInfoPool.setName(videoDetail.select("h2.title").select("strong.current-title").text().trim());
+
+        Elements moviePanelInfos = videoDetail.select("nav.movie-panel-info");
+        Element pEle = moviePanelInfos.get(0); // only the first info panel is read; NOTE(review): get(0) is unguarded, so an empty match would throw here
+        // Identification code as shown on the page, with whitespace stripped
+        String iCode = pEle.select("div:contains(番號)").select("span.value").first().text().replace(" ", "").replaceAll("\\s+", "");
+        if (!identificationCode.equalsIgnoreCase(iCode)) { // the page describes a different code than the one requested
+            throw new Exception("番号与站点不一致");
+        }
+        videoInfoPool.setIdentificationCode(iCode);
+        // Issue date, parsed with the shared DateUtils.dateFormatter
+        String issueDate = pEle.select("div:contains(日期)").select("span.value").first().text().replace(" ", "").replaceAll("\\s+", "");
+        videoInfoPool.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
+        // Length (the value of the "時長" row, stored as text)
+        String length = pEle.select("div:contains(時長)").select("span.value").first().text().replace(" ", "").replaceAll("\\s+", "");
+        videoInfoPool.setLength(length);
+        // Director (optional: only set when the row exists)
+        Elements directorEles = pEle.select("div:contains(導演)").select("span.value");
+        if (directorEles.size() > 0) {
+            videoInfoPool.setDirector(directorEles.first().select("a[href]").first().text().replace(" ", "").replaceAll("\\s+", ""));
+        }
+        // Maker (optional: only set when the row exists)
+        Elements markerEles = pEle.select("div:contains(片商)").select("span.value");
+        if (markerEles.size() > 0) {
+            videoInfoPool.setMaker(markerEles.first().select("a[href]").first().text().replace(" ", "").replaceAll("\\s+", ""));
+        }
+        // Issuer (optional: only set when the row exists)
+        Elements issuerEles = pEle.select("div:contains(發行)").select("span.value");
+        if (issuerEles.size() > 0) {
+            videoInfoPool.setIssuer(issuerEles.first().select("a[href]").first().text().replace(" ", "").replaceAll("\\s+", ""));
+        }
+        // Genres (optional): link texts joined with commas
+        Elements genresEles = pEle.select("div:contains(類別)").select("span.value");
+        if (genresEles.size() > 0) {
+            StringBuffer sb = new StringBuffer();
+            Elements ahrefEles = genresEles.first().select("a[href]");
+            for (Element ahrefEle : ahrefEles) {
+                sb.append(ahrefEle.text().replace(" ", "").replaceAll("\\s+", "")).append(",");
+            }
+            if (sb.length() > 0) { // drop the trailing comma
+                sb = sb.deleteCharAt(sb.length() - 1);
+            }
+            videoInfoPool.setGenres(sb.toString());
+        }
+        // Cast (optional): link texts joined with commas
+        Elements castEles = pEle.select("div:contains(演員)").select("span.value");
+        if (castEles.size() > 0) {
+            StringBuffer sb = new StringBuffer();
+            Elements ahrefEles = castEles.first().select("a[href]");
+            for (Element ahrefEle : ahrefEles) {
+                sb.append(ahrefEle.text().replace(" ", "").replaceAll("\\s+", "")).append(",");
+            }
+            if (sb.length() > 0) { // drop the trailing comma
+                sb = sb.deleteCharAt(sb.length() - 1);
+            }
+            videoInfoPool.setCast(sb.toString());
+        }
+        // Cover image URL (src of the image inside the cover column)
+        String href = videoDetail.select("div.column-video-cover").select("a > img").first().attr("src");
+
+        long start = System.currentTimeMillis(); // timing covers only the image download and save
+        Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
+
+        String fileName = issueDate.concat(" ").concat(videoInfoPool.getIdentificationCode()).concat(" ").concat(videoInfoPool.getName()).replace("?", "?"); // NOTE(review): this replace is a no-op as written; presumably one side was meant to be a different character, confirm
+        byte[] imgUrlBytes = fileName.getBytes(StandardCharsets.UTF_8);
+        if (imgUrlBytes.length > 251) { // cap the base name at 251 UTF-8 bytes; the ".jpg" suffix added below brings it to 255 (presumably a file-name length limit, confirm)
+            byte[] imgUrlDestBytes = new byte[251];
+            System.arraycopy(imgUrlBytes, 0, imgUrlDestBytes, 0, 251);
+            fileName = new String(imgUrlDestBytes, StandardCharsets.UTF_8).replace("�", ""); // presumably strips the replacement character left when the cut lands inside a multi-byte character
+        }
+        fileName = fileName.concat(".jpg");
+
+        String machiImgUrl = "码池/".concat(fileName); // relative path; stored on the record and appended to machiPath for saving
+
+        saveFile(response.bodyStream(), machiPath.concat(machiImgUrl));
+        long end = System.currentTimeMillis();
+
+        videoInfoPool.setImgUrl(machiImgUrl);
+
+        videoInfoPool.setCreateTime(LocalDateTime.now());
+        videoInfoPool.setType(1);
+
+        return end - start;
+    }
+
     private void parseDocument4Uncensored(Document document, String identificationCode, String machiPath, VideoInfoUncensored videoInfoUncensored) throws Exception {
         Elements container = document.select("div.container");
         if (container.size() == 0) {
@@ -1358,6 +1462,7 @@ public class BgServiceImpl implements BgService {
 
         return picTime.toString();
     }
+
     private String parseDocument4QiBing4Javdb(Document javdbCodeDocument, String identificationCode, String qibingPath, VideoInfo videoInfo) throws Exception {
         Elements container = javdbCodeDocument.select("section.section > div.container");
         if (container.size() == 0) {

+ 19 - 3
src/main/resources/static/bg.html

@@ -147,6 +147,8 @@
         <form method="post" action="bg/single4IdentificationCode">
             <span>identificationCode</span>
             <input type="text" name="identificationCode" placeholder="识别码"/>
+            <span>url</span>
+            <input type="text" name="url" placeholder="javdb专用,url唯一码"/>
             <input type="submit" value="提交">
         </form>
     </div>
@@ -165,12 +167,26 @@
     <div style="margin-right:20px;">
         <span class="font">jsoupIcodePool</span>
         <form method="post" action="bg/jsoupIcodePool">
+            <span>website</span>
+            <select name="website" style="height: 21.43px;">
+                <option value="javbus">javbus</option>
+                <option value="javdb">javdb</option>
+            </select>
             <span>status</span>
-            <input type="text" name="status" placeholder="1:未爬取,3:爬取失败。默认是未爬取" style="width: 300px;"/>
+            <select name="status" style="height: 21.43px;">
+                <option value="1">未爬取</option>
+                <option value="3">爬取失败</option>
+            </select>
             <span>isDel</span>
-            <input type="text" name="isDel" placeholder="1:是,2:否。默认否"/>
+            <select name="isDel" style="height: 21.43px;">
+                <option value="2">否</option>
+                <option value="1">是</option>
+            </select>
             <span>ignoreRetryCount</span>
-            <input type="text" name="ignoreRetryCount" placeholder="1:是,2:否。默认否"/>
+            <select name="ignoreRetryCount" style="height: 21.43px;">
+                <option value="2">否</option>
+                <option value="1">是</option>
+            </select>
             <input type="submit" value="提交">
         </form>
     </div>