Explorar o código

update:jsoupVideoInfo增加javdb数据获取v1

tujidelv hai 2 anos
pai
achega
ed1f5eb7aa

+ 19 - 2
src/main/java/top/lvzhiqiang/controller/BgController.java

@@ -152,12 +152,29 @@ public class BgController {
      */
     @RequestMapping("/jsoupVideoInfo")
     @ResponseBody
-    public R jsoupVideoInfo(Integer status, String website, String url, String identificationCode) {
+    public R jsoupVideoInfo(Integer status, String url, String identificationCode) {
         if (null == status) {
             status = 1;
         }
 
-        bgService.jsoupVideoInfo(status, website, url, identificationCode);
+        bgService.jsoupVideoInfo(status, url, identificationCode);
+        return R.ok();
+    }
+
+    /**
+     * Triggers a crawl of a single javdb page for one identification code.
+     * <p>
+     * Both {@code url} and {@code identificationCode} are required. A {@code url} that does not
+     * contain {@code javdb.com} is appended to {@code https://javdb.com/v/} before it is handed
+     * to the service.
+     *
+     * @param status             passed through to the service; defaults to 1 when omitted
+     * @param url                a javdb URL, or the part that follows {@code https://javdb.com/v/}
+     * @param identificationCode identification code passed through to the service
+     * @return {@code R.ok()} once the service call has returned
+     * @throws ParameterException if {@code url} or {@code identificationCode} is empty
+     */
+    @RequestMapping("/jsoupVideoInfo4javdb")
+    @ResponseBody
+    public R jsoupVideoInfo4javdb(Integer status, String url, String identificationCode) {
+        if (null == status) {
+            status = 1;
+        }
+
+        if (StringUtils.isEmpty(url) || StringUtils.isEmpty(identificationCode)) {
+            // The previous message had its words scrambled ("urlidentificationCode或者为空!").
+            throw new ParameterException("url或者identificationCode为空!");
+        }
+        if (!url.contains("javdb.com")) {
+            url = "https://javdb.com/v/" + url;
+        }
+
+        bgService.jsoupVideoInfo4javdb(status, url, identificationCode);
         return R.ok();
     }
 

+ 2 - 1
src/main/java/top/lvzhiqiang/service/BgService.java

@@ -109,7 +109,8 @@ public interface BgService {
      * @author lvzhiqiang
      * 2022/5/5 18:21
      */
-    void jsoupVideoInfo(Integer status, String website, String url, String identificationCode);
+    void jsoupVideoInfo(Integer status, String url, String identificationCode);
+    /**
+     * Crawls video information from a javdb page.
+     *
+     * @param status             crawl status supplied by the caller
+     * @param url                javdb page URL to request
+     * @param identificationCode identification code supplied by the caller
+     */
+    void jsoupVideoInfo4javdb(Integer status, String url, String identificationCode);
 
     /**
      * 删除影片

+ 208 - 8
src/main/java/top/lvzhiqiang/service/impl/BgServiceImpl.java

@@ -87,6 +87,16 @@ public class BgServiceImpl implements BgService {
 
     private Proxy proxy = null;
 
+    /**
+     * Initialises the {@code proxy} field the first time it is needed.
+     * <p>
+     * When {@code env} is {@code "dev"} a SOCKS proxy at 127.0.0.1:1080 is used; in every other
+     * environment requests go out directly ({@link Proxy#NO_PROXY}). Does nothing once the field
+     * has been set.
+     */
+    public void beforeProxy() {
+        if (proxy != null) {
+            return;
+        }
+
+        proxy = "dev".equals(env)
+                ? new Proxy(Proxy.Type.SOCKS, new InetSocketAddress("127.0.0.1", 1080))
+                : Proxy.NO_PROXY;
+    }
+
     /**
      * findDicCode
      *
@@ -421,7 +431,7 @@ public class BgServiceImpl implements BgService {
      */
     @Async
     @Override
-    public void jsoupVideoInfo(Integer status, String website, String url, String identificationCodeP) {
+    public void jsoupVideoInfo(Integer status, String url, String identificationCodeP) {
         log.warn("jsoupVideoInfo 开始:status={}", status);
 
         // 获取待抓取码列表
@@ -448,13 +458,7 @@ public class BgServiceImpl implements BgService {
             javbusUrlList.add(url);
         }
 
-        if (null == proxy) {
-            if ("dev".equals(env)) {
-                proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 1080));
-            } else {
-                proxy = Proxy.NO_PROXY;
-            }
-        }
+        beforeProxy();
 
         // 获取图片保存路径
         String qibingPath = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && env.equals(x.getEnv()) && "apics_path".equals(x.getCodeKey())).findFirst().get().getCodeValue();
@@ -511,6 +515,72 @@ public class BgServiceImpl implements BgService {
         }
         log.warn("jsoupVideoInfo 统计:total={},success={}", icodeList.size(), j);
     }
+    @Async
+    @Override
+    public void jsoupVideoInfo4javdb(Integer status, String url, String identificationCodeP) {
+        log.warn("jsoupVideoInfo4javdb 开始:status={}", status);
+
+        // 获取待抓取码列表
+        List<String> icodeList = new ArrayList<>();
+        if (StringUtils.isEmpty(identificationCodeP)) {
+            icodeList = videoInfoMapper.findIcodeByStatus(status);
+            if (icodeList.size() == 0) {
+                log.warn("status={}的icodeList为空", status);
+                return;
+            }
+        } else {
+            icodeList.add(identificationCodeP);
+        }
+
+        beforeProxy();
+
+        // 获取图片保存路径
+        String qibingPath = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && env.equals(x.getEnv()) && "apics_path".equals(x.getCodeKey())).findFirst().get().getCodeValue();
+
+        // 遍历
+        Document document;
+        VideoInfo videoInfo;
+        int j = 0;
+        for (int i = 0; i < icodeList.size(); i++) {
+            long start = System.currentTimeMillis();
+            String identificationCode = icodeList.get(i);
+
+            int retryCount = 0;
+            while (retryCount <= 3) {
+                try {
+                    Map<String, String> headerMap = new HashMap<>();
+                    headerMap.put("referer", url);
+                    document = JsoupUtil.requestDocument(url, JsoupUtil.HTTP_GET, proxy, null, headerMap, null);
+
+                    videoInfo = new VideoInfo();
+                    videoInfo.setIdentificationCode(identificationCode);
+                    String picTime = parseDocument4QiBing4Javdb(document, identificationCode, qibingPath, videoInfo);
+                    videoInfo.setStatus(2);
+
+                    // 获取男优
+                    getMaleCast(videoInfo);
+
+                    videoInfoMapper.updateJsoupInfoByCode(videoInfo);
+                    videoInfoOtherMapper.insertOrUpdate(identificationCode, 2);
+
+                    j++;
+
+                    log.warn("jsoupVideoInfo4javdb success:i={},picTime={},time={},identificationCode={}", i, picTime, System.currentTimeMillis() - start, identificationCode);
+                    break;
+                } catch (Exception e) {
+                    ++retryCount;
+
+                    if (retryCount < 4) {
+                        log.error("jsoupVideoInfo4javdb error重试:i={},retryCount={},time={},identificationCode={},javbusUrl={}", i, retryCount, System.currentTimeMillis() - start, identificationCode, url, e);
+                    } else if (retryCount == 4) {
+                        videoInfoMapper.updateStatus(identificationCode, 3);
+                        log.error("jsoupVideoInfo4javdb error:i={},time={},identificationCode={},javbusUrl={}", i, System.currentTimeMillis() - start, identificationCode, url, e);
+                    }
+                }
+            }
+        }
+        log.warn("jsoupVideoInfo4javdb 统计:total={},success={}", icodeList.size(), j);
+    }
 
     @Override
     public void getMaleCast(VideoInfo videoInfo) {
@@ -1288,6 +1358,136 @@ public class BgServiceImpl implements BgService {
 
         return picTime.toString();
     }
+    /**
+     * Parses a javdb page into {@code videoInfo} and downloads its cover and preview images.
+     * <p>
+     * Name, issue date, length and image path are always set; director, maker, issuer, genres
+     * and cast are set only when the page carries them. Images are written below
+     * {@code qibingPath} and are skipped when already present on disk.
+     *
+     * @param javdbCodeDocument  parsed javdb page
+     * @param identificationCode code the page must describe (compared case-insensitively)
+     * @param qibingPath         root directory for saved images
+     * @param videoInfo          target object; its identification code must already be set
+     * @return a timing summary of the image downloads, for logging
+     * @throws BusinessException     if the page has no {@code section.section > div.container}
+     * @throws IllegalStateException if the info panel, a required field or the cover image is missing
+     * @throws Exception             if the code on the page differs from {@code identificationCode},
+     *                               or a download fails
+     */
+    private String parseDocument4QiBing4Javdb(Document javdbCodeDocument, String identificationCode, String qibingPath, VideoInfo videoInfo) throws Exception {
+        Elements container = javdbCodeDocument.select("section.section > div.container");
+        if (container.size() == 0) {
+            throw new BusinessException(30000, "番号无效!");
+        }
+
+        Elements videoDetail = container.select("div.video-detail");
+        // Name
+        videoInfo.setName(videoDetail.select("h2.title").select("strong.current-title").text().trim());
+
+        Elements moviePanelInfos = videoDetail.select("nav.movie-panel-info");
+        if (moviePanelInfos.isEmpty()) {
+            // Fail with a readable message instead of an IndexOutOfBoundsException.
+            throw new IllegalStateException("javdb page has no nav.movie-panel-info, identificationCode=" + identificationCode);
+        }
+        Element pEle = moviePanelInfos.get(0);
+
+        // Identification code
+        String iCode = javdbRequiredValue(pEle, "番號");
+        if (!identificationCode.equalsIgnoreCase(iCode)) {
+            throw new Exception("番号与站点不一致");
+        }
+        // Issue date
+        String issueDate = javdbRequiredValue(pEle, "日期");
+        videoInfo.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
+        // Length
+        videoInfo.setLength(javdbRequiredValue(pEle, "時長"));
+
+        // Director
+        String director = javdbFirstLinkText(pEle, "導演");
+        if (director != null) {
+            videoInfo.setDirector(director);
+        }
+        // Maker
+        String maker = javdbFirstLinkText(pEle, "片商");
+        if (maker != null) {
+            videoInfo.setMaker(maker);
+        }
+        // Issuer
+        String issuer = javdbFirstLinkText(pEle, "發行");
+        if (issuer != null) {
+            videoInfo.setIssuer(issuer);
+        }
+        // Genres
+        String genres = javdbJoinedLinkTexts(pEle, "類別");
+        if (genres != null) {
+            videoInfo.setGenres(genres);
+        }
+        // Cast
+        String cast = javdbJoinedLinkTexts(pEle, "演員");
+        if (cast != null) {
+            videoInfo.setCast(cast);
+        }
+
+        // Images are stored under "骑兵步兵/<code>/..."
+        String qibings = "骑兵步兵/".concat(iCode);
+        StringBuilder picTime = new StringBuilder("{cover:");
+
+        // Cover image
+        String qibingCover = qibings.concat("/cover/");
+
+        // NOTE(review): as written this replace() swaps "?" for an identical "?" and is a no-op;
+        // presumably one side was meant to be a different character - confirm.
+        String fileName = issueDate.concat(" ").concat(videoInfo.getIdentificationCode()).concat(" ").concat(videoInfo.getName()).replace("?", "?");
+        byte[] imgUrlBytes = fileName.getBytes(StandardCharsets.UTF_8);
+        if (imgUrlBytes.length > 251) {
+            // Cap the name at 251 bytes (255 with ".jpg"); a multi-byte character cut in half
+            // decodes to U+FFFD, which is stripped.
+            byte[] imgUrlDestBytes = new byte[251];
+            System.arraycopy(imgUrlBytes, 0, imgUrlDestBytes, 0, 251);
+            fileName = new String(imgUrlDestBytes, StandardCharsets.UTF_8).replace("�", "");
+        }
+        fileName = fileName.concat(".jpg");
+
+        if (!new File(qibingPath.concat(qibingCover), fileName).exists()) {
+            Element coverImg = videoDetail.select("div.column-video-cover").select("a > img").first();
+            if (coverImg == null) {
+                throw new IllegalStateException("javdb page has no cover image, identificationCode=" + identificationCode);
+            }
+            String href = coverImg.attr("src");
+
+            long start = System.currentTimeMillis();
+            // NOTE(review): image downloads do not go through the proxy field - confirm this is intended.
+            Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
+
+            saveFile2(response.bodyStream(), qibingPath.concat(qibingCover), fileName);
+            picTime.append(System.currentTimeMillis() - start).append(",");
+        }
+
+        videoInfo.setImgUrl(qibingCover.concat(fileName));
+
+        // Preview images (img_gf)
+        String qibingImgGF = qibings.concat("/img_gf/");
+
+        File qibingImgGFFile = new File(qibingPath.concat(qibingImgGF));
+        // listFiles() returns null when the path is missing or is not a directory.
+        File[] existingSamples = qibingImgGFFile.listFiles();
+        if (existingSamples == null || existingSamples.length == 0) {
+            Elements sampleBoxEles = videoDetail.select("div.preview-images").select("a.tile-item");
+            long start2 = System.currentTimeMillis();
+            if (sampleBoxEles.size() > 0) {
+                Connection.Response responseImg;
+
+                for (Element sampleBoxEle : sampleBoxEles) {
+                    String sampleBoxHref = sampleBoxEle.attr("href");
+                    if (!sampleBoxHref.contains("http")) {
+                        sampleBoxHref = sampleBoxEle.attr("abs:href");
+                    }
+
+                    try {
+                        responseImg = Jsoup.connect(sampleBoxHref).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
+                    } catch (HttpStatusException e) {
+                        // The linked image was rejected; fall back to the thumbnail's own source.
+                        sampleBoxHref = sampleBoxEle.select("img").attr("src");
+                        if (!sampleBoxHref.contains("http")) {
+                            sampleBoxHref = sampleBoxEle.select("img").attr("abs:src");
+                        }
+                        responseImg = Jsoup.connect(sampleBoxHref).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
+                    }
+                    String sampleBoxFileName = sampleBoxHref.substring(sampleBoxHref.lastIndexOf("/") + 1);
+                    saveFile2(responseImg.bodyStream(), qibingPath.concat(qibingImgGF), sampleBoxFileName);
+                }
+            } else {
+                log.error("jsoupVideoInfo img_gf null,identificationCode={}", identificationCode);
+            }
+            picTime.append("img_gf:").append(System.currentTimeMillis() - start2).append("}");
+        }
+
+        return picTime.toString();
+    }
+
+    /** Removes spaces and every other whitespace character from scraped text. */
+    private static String javdbCleanText(String raw) {
+        return raw.replace(" ", "").replaceAll("\\s+", "");
+    }
+
+    /** Returns the cleaned {@code span.value} text of the panel row containing {@code label}; throws if the row is absent. */
+    private static String javdbRequiredValue(Element panel, String label) {
+        Element value = panel.select("div:contains(" + label + ")").select("span.value").first();
+        if (value == null) {
+            throw new IllegalStateException("javdb page is missing required field: " + label);
+        }
+        return javdbCleanText(value.text());
+    }
+
+    /** Returns the cleaned text of the first link in the row containing {@code label}, or {@code null} if the row or link is absent. */
+    private static String javdbFirstLinkText(Element panel, String label) {
+        Element value = panel.select("div:contains(" + label + ")").select("span.value").first();
+        if (value == null) {
+            return null;
+        }
+        Element link = value.select("a[href]").first();
+        return link == null ? null : javdbCleanText(link.text());
+    }
+
+    /** Returns the cleaned link texts of the row containing {@code label} joined by commas, or {@code null} if the row is absent. */
+    private static String javdbJoinedLinkTexts(Element panel, String label) {
+        Element value = panel.select("div:contains(" + label + ")").select("span.value").first();
+        if (value == null) {
+            return null;
+        }
+        StringBuilder joined = new StringBuilder();
+        for (Element link : value.select("a[href]")) {
+            joined.append(javdbCleanText(link.text())).append(",");
+        }
+        if (joined.length() > 0) {
+            // Drop the trailing comma.
+            joined.deleteCharAt(joined.length() - 1);
+        }
+        return joined.toString();
+    }
 
     /**
      * 保存文件到本地2

+ 16 - 6
src/main/resources/static/bg.html

@@ -185,18 +185,13 @@
     </div>
     <br/>
     <div style="margin-right:20px;">
-        <span class="font">jsoupVideoInfo</span>
+        <span class="font">jsoupVideoInfo4JAVBUS</span>
         <form method="post" action="bg/jsoupVideoInfo">
             <span>status</span>
             <select name="status" style="height: 21.43px;">
                 <option value="1">未爬取</option>
                 <option value="3">爬取失败</option>
             </select>
-            <span>website</span>
-            <select name="website" style="height: 21.43px;">
-                <option value="javbus">javbus</option>
-                <option value="javdb">javdb</option>
-            </select>
             <span>url</span>
             <input type="text" name="url" placeholder="https://www.javbus.com"/>
             <span>identificationCode</span>
@@ -204,6 +199,21 @@
             <input type="submit" value="提交">
         </form>
     </div>
+    <div style="margin-right:20px;">
+        <span class="font">jsoupVideoInfo4JAVDB</span>
+        <form method="post" action="bg/jsoupVideoInfo4javdb">
+            <span>status</span>
+            <select name="status" style="height: 21.43px;">
+                <option value="1">未爬取</option>
+                <option value="3">爬取失败</option>
+            </select>
+            <span>url</span>
+            <input type="text" name="url" placeholder="https://javdb.com/v/?"/>
+            <span>identificationCode</span>
+            <input type="text" name="identificationCode"/>
+            <input type="submit" value="提交">
+        </form>
+    </div>
     <br/>
     <div style="margin-right:20px;">
         <span class="font">insertOrUpdateScoreOrComment</span>