Переглянути джерело

add:jsoupVideoInfoInfantry4JAVDB v1

tujidelv 2 роки тому
батько
коміт
4bb2e2e266

+ 20 - 0
src/main/java/top/lvzhiqiang/controller/BgController.java

@@ -181,6 +181,26 @@ public class BgController {
         return R.ok();
     }
 
+    /**
+     * Validates the request parameters and hands the infantry javdb scrape over to {@code bgService}.
+     *
+     * @param type               infantry type; required
+     * @param status             record status to process; defaults to 1 when omitted
+     * @param url                javdb detail page URL or bare video id; required when
+     *                           {@code identificationCode} is given
+     * @param identificationCode optional identification code restricting the run
+     * @return {@code R.ok()} once the job has been handed to the service
+     * @throws ParameterException when {@code type} is empty, or when {@code identificationCode} is given
+     *                            without {@code url}
+     */
+    @RequestMapping("/jsoupVideoInfoInfantry4javdb")
+    @ResponseBody
+    public R jsoupVideoInfoInfantry4javdb(String type, Integer status, String url, String identificationCode) {
+        if (StringUtils.isEmpty(type)) {
+            throw new ParameterException("type不能为空!");
+        }
+
+        final boolean urlMissing = StringUtils.isEmpty(url);
+        if (urlMissing && StringUtils.isNotEmpty(identificationCode)) {
+            throw new ParameterException("url不能为空!");
+        }
+
+        // A value that does not mention javdb.com is treated as a bare video id and expanded to a full URL.
+        final String targetUrl = (!urlMissing && !url.contains("javdb.com")) ? "https://javdb.com/v/" + url : url;
+        final Integer effectiveStatus = (status == null) ? Integer.valueOf(1) : status;
+
+        bgService.jsoupVideoInfoInfantry4javdb(type, effectiveStatus, targetUrl, identificationCode);
+        return R.ok();
+    }
+
     /**
      * insertOrUpdateScoreOrComment
      *

+ 2 - 0
src/main/java/top/lvzhiqiang/entity/VideoInfo.java

@@ -147,4 +147,6 @@ public class VideoInfo implements Serializable {
     private String javdbUrl;
 
     private Integer videoInfoPoolType;
+
+    /** Infantry type of the video; filled from the {@code type} column by the infantry detail query. */
+    private String infantryType;
 }

+ 2 - 0
src/main/java/top/lvzhiqiang/entity/VideoInfoInfantry.java

@@ -112,4 +112,6 @@ public class VideoInfoInfantry implements Serializable {
 
     private String score;
     private String comment;
+
+    /** URL of the javdb detail page for this record; stored in the {@code javdb_url} column. */
+    private String javdbUrl;
 }

+ 17 - 4
src/main/java/top/lvzhiqiang/mapper/VideoInfoInfantryMapper.java

@@ -59,6 +59,9 @@ public interface VideoInfoInfantryMapper {
     /** Returns the distinct identification codes of all rows with the given type. */
     @Select("select distinct identification_code from video_info_infantry where type = #{infantryType}")
     List<String> findAllIcode(String infantryType);
 
+    /**
+     * Selects the rows with the given type and status whose delete_flag is 1.
+     *
+     * @param infantryType value matched against the type column
+     * @param status       value matched against the status column
+     * @return the matching rows
+     */
+    @Select("select * from video_info_infantry where type = #{infantryType} and status = #{status} and delete_flag = 1")
+    List<VideoInfoInfantry> findInfoByTypeAndStatus(String infantryType, Integer status);
+
     /**
      * 根据条件查询
      */
@@ -74,24 +77,34 @@ public interface VideoInfoInfantryMapper {
             "<if test=\"identificationCode != null and identificationCode != ''\">" +
             "   and vi.identification_code like concat('%',#{identificationCode},'%')" +
             "</if>" +
-            "<if test=\"status != null and type != ''\">" +
+            "<if test=\"type != null and type != ''\">" +
+            "   and vi.type = #{type}" +
+            "</if>" +
+            "<if test=\"status != null and status != ''\">" +
             "   and vi.status = #{status}" +
             "</if>" +
             "<if test=\"order != null and order != ''\">" +
             "   order by vi.issue_date ${order}" +
             "</if>" +
             "</script>"})
-    List<VideoInfoUncensored> findByCodeAndType(String identificationCode, Integer status, String order);
+    List<VideoInfoInfantry> findByCodeAndType(String identificationCode, String type, Integer status, String order);
 
     /** Sets delete_flag to 2 (presumably the "deleted" marker) and refreshes modify_time for the given identification code. */
     @Update("update video_info_infantry set delete_flag = 2,modify_time = now() where identification_code = #{identificationCode}")
     void delByCode(String identificationCode);
 
-    @Update("update video_info_infantry set length = #{length}, director = #{director}, maker = #{maker}, issuer = #{issuer}, genres = #{genres}, cast = #{cast}, status = #{status}, modify_time = now() where identification_code = #{identificationCode}")
-    void updateJsoupInfoByCode(VideoInfoUncensored videoInfoUncensored);
+    /**
+     * Writes the scraped fields (issue date, length, director, maker, issuer, genres, cast, image URL, javdb URL)
+     * and the status back to the row with the entity's id, and refreshes modify_time.
+     *
+     * @param videoInfoInfantry entity carrying the id and the new field values
+     */
+    @Update("update video_info_infantry set issue_date = #{issueDate},length = #{length}, director = #{director}, maker = #{maker}, issuer = #{issuer}, genres = #{genres}, cast = #{cast}, status = #{status}, img_url = #{imgUrl}, javdb_url = #{javdbUrl}, modify_time = now() where id = #{id}")
+    void updateJsoupInfoById(VideoInfoInfantry videoInfoInfantry);
 
     /** Counts the rows of the given type per main_who value, largest count first. */
     @Select("select main_who AS name, count(id) AS count from video_info_infantry where type = #{infantryType} group by main_who order by count(id) desc")
     List<VideoCast> findCast(String infantryType);
 
     /** Counts the rows per type value, largest count first. */
     @Select("select type AS name, count(id) AS count from video_info_infantry group by type order by count(id) desc")
     List<VideoGenres> findGenres();
+
+    /**
+     * Loads the detail view of the infantry video with the given identification code (rows with delete_flag = 1 only).
+     * <p>
+     * The image URL is returned prefixed with the infantry folder and the row's type. Score, comment and first
+     * comment come from a left join on video_info_other and fall back to 0 / placeholder texts when no such row
+     * exists.
+     *
+     * @param code identification code to look up
+     * @return the detail record as mapped by MyBatis
+     */
+    @Select("select vi.id,vi.name,vi.identification_code,vi.issue_date,vi.length,vi.director,vi.maker,vi.issuer,vi.genres" +
+            ",vi.cast,concat('步兵/', vi.type,'/', vi.img_url) imgUrl,vi.javdb_url,vi.video_url,vi.main_who,vi.type AS infantryType, IFNULL(vio.score, 0) AS score, IFNULL(vio.comment, '暂无评论') AS comment, IFNULL(vio.comment_first, '暂无简介') AS commentFirst" +
+            "        from video_info_infantry vi" +
+            "        left join video_info_other vio on vi.identification_code = vio.identification_code and vio.delete_flag = 1" +
+            "        where vi.delete_flag = 1 and vi.identification_code=#{code}")
+    VideoInfo getVideoInfoDetail(String code);
 }

+ 2 - 0
src/main/java/top/lvzhiqiang/service/BgService.java

@@ -150,4 +150,6 @@ public interface BgService {
      * @param videoInfo
      */
     void getMaleCast(VideoInfo videoInfo);
+
+    /**
+     * Scrapes javdb for infantry videos and stores the parsed metadata.
+     *
+     * @param type               infantry type used to select the records
+     * @param status             record status used to select the records
+     * @param url                javdb detail page URL, used together with {@code identificationCode}
+     * @param identificationCode optional identification code restricting the run
+     */
+    void jsoupVideoInfoInfantry4javdb(String type, Integer status, String url, String identificationCode);
 }

+ 250 - 0
src/main/java/top/lvzhiqiang/service/impl/BgServiceImpl.java

@@ -583,6 +583,256 @@ public class BgServiceImpl implements BgService {
         log.warn("jsoupVideoInfo4javdb 统计:total={},success={}", icodeList.size(), j);
     }
 
+    /**
+     * Scrapes javdb for infantry videos and persists the parsed metadata.
+     * <p>
+     * Without {@code identificationCodeP}, the records selected by type and status are processed and each detail
+     * page is located through the javdb search (by identification code first, then by name). With
+     * {@code identificationCodeP}, the records returned by {@code findByCodeAndType} are processed and {@code url}
+     * is used as their detail page.
+     * <p>
+     * Every record is attempted up to four times, with a 3 second pause before each attempt. A successful record is
+     * stored with status 2; a record that fails all four attempts is marked with status 3. If the thread is
+     * interrupted the run stops and the interrupt flag is restored.
+     *
+     * @param type                infantry type used to select the records
+     * @param status              record status used to select the records
+     * @param url                 javdb detail page URL; only applied when {@code identificationCodeP} is given
+     * @param identificationCodeP optional identification code restricting the run
+     */
+    @Async
+    @Override
+    public void jsoupVideoInfoInfantry4javdb(String type, Integer status, String url, String identificationCodeP) {
+        log.warn("jsoupVideoInfoInfantry4javdb 开始:status={}", status);
+
+        // Collect the records to scrape.
+        List<VideoInfoInfantry> videoInfoInfantryList;
+        if (StringUtils.isEmpty(identificationCodeP)) {
+            videoInfoInfantryList = videoInfoInfantryMapper.findInfoByTypeAndStatus(type, status);
+            if (videoInfoInfantryList.isEmpty()) {
+                log.warn("status={}的videoInfoInfantryList为空", status);
+                return;
+            }
+        } else {
+            videoInfoInfantryList = videoInfoInfantryMapper.findByCodeAndType(identificationCodeP, type, status, null);
+            // The caller named the detail page explicitly, so every matched record uses it.
+            videoInfoInfantryList.forEach(e -> e.setJavdbUrl(url));
+        }
+
+        List<DicCode> dicCodeList = dicCodeMapper.findAll();
+        Map<String, String> javdbConstantMap = dicCodeList.stream()
+                .filter(x -> x.getType() != null && 2 == x.getType() && x.getEnv().contains(env))
+                .collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
+        // Loop-invariant lookup; a missing entry still fails inside the per-record try block below.
+        String javdbBaseUrl = javdbConstantMap.get("javdb");
+
+        beforeProxy();
+
+        // Root folder the images are saved under.
+        String infantryPath = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && env.equals(x.getEnv()) && "apics_path".equals(x.getCodeKey())).findFirst().get().getCodeValue();
+
+        int j = 0;
+        for (int i = 0; i < videoInfoInfantryList.size(); i++) {
+            long start = System.currentTimeMillis();
+            VideoInfoInfantry videoInfoInfantry = videoInfoInfantryList.get(i);
+
+            int retryCount = 0;
+            while (retryCount <= 3) {
+                try {
+                    Thread.sleep(3000);
+
+                    Document document;
+                    if (StringUtils.isNotEmpty(videoInfoInfantry.getJavdbUrl())) {
+                        // Fetch the URL stored on the record itself. The search branch below stores the URL it
+                        // found before parsing, so a retry after a parse/download failure lands here and must
+                        // not depend on the (possibly null or unrelated) request parameter.
+                        String detailUrl = videoInfoInfantry.getJavdbUrl();
+                        Map<String, String> headerMap = new HashMap<>();
+                        headerMap.put("referer", detailUrl);
+                        document = JsoupUtil.requestDocument(detailUrl, JsoupUtil.HTTP_GET, proxy, null, headerMap, null);
+                    } else {
+                        String searchUrl = javdbBaseUrl.concat("search?q=").concat(videoInfoInfantry.getIdentificationCode()).concat("&f=all");
+                        Map<String, String> header3Map = new HashMap<>();
+                        header3Map.put("referer", searchUrl);
+
+                        Document javdbSearchDocument = JsoupUtil.requestDocument(searchUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
+
+                        Elements itembSelects = javdbSearchDocument.select("div.movie-list").select("div.item");
+                        if (itembSelects.isEmpty()) {
+                            // Nothing found by identification code: search again by name.
+                            String newName = videoInfoInfantry.getName();
+                            searchUrl = javdbBaseUrl.concat("search?q=").concat(newName).concat("&f=all");
+                            javdbSearchDocument = JsoupUtil.requestDocument(searchUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
+                            itembSelects = javdbSearchDocument.select("div.movie-list").select("div.item");
+                        }
+
+                        if (itembSelects.isEmpty()) {
+                            throw new BusinessException(30000, "javdb search result null");
+                        }
+
+                        // Pick the first hit whose code matches, or whose title contains the (normalised) name.
+                        String codeUrl = null;
+                        for (Element itembSelect : itembSelects) {
+                            String title = itembSelect.select("a.box").get(0).attr("title");
+                            String code = itembSelect.select("a.box").get(0).select("div.video-title").select("strong").text();
+                            if (code.equalsIgnoreCase(videoInfoInfantry.getIdentificationCode())) {
+                                codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                break;
+                            }
+
+                            String newName = videoInfoInfantry.getName().replace("●", "さ");
+                            if (title.contains(newName)) {
+                                codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                videoInfoInfantry.setName(newName);
+                                break;
+                            }
+                        }
+                        if (StringUtils.isEmpty(codeUrl)) {
+                            throw new BusinessException(30000, "javdb search result mismatch");
+                        }
+
+                        document = JsoupUtil.requestDocument(codeUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
+                        videoInfoInfantry.setJavdbUrl(codeUrl);
+                    }
+
+                    String picTime = parseDocument4Infantry4Javdb(document, infantryPath, videoInfoInfantry);
+                    videoInfoInfantry.setStatus(2);
+                    videoInfoInfantryMapper.updateJsoupInfoById(videoInfoInfantry);
+                    videoInfoOtherMapper.insertOrUpdate(videoInfoInfantry.getIdentificationCode(), 2);
+
+                    j++;
+
+                    log.warn("jsoupVideoInfoInfantry4javdb success:i={},picTime={},time={},identificationCode={}", i, picTime, System.currentTimeMillis() - start, videoInfoInfantry.getIdentificationCode());
+                    break;
+                } catch (InterruptedException e) {
+                    // Do not count an interrupt as a scrape failure: restore the flag and stop the run.
+                    Thread.currentThread().interrupt();
+                    log.warn("jsoupVideoInfoInfantry4javdb interrupted:i={},identificationCode={}", i, videoInfoInfantry.getIdentificationCode());
+                    return;
+                } catch (Exception e) {
+                    ++retryCount;
+
+                    // Log the URL that was actually attempted for this record, not the request parameter.
+                    if (retryCount < 4) {
+                        log.error("jsoupVideoInfoInfantry4javdb error重试:i={},retryCount={},time={},identificationCode={},javbusUrl={}", i, retryCount, System.currentTimeMillis() - start, videoInfoInfantry.getIdentificationCode(), videoInfoInfantry.getJavdbUrl(), e);
+                    } else if (retryCount == 4) {
+                        videoInfoInfantryMapper.updateStatus(videoInfoInfantry.getIdentificationCode(), 3);
+                        log.error("jsoupVideoInfoInfantry4javdb error:i={},time={},identificationCode={},javbusUrl={}", i, System.currentTimeMillis() - start, videoInfoInfantry.getIdentificationCode(), videoInfoInfantry.getJavdbUrl(), e);
+                    }
+                }
+            }
+        }
+        log.warn("jsoupVideoInfoInfantry4javdb 统计:total={},success={}", videoInfoInfantryList.size(), j);
+    }
+
+    /**
+     * Parses a javdb detail page into the given record and downloads its images.
+     * <p>
+     * Sets name, issue date, length, director, maker, issuer, genres, cast and image URL on
+     * {@code videoInfoInfantry}. The cover is downloaded only when the target file does not exist yet; the sample
+     * images are downloaded only when the img_gf folder is missing or empty.
+     *
+     * @param document          detail page to parse
+     * @param infantryPath      root folder the images are saved under
+     * @param videoInfoInfantry record to fill; its identification code and type must already be set
+     * @return a short text with the image download timings in milliseconds, used for logging
+     * @throws Exception when the page has no detail container, when the code on the page differs from the record's
+     *                   identification code, or when parsing or a download fails
+     */
+    private String parseDocument4Infantry4Javdb(Document document, String infantryPath, VideoInfoInfantry videoInfoInfantry) throws Exception {
+        Elements container = document.select("section.section > div.container");
+        if (container.size() == 0) {
+            throw new BusinessException(30000, "番号无效!");
+        }
+
+        Elements videoDetail = container.select("div.video-detail");
+        // Name
+        videoInfoInfantry.setName(videoDetail.select("h2.title").select("strong.current-title").text().trim());
+
+        Elements moviePanelInfos = videoDetail.select("nav.movie-panel-info");
+        // NOTE(review): get(0) throws when the page has no nav.movie-panel-info element — confirm that is intended.
+        Element pEle = moviePanelInfos.get(0);
+        // Identification code
+        // NOTE(review): the first replace() argument is presumably a non-breaking space, which \\s does not match — confirm.
+        String iCode = pEle.select("div:contains(番號)").select("span.value").first().text().replace(" ", "").replaceAll("\\s+", "");
+        if (!videoInfoInfantry.getIdentificationCode().equalsIgnoreCase(iCode)) {
+            throw new Exception("番号与站点不一致");
+        }
+        // Issue date
+        String issueDate = pEle.select("div:contains(日期)").select("span.value").first().text().replace(" ", "").replaceAll("\\s+", "");
+        videoInfoInfantry.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
+        // Length
+        String length = pEle.select("div:contains(時長)").select("span.value").first().text().replace(" ", "").replaceAll("\\s+", "");
+        videoInfoInfantry.setLength(length);
+        // Director (optional on the page)
+        Elements directorEles = pEle.select("div:contains(導演)").select("span.value");
+        if (directorEles.size() > 0) {
+            videoInfoInfantry.setDirector(directorEles.first().select("a[href]").first().text().replace(" ", "").replaceAll("\\s+", ""));
+        }
+        // Maker (optional on the page)
+        Elements markerEles = pEle.select("div:contains(片商)").select("span.value");
+        if (markerEles.size() > 0) {
+            videoInfoInfantry.setMaker(markerEles.first().select("a[href]").first().text().replace(" ", "").replaceAll("\\s+", ""));
+        }
+        // Issuer (optional on the page)
+        Elements issuerEles = pEle.select("div:contains(發行)").select("span.value");
+        if (issuerEles.size() > 0) {
+            videoInfoInfantry.setIssuer(issuerEles.first().select("a[href]").first().text().replace(" ", "").replaceAll("\\s+", ""));
+        }
+        // Genres: link texts joined with commas
+        Elements genresEles = pEle.select("div:contains(類別)").select("span.value");
+        if (genresEles.size() > 0) {
+            StringBuffer sb = new StringBuffer();
+            Elements ahrefEles = genresEles.first().select("a[href]");
+            for (Element ahrefEle : ahrefEles) {
+                sb.append(ahrefEle.text().replace(" ", "").replaceAll("\\s+", "")).append(",");
+            }
+            if (sb.length() > 0) {
+                sb = sb.deleteCharAt(sb.length() - 1);
+            }
+            videoInfoInfantry.setGenres(sb.toString());
+        }
+        // Cast: link texts joined with commas
+        Elements castEles = pEle.select("div:contains(演員)").select("span.value");
+        if (castEles.size() > 0) {
+            StringBuffer sb = new StringBuffer();
+            Elements ahrefEles = castEles.first().select("a[href]");
+            for (Element ahrefEle : ahrefEles) {
+                sb.append(ahrefEle.text().replace(" ", "").replaceAll("\\s+", "")).append(",");
+            }
+            if (sb.length() > 0) {
+                sb = sb.deleteCharAt(sb.length() - 1);
+            }
+            videoInfoInfantry.setCast(sb.toString());
+        }
+
+        // Images: per-video folder below the infantry root, <type>/<code>
+        String bubings = "步兵/".concat(videoInfoInfantry.getType()).concat("/").concat(iCode);
+        // NOTE(review): the result is only well-formed ("{cover:..,img_gf:..}") when both downloads run — confirm callers only log it.
+        StringBuffer picTime = new StringBuffer("{cover:");
+
+        // Cover image
+        String bubingCover = bubings.concat("/cover/");
+
+        // NOTE(review): as shown, both replace() arguments look identical (a no-op); presumably one was a full-width question mark — confirm the file encoding.
+        String fileName = issueDate.concat(" ").concat(videoInfoInfantry.getIdentificationCode()).concat(" ").concat(videoInfoInfantry.getName()).replace("?", "?");
+        // Keep at most 251 UTF-8 bytes so that the name plus ".jpg" stays within 255 bytes.
+        byte[] imgUrlBytes = fileName.getBytes(StandardCharsets.UTF_8);
+        if (imgUrlBytes.length > 251) {
+            byte[] imgUrlDestBytes = new byte[251];
+            System.arraycopy(imgUrlBytes, 0, imgUrlDestBytes, 0, 251);
+            // Presumably strips the replacement character left behind by a multi-byte character cut in half.
+            fileName = new String(imgUrlDestBytes, StandardCharsets.UTF_8).replace("�", "");
+        }
+        fileName = fileName.concat(".jpg");
+
+        if (!new File(infantryPath.concat(bubingCover), fileName).exists()) {
+            Elements videoMetaPanel = videoDetail.select("div.column-video-cover");
+            String href = videoMetaPanel.select("a > img").first().attr("src");
+
+            long start = System.currentTimeMillis();
+            Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
+
+            saveFile2(response.bodyStream(), infantryPath.concat(bubingCover), fileName);
+            picTime.append(System.currentTimeMillis() - start).append(",");
+        }
+
+        videoInfoInfantry.setImgUrl(iCode.concat("/cover/").concat(fileName));
+
+        // Sample images (img_gf)
+        String bubingImgGF = bubings.concat("/img_gf/");
+
+        File bubingImgGFFile = new File(infantryPath.concat(bubingImgGF));
+        // NOTE(review): File.listFiles() returns null when the path exists but is not a directory — would throw here.
+        if (!bubingImgGFFile.exists() || bubingImgGFFile.listFiles().length == 0) {
+            Elements sampleBoxEles = videoDetail.select("div.preview-images").select("a.tile-item");
+            long start2 = System.currentTimeMillis();
+            if (sampleBoxEles.size() > 0) {
+                Connection.Response responseImg;
+
+                for (Element sampleBoxEle : sampleBoxEles) {
+                    String sampleBoxHref = sampleBoxEle.attr("href");
+                    if (!sampleBoxHref.contains("http")) {
+                        sampleBoxHref = sampleBoxEle.attr("abs:href");
+                    }
+
+                    try {
+                        responseImg = Jsoup.connect(sampleBoxHref).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
+                    } catch (HttpStatusException e) {
+                        // The linked image answered with an HTTP error status: fall back to the <img> inside the link.
+                        sampleBoxHref = sampleBoxEle.select("img").attr("src");
+                        if (!sampleBoxHref.contains("http")) {
+                            sampleBoxHref = sampleBoxEle.select("img").attr("abs:src");
+                        }
+                        responseImg = Jsoup.connect(sampleBoxHref).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
+                    }
+                    // File name is the last path segment of the URL that was downloaded.
+                    String sampleBoxFileName = sampleBoxHref.substring(sampleBoxHref.lastIndexOf("/") + 1);
+                    saveFile2(responseImg.bodyStream(), infantryPath.concat(bubingImgGF), sampleBoxFileName);
+                }
+            } else {
+                log.error("jsoupVideoInfoInfantry img_gf null,identificationCode={}", videoInfoInfantry.getIdentificationCode());
+            }
+            picTime.append("img_gf:").append(System.currentTimeMillis() - start2).append("}");
+        }
+
+        return picTime.toString();
+    }
+
     @Override
     public void getMaleCast(VideoInfo videoInfo) {
         Document document;

+ 29 - 1
src/main/java/top/lvzhiqiang/service/impl/VideoInfoServiceImpl.java

@@ -140,7 +140,35 @@ public class VideoInfoServiceImpl extends BaseServiceImpl<Object> implements Vid
                 videoInfo.getImgSYList().sort(Comparator.naturalOrder());
             }
         } else if ("bubing".equals(type)) {
-            videoInfo = null;
+            videoInfo = videoInfoInfantryMapper.getVideoInfoDetail(code);
+            if (videoInfo == null) {
+                return null;
+            }
+
+            String bubingPath = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && env.equals(x.getEnv()) && "apics_path".equals(x.getCodeKey())).findFirst().get().getCodeValue();
+            String imgPrefixGFPath = "步兵/".concat(videoInfo.getInfantryType()).concat("/").concat(videoInfo.getIdentificationCode()).concat("/img_gf/");
+            String imgPrefixSYPath = "骑兵/".concat(videoInfo.getInfantryType()).concat("/").concat(videoInfo.getIdentificationCode()).concat("/img_sy/");
+            videoInfo.setImgPrefixGFPath(imgPrefixGFPath);
+            videoInfo.setImgPrefixSYPath(imgPrefixSYPath);
+
+            // 获取样品图像-官方
+            File imgGFFile = new File(bubingPath, imgPrefixGFPath);
+            if (imgGFFile.exists()) {
+                File[] files = imgGFFile.listFiles();
+                for (File file : files) {
+                    videoInfo.getImgGFList().add(file.getName());
+                }
+                videoInfo.getImgGFList().sort(Comparator.naturalOrder());
+            }
+            // 获取样品图像-私有
+            File imgSYFile = new File(bubingPath, imgPrefixSYPath);
+            if (imgSYFile.exists()) {
+                File[] files = imgSYFile.listFiles();
+                for (File file : files) {
+                    videoInfo.getImgSYList().add(file.getName());
+                }
+                videoInfo.getImgSYList().sort(Comparator.naturalOrder());
+            }
         } else if ("liuchu".equals(type)) {
             videoInfo = null;
         } else {

+ 18 - 0
src/main/resources/static/bg.html

@@ -230,6 +230,24 @@
             <input type="submit" value="提交">
         </form>
     </div>
+    <!-- Form that posts type, status, url and identificationCode to bg/jsoupVideoInfoInfantry4javdb.
+         Status options per their labels: 1 = not yet scraped, 3 = scrape failed. -->
+    <div style="margin-right:20px;">
+        <span class="font">jsoupVideoInfoInfantry4JAVDB</span>
+        <form method="post" action="bg/jsoupVideoInfoInfantry4javdb">
+            <select name="type" style="height: 21.43px;">
+                <option value="Carib-Heyzo-1Pondo-Tokyo">Carib-Heyzo-1Pondo-Tokyo</option>
+            </select>
+            <span>status</span>
+            <select name="status" style="height: 21.43px;">
+                <option value="1">未爬取</option>
+                <option value="3">爬取失败</option>
+            </select>
+            <span>url</span>
+            <input type="text" name="url" placeholder="https://javdb.com/v/?"/>
+            <span>identificationCode</span>
+            <input type="text" name="identificationCode"/>
+            <input type="submit" value="提交">
+        </form>
+    </div>
     <br/>
     <div style="margin-right:20px;">
         <span class="font">insertOrUpdateScoreOrComment</span>