Browse Source

update:流出部分字段爬取及页面展示v2

tujidelv 3 years ago
parent
commit
e88e1a9b4b

+ 17 - 0
src/main/java/top/lvzhiqiang/controller/BgController.java

@@ -117,6 +117,23 @@ public class BgController {
     }
 
     /**
+     * Jsoup VideoInfoUncensored
+     *
+     * @author lvzhiqiang
+     * 2022/5/4 19:27
+     */
+    @RequestMapping("/jsoupVideoInfoUncensored")
+    @ResponseBody
+    public R jsoupVideoInfoUncensored(Integer status) {
+        if (null == status) {
+            status = 1;
+        }
+
+        bgService.jsoupVideoInfoUncensored(status);
+        return R.ok();
+    }
+
+    /**
      * findDicCode
      *
      * @author lvzhiqiang

+ 8 - 1
src/main/java/top/lvzhiqiang/controller/QueryHeaderController.java

@@ -7,6 +7,7 @@ import org.springframework.web.bind.annotation.RestController;
 import top.lvzhiqiang.entity.VideoCast;
 import top.lvzhiqiang.entity.VideoGenres;
 import top.lvzhiqiang.mapper.VideoInfoPoolMapper;
+import top.lvzhiqiang.mapper.VideoInfoUncensoredMapper;
 import top.lvzhiqiang.service.VideoCastService;
 import top.lvzhiqiang.service.VideoGenresService;
 
@@ -32,6 +33,9 @@ public class QueryHeaderController {
     @Resource
     private VideoInfoPoolMapper videoInfoPoolMapper;
 
+    @Resource
+    private VideoInfoUncensoredMapper videoInfoUncensoredMapper;
+
     @RequestMapping("/getQueryHeaderInfo")
     @ResponseBody
     public JSONObject getQueryHeaderInfo(String bigType) {
@@ -45,7 +49,10 @@ public class QueryHeaderController {
         } else if ("步兵".equals(bigType)) {
 
         } else if ("流出".equals(bigType)) {
-
+            List<VideoCast> videoCastList = videoInfoUncensoredMapper.findCast();
+            List<VideoGenres> videoGenresList = videoInfoUncensoredMapper.findGenres();
+            result.put("videoCastList", videoCastList);
+            result.put("videoGenresList", videoGenresList);
         } else if ("码池".equals(bigType)) {
             List<VideoCast> videoCastList = videoInfoPoolMapper.findCast();
             List<VideoGenres> videoGenresList = videoInfoPoolMapper.findGenres();

+ 2 - 0
src/main/java/top/lvzhiqiang/entity/VideoCast.java

@@ -56,4 +56,6 @@ public class VideoCast implements Serializable {
      */
     @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
     private LocalDateTime modifyTime;
+
+    private Integer count;
 }

+ 2 - 0
src/main/java/top/lvzhiqiang/entity/VideoGenres.java

@@ -51,4 +51,6 @@ public class VideoGenres implements Serializable {
      */
     @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
     private LocalDateTime modifyTime;
+
+    private Integer count;
 }

+ 1 - 1
src/main/java/top/lvzhiqiang/mapper/VideoInfoPoolMapper.java

@@ -53,7 +53,7 @@ public interface VideoInfoPoolMapper {
     @Select("select '待审查' name union select '审查中' name union select '审查已通过' name union select '审查未通过' name")
     List<VideoGenres> findGenres();
 
-    @Select("SELECT M.cast name " +
+    @Select("SELECT M.cast name,COUNT(M.cast) count " +
             "FROM (" +
             "         SELECT SUBSTRING_INDEX(SUBSTRING_INDEX(vip.cast, ',', B.HELP_TOPIC_ID + 1), ',', - 1) AS cast" +
             "         FROM video_info_pool vip" +

+ 41 - 20
src/main/java/top/lvzhiqiang/mapper/VideoInfoUncensoredMapper.java

@@ -1,6 +1,11 @@
 package top.lvzhiqiang.mapper;
 
-import org.apache.ibatis.annotations.*;
+import org.apache.ibatis.annotations.Delete;
+import org.apache.ibatis.annotations.Insert;
+import org.apache.ibatis.annotations.Select;
+import org.apache.ibatis.annotations.Update;
+import top.lvzhiqiang.entity.VideoCast;
+import top.lvzhiqiang.entity.VideoGenres;
 import top.lvzhiqiang.entity.VideoInfo;
 import top.lvzhiqiang.entity.VideoInfoUncensored;
 
@@ -36,33 +41,31 @@ public interface VideoInfoUncensoredMapper {
     int insertList(List<VideoInfoUncensored> videoInfoUncensoredList);
 
     /**
-     * 新增
-     *
-     * @param videoInfo
+     * 根据状态查询识别码
      */
-    @Insert("INSERT INTO video_info(name, identification_code, issue_date, length, director, maker, issuer, img_url, video_url, type, main_who, create_time, modify_time) " +
-            "VALUES (#{name}, #{identificationCode}, #{issueDate}, #{length}, #{director}, #{maker}, #{issuer}, #{imgUrl}, #{videoUrl}, #{type}, #{mainWho} ,#{createTime}, now())")
-    @Options(useGeneratedKeys = true, keyProperty = "id", keyColumn = "id")
-    int insert(VideoInfo videoInfo);
+    @Select("SELECT identification_code FROM video_info_uncensored WHERE delete_flag = 1 and status = #{status}")
+    List<String> findIcodeByStatus(Integer status);
 
     /**
-     * 查询所有
+     * 更新状态
+     *
+     * @param identificationCode
+     * @param status
+     * @return
      */
-    @Select("select vi.*, IFNULL(vio.score, 0) AS score from video_info vi " +
-            "left join video_info_other vio on vi.identification_code = vio.identification_code and vio.delete_flag = 1 " +
-            "where vi.delete_flag = 1 ORDER BY vi.issue_date desc")
-    List<VideoInfo> findAll();
+    @Update("update video_info_uncensored set status = #{status},modify_time = now() where identification_code = #{identificationCode}")
+    int updateStatus(String identificationCode, Integer status);
 
     /**
      * 查询所有识别码
      */
-    @Select("select distinct identification_code from video_info")
+    @Select("select distinct identification_code from video_info_uncensored")
     List<String> findAllIcode();
 
     /**
      * 根据条件查询
      */
-    List<VideoInfo> getVideoInfoList(Map<String, Object> params);
+    List<VideoInfo> getVideoInfoUncensoredList(Map<String, Object> params);
 
     /**
      * 根据识别码和类型查询
@@ -70,19 +73,37 @@ public interface VideoInfoUncensoredMapper {
      * @return
      */
     @Select({"<script>" +
-            "select vi.*,vio.score,vio.comment from video_info vi left join video_info_other vio on vi.identification_code = vio.identification_code and vio.delete_flag = 1 WHERE vi.delete_flag = 1" +
+            "select vi.*,vio.score,vio.comment from video_info_uncensored vi left join video_info_other vio on vi.identification_code = vio.identification_code and vio.delete_flag = 1 WHERE vi.delete_flag = 1" +
             "<if test=\"identificationCode != null and identificationCode != ''\">" +
             "   and vi.identification_code like concat('%',#{identificationCode},'%')" +
             "</if>" +
-            "<if test=\"type != null and type != ''\">" +
-            "   and vi.type = #{type}" +
+            // Test for null only: the signature no longer has a `type` parameter, and
+            // comparing the Integer status with '' in OGNL is unreliable for numeric values.
+            "<if test=\"status != null\">" +
+            "   and vi.status = #{status}" +
             "</if>" +
+            // NOTE(review): ${order} is substituted as raw SQL text, not bound as a parameter;
+            // callers must pass only a trusted sort direction (ASC / DESC).
             "<if test=\"order != null and order != ''\">" +
             "   order by vi.issue_date ${order}" +
             "</if>" +
             "</script>"})
-    List<VideoInfo> findByCodeAndType(String identificationCode, Integer type, String order);
+    List<VideoInfoUncensored> findByCodeAndType(String identificationCode, Integer status, String order);
 
-    @Update("update video_info set delete_flag = 2,modify_time = now() where identification_code = #{identificationCode}")
+    @Update("update video_info_uncensored set delete_flag = 2,modify_time = now() where identification_code = #{identificationCode}")
     void delByCode(String identificationCode);
+
+    /**
+     * Writes the crawled fields (length, director, maker, issuer, genres, cast) and the
+     * status of the given entity to the row with the same identification code, and sets
+     * modify_time to the current time.
+     * <p>
+     * Every listed column is assigned unconditionally, so a property that is null on the
+     * entity is written as NULL. The statement does not filter on delete_flag.
+     *
+     * @param videoInfoUncensored entity whose properties are bound into the statement
+     */
+    @Update("update video_info_uncensored set length = #{length}, director = #{director}, maker = #{maker}, issuer = #{issuer}, genres = #{genres}, cast = #{cast}, status = #{status}, modify_time = now() where identification_code = #{identificationCode}")
+    void updateJsoupInfoByCode(VideoInfoUncensored videoInfoUncensored);
+
+    /**
+     * Returns the 30 most frequent cast names in video_info_uncensored with their counts.
+     * <p>
+     * The cast column holds a comma-separated list. The inner query joins each row against
+     * MYSQL.HELP_TOPIC, used as a sequence of integers, to emit one row per list entry
+     * (nested SUBSTRING_INDEX picks the entry at position HELP_TOPIC_ID + 1). The outer
+     * query groups by name and orders by count, descending.
+     * <p>
+     * NOTE(review): the inner WHERE filters on {@code genres != ''} rather than on the cast
+     * column, and does not filter on delete_flag - confirm both are intended.
+     *
+     * @return cast names in {@code name} and occurrences in {@code count}, at most 30 rows
+     */
+    @Select("SELECT M.cast name,COUNT(M.cast) count " +
+            "FROM (" +
+            "         SELECT SUBSTRING_INDEX(SUBSTRING_INDEX(viu.cast, ',', B.HELP_TOPIC_ID + 1), ',', - 1) AS cast" +
+            "         FROM video_info_uncensored viu" +
+            "                  JOIN MYSQL.HELP_TOPIC B" +
+            "                       ON B.HELP_TOPIC_ID < (LENGTH(viu.cast) - LENGTH(REPLACE(viu.cast, ',', '')) + 1)" +
+            "         WHERE genres != ''" +
+            "     ) M " +
+            "GROUP BY M.cast " +
+            "ORDER BY COUNT(M.cast) DESC LIMIT 30")
+    List<VideoCast> findCast();
+
+    /**
+     * Returns each distinct main_who value of video_info_uncensored with the number of
+     * rows carrying it, most frequent first. The value is exposed as {@code name} and the
+     * row count as {@code count}. All rows are counted; there is no delete_flag filter.
+     */
+    @Select("select main_who AS name, count(id) AS count from video_info_uncensored group by main_who order by count(id) desc")
+    List<VideoGenres> findGenres();
 }

+ 8 - 0
src/main/java/top/lvzhiqiang/service/BgService.java

@@ -84,4 +84,12 @@ public interface BgService {
      * 2022/5/4 9:54
      */
     String insertOrUpdateScoreOrComment(String identificationCode, String score, String comment);
+
+    /**
+     * Crawls detail information for the video_info_uncensored rows that currently have
+     * the given status and stores the parsed fields.
+     *
+     * @param status crawl status selecting which rows are processed
+     * @author lvzhiqiang
+     * 2022/5/4 19:27
+     */
+    void jsoupVideoInfoUncensored(Integer status);
 }

+ 139 - 0
src/main/java/top/lvzhiqiang/service/impl/BgServiceImpl.java

@@ -256,6 +256,79 @@ public class BgServiceImpl implements BgService {
     }
 
     /**
+     * Jsoup VideoInfoUncensored
+     *
+     * @author lvzhiqiang
+     * 2022/5/4 19:27
+     */
+    @Override
+    public void jsoupVideoInfoUncensored(Integer status) {
+        log.info("jsoupVideoInfoUncensored 开始:status={}", status);
+
+        // 获取待抓取码列表
+        List<String> icodeList = videoInfoUncensoredMapper.findIcodeByStatus(status);
+        if (icodeList.size() == 0) {
+            log.warn("status={}的icodeList为空", status);
+            return;
+        }
+        // 获取javbus防屏蔽地址
+        List<String> javbusUrlList = videoSitePoolMapper.findUrlByTypeAndDeleteFlag(1, 1);
+        if (javbusUrlList.size() == 0) {
+            log.warn("javbusUrlList为空");
+            return;
+        }
+
+        // 获取码池图片保存路径
+        String liuchuPath = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && "liuchu_path".equals(x.getCodeKey())).findFirst().get().getCodeValue();
+
+        // 遍历
+        Document document;
+        VideoInfoUncensored videoInfoUncensored;
+        String javbusUrl;
+        int j = 0;
+        for (int i = 0; i < icodeList.size(); i++) {
+            long start = System.currentTimeMillis();
+            String identificationCode = icodeList.get(i);
+
+            int retryCount = 0;
+            while (retryCount <= 3) {
+                javbusUrl = javbusUrlList.get((int) (0 + Math.random() * (javbusUrlList.size() - 0)));
+                try {
+                    document = Jsoup.connect(javbusUrl.concat("/").concat(identificationCode))
+                            .timeout(50000)
+                            //.proxy()
+                            //.data()
+                            .ignoreContentType(true)
+                            .userAgent(getUserAgent())
+                            .header("referer", "https://www.javbus.com/".concat(identificationCode))
+                            .get();
+
+                    videoInfoUncensored = new VideoInfoUncensored();
+                    videoInfoUncensored.setIdentificationCode(identificationCode);
+                    parseDocument4Uncensored(document, identificationCode, liuchuPath, videoInfoUncensored);
+                    videoInfoUncensored.setStatus(2);
+                    videoInfoUncensoredMapper.updateJsoupInfoByCode(videoInfoUncensored);
+
+                    j++;
+
+                    log.info("jsoupVideoInfoUncensored success:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode);
+                    break;
+                } catch (Exception e) {
+                    ++retryCount;
+
+                    if (retryCount < 4) {
+                        log.error("jsoupIcodePool error重试:i={},retryCount={},time={},identificationCode={}", i, retryCount, System.currentTimeMillis() - start, identificationCode, e);
+                    } else if (retryCount == 4) {
+                        videoInfoUncensoredMapper.updateStatus(identificationCode, 3);
+                        log.error("jsoupIcodePool error:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode, e);
+                    }
+                }
+            }
+        }
+        log.info("jsoupVideoInfoUncensored 统计:total={},success={}", icodeList.size(), j);
+    }
+
+    /**
      * 初始化骑兵数据
      */
     @Override
@@ -560,6 +633,72 @@ public class BgServiceImpl implements BgService {
         return end - start;
     }
 
+    /**
+     * Extracts detail fields from a fetched detail page into {@code videoInfoUncensored}.
+     * <p>
+     * The identification code and the length are required; the method throws when either
+     * cannot be read. Director, maker, issuer, genres and cast are optional and are only
+     * set when the corresponding markup is present.
+     *
+     * @param document            parsed detail page
+     * @param identificationCode  code the page is expected to describe (compared ignoring case)
+     * @param machiPath           not used by this method
+     * @param videoInfoUncensored entity that receives the parsed values
+     * @throws Exception if the page has no {@code div.container}, does not describe
+     *                   {@code identificationCode}, or has no readable length
+     */
+    private void parseDocument4Uncensored(Document document, String identificationCode, String machiPath, VideoInfoUncensored videoInfoUncensored) throws Exception {
+        Elements container = document.select("div.container");
+        if (container.isEmpty()) {
+            throw new Exception("番号无效!");
+        }
+
+        Elements infoParagraphs = container.select("div.info > p");
+        // The code is read from paragraph 0 and the length from paragraph 2.
+        if (infoParagraphs.size() < 3) {
+            throw new Exception("unexpected page layout for " + identificationCode + ": " + infoParagraphs.size() + " info paragraphs");
+        }
+
+        // Identification code: a missing span is treated like a mismatch.
+        Element codeSpan = infoParagraphs.get(0).select("span[style]").first();
+        if (codeSpan == null || !identificationCode.equalsIgnoreCase(codeSpan.text())) {
+            throw new Exception("番号与站点不一致");
+        }
+
+        // Length: the segment after the first ':' of the third paragraph.
+        String[] lengthParts = infoParagraphs.get(2).text().split(":");
+        if (lengthParts.length < 2) {
+            throw new Exception("no length found for " + identificationCode);
+        }
+        videoInfoUncensored.setLength(lengthParts[1].replace("\"", "").trim());
+
+        Elements info = container.select("div.info");
+
+        // Director
+        String director = firstLinkTextOrNull(info.select("p:contains(導演)"));
+        if (director != null) {
+            videoInfoUncensored.setDirector(director);
+        }
+        // Maker
+        String maker = firstLinkTextOrNull(info.select("p:contains(製作商)"));
+        if (maker != null) {
+            videoInfoUncensored.setMaker(maker);
+        }
+        // Issuer
+        String issuer = firstLinkTextOrNull(info.select("p:contains(發行商)"));
+        if (issuer != null) {
+            videoInfoUncensored.setIssuer(issuer);
+        }
+
+        // Genres: the links sit in the element right after the label paragraph.
+        Elements genresEles = info.select("p:contains(類別)");
+        if (!genresEles.isEmpty()) {
+            videoInfoUncensored.setGenres(joinLinkTexts(genresEles.first().nextElementSibling()));
+        }
+
+        // Cast: the links sit two siblings after the label paragraph; skipped when the
+        // page states that no cast information is available.
+        Elements castEles = info.select("p.star-show:contains(演員)");
+        if (!castEles.isEmpty()) {
+            Elements noCastNotice = container.select("div.info:contains(暫無出演者資訊)");
+            if (noCastNotice.isEmpty()) {
+                Element afterLabel = castEles.first().nextElementSibling();
+                Element castHolder = (afterLabel == null) ? null : afterLabel.nextElementSibling();
+                videoInfoUncensored.setCast(joinLinkTexts(castHolder));
+            }
+        }
+    }
+
+    /**
+     * Returns the text of the first {@code a[href]} inside the first of {@code paragraphs},
+     * or null when there is no paragraph or no link.
+     */
+    private static String firstLinkTextOrNull(Elements paragraphs) {
+        if (paragraphs.isEmpty()) {
+            return null;
+        }
+        Element link = paragraphs.first().select("a[href]").first();
+        return (link == null) ? null : link.text();
+    }
+
+    /**
+     * Joins the text of every {@code a[href]} under {@code holder} with commas. Returns an
+     * empty string when {@code holder} is null or contains no links.
+     */
+    private static String joinLinkTexts(Element holder) {
+        StringBuilder joined = new StringBuilder();
+        if (holder != null) {
+            for (Element link : holder.select("a[href]")) {
+                joined.append(link.text()).append(",");
+            }
+        }
+        if (joined.length() > 0) {
+            // Drop the trailing separator.
+            joined.setLength(joined.length() - 1);
+        }
+        return joined.toString();
+    }
+
     /**
      * 保存文件到本地
      *

+ 4 - 1
src/main/java/top/lvzhiqiang/service/impl/VideoInfoServiceImpl.java

@@ -6,6 +6,7 @@ import org.springframework.stereotype.Service;
 import top.lvzhiqiang.entity.VideoInfo;
 import top.lvzhiqiang.mapper.VideoInfoMapper;
 import top.lvzhiqiang.mapper.VideoInfoPoolMapper;
+import top.lvzhiqiang.mapper.VideoInfoUncensoredMapper;
 import top.lvzhiqiang.service.VideoInfoService;
 
 import javax.annotation.Resource;
@@ -26,6 +27,8 @@ public class VideoInfoServiceImpl extends BaseServiceImpl<Object> implements Vid
     private VideoInfoMapper videoInfoMapper;
     @Resource
     private VideoInfoPoolMapper videoInfoPoolMapper;
+    @Resource
+    private VideoInfoUncensoredMapper videoInfoUncensoredMapper;
 
     /**
      * 查询所有
@@ -67,7 +70,7 @@ public class VideoInfoServiceImpl extends BaseServiceImpl<Object> implements Vid
         } else if ("步兵".equals(bigType)) {
 
         } else if ("流出".equals(bigType)) {
-
+            videoInfoList = videoInfoUncensoredMapper.getVideoInfoUncensoredList(params);
         } else if ("码池".equals(bigType)) {
             videoInfoList = videoInfoPoolMapper.getVideoInfoPoolList(params);
         } else {

+ 23 - 0
src/main/resources/mapper/VideoInfoUncensoredMapper.xml

@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
+        "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >
+
+<!-- When a Mapper interface is bound to an XML file, the namespace must be the interface's fully qualified name -->
+<mapper namespace="top.lvzhiqiang.mapper.VideoInfoUncensoredMapper">
+
+    <!--
+        Lists non-deleted rows of video_info_uncensored, left-joined with video_info_other
+        for score and comment (0 and a placeholder text when absent). The main_who column of
+        the result is cast and genres joined with '||'.
+        Optional filters: keyword (LIKE on name or identification_code), genres (equality on
+        the table's main_who column), cast (substring match on the cast column).
+        NOTE(review): keyword is bound as-is into LIKE, so presumably the caller adds the
+        wildcards - confirm. The statement has no ORDER BY.
+    -->
+    <select id="getVideoInfoUncensoredList" resultType="top.lvzhiqiang.entity.VideoInfo">
+        select vi.id,vi.name,vi.identification_code,vi.issue_date,vi.modify_time,vi.img_url,vi.video_url,concat_ws('||', vi.cast, vi.genres) main_who,IFNULL(vio.score, 0) AS score, IFNULL(vio.comment, '暂无评论') AS comment
+        from video_info_uncensored vi
+        left join video_info_other vio on vi.identification_code = vio.identification_code and vio.delete_flag = 1
+        where vi.delete_flag = 1
+        <if test="keyword != null and keyword != ''">
+            and (vi.name like #{keyword} or vi.identification_code like #{keyword})
+        </if>
+        <if test="genres != null and genres != ''">
+            and vi.main_who = #{genres}
+        </if>
+        <if test="cast != null and cast != ''">
+            and vi.cast like concat('%',#{cast},'%')
+        </if>
+    </select>
+</mapper>

+ 9 - 0
src/main/resources/static/bg.html

@@ -147,6 +147,15 @@
     </div>
     <br/>
     <div style="margin-right:20px;">
+        <span class="font">jsoupVideoInfoUncensored</span>
+        <form method="post" action="bg/jsoupVideoInfoUncensored">
+            <span>status</span>
+            <input type="text" name="status" placeholder="1:未爬取,3:爬取失败。默认是未爬取" style="width: 300px;"/>
+            <input type="submit" value="提交">
+        </form>
+    </div>
+    <br/>
+    <div style="margin-right:20px;">
         <span class="font">insertOrUpdateScoreOrComment</span>
         <form method="post" action="bg/insertOrUpdateScoreOrComment">
             <span>identificationCode</span>

+ 3 - 3
src/main/resources/static/js/my-video.js

@@ -199,7 +199,7 @@ function getQueryHeaderInfo(bigType, startFlag) {
                     var str = "";
                     for (var i = 0; i < videoCastList.length; i++) {
                         var videoCast = videoCastList[i];
-                        str += "<li><a class=\"btn searchbtn\">" + videoCast.name + "</a></li>";
+                        str += "<li><a class=\"btn searchbtn\" title='" + videoCast.count + "'>" + videoCast.name + "</a></li>";
                     }
                     $("#yanyuandiv").html(str);
                 }
@@ -208,7 +208,7 @@ function getQueryHeaderInfo(bigType, startFlag) {
                     var str = "";
                     for (var i = 0; i < videoGenresList.length; i++) {
                         var videoGenres = videoGenresList[i];
-                        str += "<li><a class=\"btn searchbtn\">" + videoGenres.name + "</a></li>";
+                        str += "<li><a class=\"btn searchbtn\" title='" + videoGenres.count + "'>" + videoGenres.name + "</a></li>";
                     }
                     $("#leixingdiv").html(str);
                 }
@@ -267,7 +267,7 @@ function initContentEvent() {
 
         if ($("#playvideo").css("display") === 'none') {
             $("#playvideo").css("display", "block");
-            $("#playvideo").find("video").attr("src", "qibing/" + $(this).attr("videoUrl"));
+            $("#playvideo").find("video").attr("src", prepath + "/" + $(this).attr("videoUrl"));
             $("#playvideo").find("video").prop('muted', true);
         } else if ($("#playvideo").css("display") === 'block') {
             $("#playvideo").css("display", "none");