Browse Source

update:影片图片文件存储+爬取逻辑调整v1

tujidelv 3 năm trước cách đây
mục cha
commit
640c81f029

+ 5 - 0
src/main/java/top/lvzhiqiang/entity/DicCode.java

@@ -41,6 +41,11 @@ public class DicCode implements Serializable {
     private Integer type;
     private Integer type;
 
 
     /**
     /**
+     * 环境类型{dev,test}
+     */
+    private String env;
+
+    /**
      * 删除标志{1:正常,2:已删除}
      * 删除标志{1:正常,2:已删除}
      */
      */
     private Integer deleteFlag;
     private Integer deleteFlag;

+ 16 - 1
src/main/java/top/lvzhiqiang/entity/VideoInfo.java

@@ -7,7 +7,8 @@ import top.lvzhiqiang.util.DateUtils;
 import java.io.Serializable;
 import java.io.Serializable;
 import java.time.LocalDate;
 import java.time.LocalDate;
 import java.time.LocalDateTime;
 import java.time.LocalDateTime;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.List;
 
 
 /**
 /**
  * 电影信息
  * 电影信息
@@ -60,6 +61,16 @@ public class VideoInfo implements Serializable {
     private String issuer;
     private String issuer;
 
 
     /**
     /**
+     * 类别
+     */
+    private String genres;
+
+    /**
+     * 演员
+     */
+    private String cast;
+
+    /**
      * 图片URL
      * 图片URL
      */
      */
     private String imgUrl;
     private String imgUrl;
@@ -103,4 +114,8 @@ public class VideoInfo implements Serializable {
 
 
     private String score;
     private String score;
     private String comment;
     private String comment;
+    private String commentFirst;
+    private String imgPrefixPath;
+    private List<String> imgGFList = new ArrayList<>();
+    private List<String> imgSYList = new ArrayList<>();
 }
 }

+ 11 - 2
src/main/java/top/lvzhiqiang/mapper/VideoInfoOtherMapper.java

@@ -4,10 +4,8 @@ import org.apache.ibatis.annotations.Delete;
 import org.apache.ibatis.annotations.Insert;
 import org.apache.ibatis.annotations.Insert;
 import org.apache.ibatis.annotations.Select;
 import org.apache.ibatis.annotations.Select;
 import org.apache.ibatis.annotations.Update;
 import org.apache.ibatis.annotations.Update;
-import top.lvzhiqiang.entity.VideoInfoPool;
 
 
 import java.math.BigDecimal;
 import java.math.BigDecimal;
-import java.util.List;
 
 
 /**
 /**
  * 电影信息其他Mapper
  * 电影信息其他Mapper
@@ -43,4 +41,15 @@ public interface VideoInfoOtherMapper {
     @Insert("INSERT INTO video_info_other(identification_code, score, comment, create_time, modify_time) " +
     @Insert("INSERT INTO video_info_other(identification_code, score, comment, create_time, modify_time) " +
             "VALUES (#{identificationCode},#{score},#{comment}, now(), now())")
             "VALUES (#{identificationCode},#{score},#{comment}, now(), now())")
     void insertScoreOrComment(String identificationCode, BigDecimal score, String comment);
     void insertScoreOrComment(String identificationCode, BigDecimal score, String comment);
+
+    /**
+     * Inserts a video_info_other row; on a duplicate key, updates pic_flag and modify_time instead.
+     *
+     * @param identificationCode bound to the identification_code column
+     * @param picFlag            bound to the pic_flag column (meaning of its values is not visible here)
+     */
+    @Insert("INSERT INTO video_info_other(identification_code, pic_flag, create_time, modify_time) " +
+            "VALUES (#{identificationCode}, #{picFlag}, now(), now()) " +
+            "ON DUPLICATE KEY UPDATE pic_flag=values(pic_flag),modify_time=now()")
+    int insertOrUpdate(String identificationCode, Integer picFlag);
 }
 }

+ 153 - 28
src/main/java/top/lvzhiqiang/service/impl/BgServiceImpl.java

@@ -6,6 +6,7 @@ import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
 import org.jsoup.select.Elements;
+import org.springframework.beans.factory.annotation.Value;
 import org.springframework.scheduling.annotation.Async;
 import org.springframework.scheduling.annotation.Async;
 import org.springframework.stereotype.Service;
 import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Propagation;
 import org.springframework.transaction.annotation.Propagation;
@@ -69,6 +70,9 @@ public class BgServiceImpl implements BgService {
     @Resource
     @Resource
     private VideoInfoInfantryMapper videoInfoInfantryMapper;
     private VideoInfoInfantryMapper videoInfoInfantryMapper;
 
 
+    @Value("${spring.profiles.active}")
+    private String env;
+
     /**
     /**
      * findDicCode
      * findDicCode
      *
      *
@@ -356,6 +360,9 @@ public class BgServiceImpl implements BgService {
             return;
             return;
         }
         }
 
 
+        // 获取图片保存路径
+        String qibingPath = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && env.equals(x.getEnv()) && "apics_path".equals(x.getCodeKey())).findFirst().get().getCodeValue();
+
         // 遍历
         // 遍历
         Document document;
         Document document;
         VideoInfo videoInfo;
         VideoInfo videoInfo;
@@ -380,13 +387,14 @@ public class BgServiceImpl implements BgService {
 
 
                     videoInfo = new VideoInfo();
                     videoInfo = new VideoInfo();
                     videoInfo.setIdentificationCode(identificationCode);
                     videoInfo.setIdentificationCode(identificationCode);
-                    parseDocument4QiBing(document, identificationCode, videoInfo);
+                    String picTime = parseDocument4QiBing(document, identificationCode, qibingPath, videoInfo);
                     videoInfo.setStatus(2);
                     videoInfo.setStatus(2);
                     videoInfoMapper.updateJsoupInfoByCode(videoInfo);
                     videoInfoMapper.updateJsoupInfoByCode(videoInfo);
+                    videoInfoOtherMapper.insertOrUpdate(identificationCode, 2);
 
 
                     j++;
                     j++;
 
 
-                    log.warn("jsoupVideoInfo success:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode);
+                    log.warn("jsoupVideoInfo success:i={},picTime={},time={},identificationCode={}", i, picTime, System.currentTimeMillis() - start, identificationCode);
                     break;
                     break;
                 } catch (Exception e) {
                 } catch (Exception e) {
                     ++retryCount;
                     ++retryCount;
@@ -799,12 +807,21 @@ public class BgServiceImpl implements BgService {
         }
         }
     }
     }
 
 
-    private void parseDocument4QiBing(Document document, String identificationCode, VideoInfo videoInfo) throws Exception {
+    private String parseDocument4QiBing(Document document, String identificationCode, String qibingPath, VideoInfo videoInfo) throws Exception {
         Elements container = document.select("div.container");
         Elements container = document.select("div.container");
         if (container.size() == 0) {
         if (container.size() == 0) {
             throw new Exception("番号无效!");
             throw new Exception("番号无效!");
         }
         }
 
 
+        // 名称
+        String h3 = container.select("h3").first().text();
+        String[] nameArr = h3.split("\\s+");
+        if (nameArr.length > 1) {
+            videoInfo.setName(h3.substring(nameArr[0].length()).trim());
+        } else {
+            videoInfo.setName(nameArr[0]);
+        }
+
         Elements pEles = container.select("div.info > p");
         Elements pEles = container.select("div.info > p");
         // 识别码
         // 识别码
         Element pEle = pEles.get(0);
         Element pEle = pEles.get(0);
@@ -838,6 +855,115 @@ public class BgServiceImpl implements BgService {
             pEle = issuerEles.first().select("a[href]").first();
             pEle = issuerEles.first().select("a[href]").first();
             videoInfo.setIssuer(pEle.text());
             videoInfo.setIssuer(pEle.text());
         }
         }
+        // 类别
+        Elements genresEles = container.select("div.info").select("p:contains(類別)");
+        if (genresEles.size() > 0) {
+            StringBuffer sb = new StringBuffer();
+            Elements ahrefEles = genresEles.first().nextElementSibling().select("a[href]");
+            for (Element ahrefEle : ahrefEles) {
+                sb.append(ahrefEle.text()).append(",");
+            }
+            if (sb.length() > 0) {
+                sb = sb.deleteCharAt(sb.length() - 1);
+            }
+            videoInfo.setGenres(sb.toString());
+        }
+        // 演员
+        Elements castEles = container.select("div.info").select("p.star-show:contains(演員)");
+        if (castEles.size() > 0) {
+            Elements castElesTemp = container.select("div.info:contains(暫無出演者資訊)");
+            if (castElesTemp.size() == 0) {
+                StringBuffer sb = new StringBuffer();
+                Elements ahrefEles = castEles.first().nextElementSibling().nextElementSibling().select("a[href]");
+                for (Element ahrefEle : ahrefEles) {
+                    sb.append(ahrefEle.text()).append(",");
+                }
+                if (sb.length() > 0) {
+                    sb = sb.deleteCharAt(sb.length() - 1);
+                }
+                videoInfo.setCast(sb.toString());
+            }
+        }
+
+        // 图片  骑兵步兵
+        String qibings = "骑兵步兵/".concat(iCode);
+        StringBuffer picTime = new StringBuffer("{cover:");
+
+        // 图片URL bigImage
+        String href = container.select("a.bigImage").first().attr("abs:href");
+
+        long start = System.currentTimeMillis();
+        Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
+
+        String fileName = issueDate.concat(" ").concat(h3);
+        byte[] imgUrlBytes = fileName.getBytes(StandardCharsets.UTF_8);
+        if (imgUrlBytes.length > 251) {
+            byte[] imgUrlDestBytes = new byte[251];
+            System.arraycopy(imgUrlBytes, 0, imgUrlDestBytes, 0, 251);
+            fileName = new String(imgUrlDestBytes, StandardCharsets.UTF_8).replace("�", "");
+        }
+        fileName = fileName.concat(".jpg");
+
+        String qibingCover = qibings.concat("/cover/");
+        saveFile2(response.bodyStream(), qibingPath.concat(qibingCover), fileName);
+        picTime.append(System.currentTimeMillis() - start).append(",");
+
+        videoInfo.setImgUrl(qibingCover.concat(fileName));
+
+        // 图片URL img_gf
+        Elements sampleBoxEles = container.select("div#sample-waterfall").select("a");
+        long start2 = System.currentTimeMillis();
+        if (sampleBoxEles.size() > 0) {
+            Connection.Response responseImg;
+            String qibingImgGF = qibings.concat("/img_gf/");
+            for (Element sampleBoxEle : sampleBoxEles) {
+                String sampleBoxHref = sampleBoxEle.attr("href");
+                if (!sampleBoxHref.contains("http")) {
+                    sampleBoxHref = sampleBoxEle.attr("abs:href");
+                }
+
+                responseImg = Jsoup.connect(sampleBoxHref).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
+                String sampleBoxFileName = sampleBoxHref.substring(sampleBoxHref.lastIndexOf("/") + 1);
+                saveFile2(responseImg.bodyStream(), qibingPath.concat(qibingImgGF), sampleBoxFileName);
+            }
+        } else {
+            log.error("jsoupVideoInfo img_gf null,identificationCode={}", identificationCode);
+        }
+        picTime.append("img_gf:").append(System.currentTimeMillis() - start2).append("}");
+
+        return picTime.toString();
+    }
+
+    /**
+     * Saves a stream to a local file: copies {@code bufferedInputStream} into {@code fileName}
+     * under the directory {@code savePathPrex}, creating that directory first when it is missing.
+     * Both streams are closed on every path, including when the copy fails part-way.
+     *
+     * @param bufferedInputStream source bytes; always closed by this method
+     * @param savePathPrex        target directory path
+     * @param fileName            name of the file to write inside the target directory
+     * @throws IOException if the directory cannot be created, or reading/writing fails
+     */
+    private void saveFile2(BufferedInputStream bufferedInputStream, String savePathPrex, String fileName) throws IOException {
+        // Bind the caller's stream to try-with-resources so it is released even on early failure.
+        try (BufferedInputStream in = bufferedInputStream) {
+            File savePathPrexFile = new File(savePathPrex);
+            // mkdirs() returns false if another thread created the directory first, so only fail
+            // when the directory still does not exist afterwards.
+            if (!savePathPrexFile.exists() && !savePathPrexFile.mkdirs() && !savePathPrexFile.isDirectory()) {
+                throw new IOException("创建目录失败!");
+            }
+
+            // Closing the output stream also flushes any bytes still held in its buffer.
+            try (BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(new File(savePathPrexFile, fileName)))) {
+                // Copy at most 1 KiB per read.
+                byte[] buffer = new byte[1024];
+                int readLength;
+                while ((readLength = in.read(buffer, 0, buffer.length)) != -1) {
+                    out.write(buffer, 0, readLength);
+                }
+            }
+        }
     }
     }
 
 
     /**
     /**
@@ -875,21 +1001,22 @@ public class BgServiceImpl implements BgService {
         } else if (filePath.contains("步兵")) {
         } else if (filePath.contains("步兵")) {
             type = 2;
             type = 2;
         }
         }
+        Set<String> sameNameSet = new HashSet<>();
         for (File file : files) {
         for (File file : files) {
             if (file.isDirectory()) {
             if (file.isDirectory()) {
                 // 文件夹
                 // 文件夹
                 getAllFilePaths(file.getAbsolutePath(), javAllInfo, allIcode);
                 getAllFilePaths(file.getAbsolutePath(), javAllInfo, allIcode);
             } else {
             } else {
                 String fileName = file.getName();
                 String fileName = file.getName();
-                if (fileName.endsWith(".jpg") || (fileName.endsWith(".lnk") && fileName.contains(".jpg"))) {
+                if (!fileName.endsWith(".jpg") && !fileName.endsWith(".txt")) {
                     String parentName = file.getParentFile().getName();
                     String parentName = file.getParentFile().getName();
                     // 识别码
                     // 识别码
-                    String name = fileName.substring(10).replace(".jpg", "").trim();
+                    String name = fileName.substring(10, fileName.lastIndexOf(".")).trim();
                     String[] nameArr = name.split("\\s+");
                     String[] nameArr = name.split("\\s+");
                     try {
                     try {
                         boolean isMain = false;
                         boolean isMain = false;
-                        if (fileName.endsWith(".jpg")) {
-                            if (allIcode.contains(nameArr[0])) {
+                        if (!fileName.endsWith(".lnk")) {
+                            if (allIcode.contains(nameArr[0]) || sameNameSet.contains(nameArr[0])) {
                                 continue;
                                 continue;
                             }
                             }
 
 
@@ -897,20 +1024,23 @@ public class BgServiceImpl implements BgService {
                             // 获取正片信息
                             // 获取正片信息
                             VideoInfo videoInfo = new VideoInfo();
                             VideoInfo videoInfo = new VideoInfo();
                             // 发行日期
                             // 发行日期
-                            String issueDate = fileName.substring(0, 10);
-                            videoInfo.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
+                            //String issueDate = fileName.substring(0, 10);
+                            //videoInfo.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
+                            // 识别码
                             videoInfo.setIdentificationCode(nameArr[0]);
                             videoInfo.setIdentificationCode(nameArr[0]);
                             // 名称
                             // 名称
-                            if (nameArr.length > 1) {
-                                videoInfo.setName(name.substring(nameArr[0].length()).trim());
-                            } else {
-                                videoInfo.setName(nameArr[0]);
-                            }
+                            //if (nameArr.length > 1) {
+                            //    videoInfo.setName(name.substring(nameArr[0].length()).trim());
+                            //} else {
+                            //    videoInfo.setName(nameArr[0]);
+                            //}
 
 
                             // 类型
                             // 类型
                             videoInfo.setType(type);
                             videoInfo.setType(type);
                             // 图片URL
                             // 图片URL
-                            videoInfo.setImgUrl(parentName.concat("/").concat(fileName));
+                            //videoInfo.setImgUrl(parentName.concat("/").concat(fileName));
+                            // 视频URL
+                            videoInfo.setVideoUrl(parentName.concat("/").concat(fileName));
                             // 创建时间 TODO
                             // 创建时间 TODO
                             // 修改时间
                             // 修改时间
                             videoInfo.setCreateTime(Instant.ofEpochMilli(file.lastModified()).atZone(ZoneOffset.ofHours(8)).toLocalDateTime());
                             videoInfo.setCreateTime(Instant.ofEpochMilli(file.lastModified()).atZone(ZoneOffset.ofHours(8)).toLocalDateTime());
@@ -918,6 +1048,8 @@ public class BgServiceImpl implements BgService {
                             videoInfo.setMainWho(parentName);
                             videoInfo.setMainWho(parentName);
 
 
                             javAllInfo.getVideoInfoList().add(videoInfo);
                             javAllInfo.getVideoInfoList().add(videoInfo);
+
+                            sameNameSet.add(nameArr[0]);
                         }
                         }
 
 
                         if (parentName.contains("类别")) {
                         if (parentName.contains("类别")) {
@@ -951,15 +1083,8 @@ public class BgServiceImpl implements BgService {
                         System.err.println("error:" + file.getAbsolutePath());
                         System.err.println("error:" + file.getAbsolutePath());
                         System.err.println("error reason:" + e.getMessage());
                         System.err.println("error reason:" + e.getMessage());
                     }
                     }
-                } else if (!fileName.endsWith(".jpg") && !fileName.endsWith(".lnk")) {
-                    String[] nameArr = fileName.substring(0, fileName.lastIndexOf(".")).split("\\s+");
-                    if (allIcode.contains(nameArr[1])) {
-                        continue;
-                    }
-
-                    String parentName = file.getParentFile().getName();
-
-                    javAllInfo.getVideoUrlMap().put(nameArr[1], parentName.concat("/").concat(fileName));
+                } else {
+                    System.out.println("other type file:" + file.getAbsolutePath());
                 }
                 }
             }
             }
         }
         }
@@ -1113,10 +1238,10 @@ public class BgServiceImpl implements BgService {
 
 
         // 保存影片信息
         // 保存影片信息
         List<VideoInfo> videoInfoList = javAllInfo.getVideoInfoList();
         List<VideoInfo> videoInfoList = javAllInfo.getVideoInfoList();
-        Map<String, String> videoUrlMap = javAllInfo.getVideoUrlMap();
-        videoInfoList.parallelStream().forEach(e -> {
-            e.setVideoUrl(videoUrlMap.get(e.getIdentificationCode()));
-        });
+        //Map<String, String> videoUrlMap = javAllInfo.getVideoUrlMap();
+        //videoInfoList.parallelStream().forEach(e -> {
+        //    e.setVideoUrl(videoUrlMap.get(e.getIdentificationCode()));
+        //});
         //for (VideoInfo videoInfo : videoInfoList) {
         //for (VideoInfo videoInfo : videoInfoList) {
         //    try {
         //    try {
         //        videoInfoMapper.insert(videoInfo);
         //        videoInfoMapper.insert(videoInfo);

+ 1 - 1
src/main/resources/application-dev.yml

@@ -37,7 +37,7 @@ spring:
       # 连接池名称
       # 连接池名称
       pool-name: HikariCP
       pool-name: HikariCP
       # 此属性控制池中连接的最长生命周期,值0表示无限生命周期,默认1800000即30分钟
       # 此属性控制池中连接的最长生命周期,值0表示无限生命周期,默认1800000即30分钟
-      max-lifetime: 60000
+      max-lifetime: 120000
       # 数据库连接超时时间,默认30秒,即30000
       # 数据库连接超时时间,默认30秒,即30000
       connection-timeout: 60000
       connection-timeout: 60000
       # 生效超时
       # 生效超时