Переглянути джерело

add:jsoupIkoaMovieDownloadLog v1

lvzhiqiang 3 роки тому
батько
коміт
1bfc8c540b

+ 22 - 0
src/main/java/top/lvzhiqiang/controller/CrawlerController.java

@@ -96,6 +96,28 @@ public class CrawlerController {
     }
 
     /**
+     * Jsoup IkoaMovieDownloadLog
+     *
+     * @author lvzhiqiang
+     * 2022/10/10 15:37
+     */
+    @RequestMapping("/jsoupIkoaMovieDownloadLog")
+    @ResponseBody
+    public R jsoupIkoaMovieDownloadLog(Integer status, Integer isDel, Integer ignoreRetryCount) throws Exception {
+        if (null == status) {
+            // status = 1;
+        }
+        if (null == isDel) {
+            isDel = 2;
+        }
+        if (null == ignoreRetryCount) {
+            // ignoreRetryCount = 2;
+        }
+
+        return R.ok().data(crawlerService.jsoupIkoaMovieDownloadLog(status, isDel, ignoreRetryCount));
+    }
+
+    /**
      * jsoupIkoaMovie4VideoInfo
      *
      * @author lvzhiqiang

+ 59 - 0
src/main/java/top/lvzhiqiang/entity/CrawlerIkoaDownloadLog.java

@@ -0,0 +1,59 @@
+package top.lvzhiqiang.entity;
+
+import com.fasterxml.jackson.annotation.JsonFormat;
+import lombok.Data;
+
+import java.io.Serializable;
+import java.time.LocalDateTime;
+
+/**
+ * Entity for the crawler's IKOA download-log table: one instance per
+ * download-log record.
+ *
+ * @author lvzhiqiang
+ * 2022/10/10 15:37
+ */
+@Data
+public class CrawlerIkoaDownloadLog implements Serializable {
+
+    /**
+     * Primary key.
+     */
+    private Long id;
+
+    /**
+     * cid
+     */
+    private String cid;
+
+    /**
+     * package_image
+     */
+    private String packageImage;
+
+    /**
+     * ua
+     */
+    private String ua;
+
+    /**
+     * status
+     */
+    private Integer status;
+
+    /**
+     * Delete flag {1: normal, 2: deleted}.
+     */
+    private Integer deleteFlag;
+
+    /**
+     * Creation time; serialized to JSON as {@code yyyy-MM-dd HH:mm:ss}.
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    private LocalDateTime createTime;
+
+    /**
+     * Last modification time; serialized to JSON as {@code yyyy-MM-dd HH:mm:ss}.
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    private LocalDateTime modifyTime;
+}

+ 75 - 0
src/main/java/top/lvzhiqiang/mapper/CrawlerIkoaDownloadLogMapper.java

@@ -0,0 +1,75 @@
+package top.lvzhiqiang.mapper;
+
+import org.apache.ibatis.annotations.Delete;
+import org.apache.ibatis.annotations.Insert;
+import org.apache.ibatis.annotations.Options;
+import org.apache.ibatis.annotations.Select;
+import top.lvzhiqiang.entity.CrawlerIkoaDownloadLog;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * MyBatis mapper for the {@code crawler_ikoa_downloadlog} table
+ * (crawler IKOA download log).
+ *
+ * @author lvzhiqiang
+ * 2022/10/10 15:37
+ */
+public interface CrawlerIkoaDownloadLogMapper {
+
+    /**
+     * Deletes every row of the table (physical delete; {@code delete_flag} is not consulted).
+     */
+    @Delete("DELETE FROM crawler_ikoa_downloadlog where 1=1")
+    void deleteAll();
+
+    /**
+     * Batch insert: writes all given records with a single multi-row INSERT.
+     * <p>
+     * NOTE(review): {@code delete_flag} is not in the column list, so it presumably
+     * relies on a column default — confirm against the table DDL.
+     *
+     * @param crawlerIkoaDownloadLogList records to insert
+     */
+    @Insert({"<script>" +
+            "INSERT INTO crawler_ikoa_downloadlog(cid, package_image, ua, status, create_time, modify_time) " +
+            "VALUES " +
+            "<foreach collection='list' item='cidl' index=\"index\" separator=\",\">" +
+            "   (#{cidl.cid}, #{cidl.packageImage}, #{cidl.ua}, #{cidl.status}, #{cidl.createTime}, #{cidl.modifyTime})" +
+            " </foreach>" +
+            "</script>"})
+    int insertList(List<CrawlerIkoaDownloadLog> crawlerIkoaDownloadLogList);
+
+    /**
+     * Inserts a single record; the generated key is written back to {@code id}.
+     *
+     * @param crawlerIkoaDownloadLog record to insert
+     */
+    @Insert("INSERT INTO crawler_ikoa_downloadlog(cid, package_image, ua, status, create_time, modify_time) " +
+            "VALUES (#{cid}, #{packageImage}, #{ua}, #{status}, #{createTime}, #{modifyTime})")
+    @Options(useGeneratedKeys = true, keyProperty = "id", keyColumn = "id")
+    int insert(CrawlerIkoaDownloadLog crawlerIkoaDownloadLog);
+
+    /**
+     * Returns all rows whose {@code delete_flag} is 1.
+     */
+    @Select("SELECT * FROM crawler_ikoa_downloadlog WHERE delete_flag = 1")
+    List<CrawlerIkoaDownloadLog> findAll();
+
+    /**
+     * Returns the single row with {@code delete_flag = 1} that has the most recent
+     * {@code create_time}.
+     */
+    @Select("SELECT * FROM crawler_ikoa_downloadlog WHERE delete_flag = 1 order by create_time desc limit 1")
+    CrawlerIkoaDownloadLog findLatestInfo();
+
+    /**
+     * Queries rows with {@code delete_flag = 1}, applying optional filters read from
+     * {@code params}: {@code title} (LIKE match on {@code resource_title}),
+     * {@code type} (equality on {@code resource_type}) and, when {@code order} is
+     * non-empty, an ORDER BY built from {@code orderField} and {@code order}.
+     * <p>
+     * NOTE(review): {@code orderField} and {@code order} are inserted with
+     * {@code ${}} (plain text substitution, not a bound parameter); callers must
+     * pass only trusted or whitelisted values.
+     * NOTE(review): the INSERT statements in this mapper never write
+     * {@code resource_title} / {@code resource_type} — confirm these columns exist
+     * on this table.
+     */
+    @Select({"<script>" +
+            "select * from crawler_ikoa_downloadlog WHERE delete_flag = 1" +
+            "<if test=\"title != null and title != ''\">" +
+            "   and resource_title like concat('%',#{title},'%')" +
+            "</if>" +
+            "<if test=\"type != null and type != ''\">" +
+            "   and resource_type = #{type}" +
+            "</if>" +
+            "<if test=\"order != null and order != ''\">" +
+            "   order by ${orderField} ${order}" +
+            "</if>" +
+            "</script>"})
+    List<CrawlerIkoaDownloadLog> findIkoaDownloadLog4MultipleParams(Map<String, Object> params);
+}

+ 4 - 0
src/main/java/top/lvzhiqiang/service/CrawlerService.java

@@ -1,5 +1,7 @@
 package top.lvzhiqiang.service;
 
+import com.alibaba.fastjson.JSONObject;
+
 /**
  * Crawler Service
  *
@@ -41,4 +43,6 @@ public interface CrawlerService {
      * 2022/9/26 23:22
      */
     String findIkoaMovieDownloadLog(String page, String sort) throws Exception;
+
+    /**
+     * Crawls the IKOA download log and stores the records that are newer than the
+     * newest one already persisted.
+     *
+     * @param status           forwarded by the controller; the visible implementation only logs it
+     * @param isDel            1 removes all stored rows before crawling
+     * @param ignoreRetryCount forwarded by the controller; the visible implementation only logs it
+     * @return JSON object with {@code insertTotalNum} (number of inserted rows) and
+     *         {@code time} (elapsed seconds)
+     * @throws Exception if the crawl fails
+     */
+    JSONObject jsoupIkoaMovieDownloadLog(Integer status, Integer isDel, Integer ignoreRetryCount) throws Exception;
 }

+ 145 - 0
src/main/java/top/lvzhiqiang/service/impl/CrawlerServiceImpl.java

@@ -12,10 +12,12 @@ import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Propagation;
 import org.springframework.transaction.annotation.Transactional;
 import org.springframework.util.StopWatch;
+import top.lvzhiqiang.entity.CrawlerIkoaDownloadLog;
 import top.lvzhiqiang.entity.CrawlerXiaoeknowCourse;
 import top.lvzhiqiang.entity.DicCode;
 import top.lvzhiqiang.entity.Temp4k;
 import top.lvzhiqiang.exception.BusinessException;
+import top.lvzhiqiang.mapper.CrawlerIkoaDownloadLogMapper;
 import top.lvzhiqiang.mapper.CrawlerXiaoeknowCourseMapper;
 import top.lvzhiqiang.mapper.DicCodeMapper;
 import top.lvzhiqiang.mapper.TempMapper;
@@ -45,6 +47,8 @@ public class CrawlerServiceImpl implements CrawlerService {
     @Resource
     private CrawlerXiaoeknowCourseMapper crawlerXiaoeknowCourseMapper;
     @Resource
+    private CrawlerIkoaDownloadLogMapper crawlerIkoaDownloadLogMapper;
+    @Resource
     private DicCodeMapper dicCodeMapper;
     @Resource
     private TempMapper tempMapper;
@@ -510,6 +514,147 @@ public class CrawlerServiceImpl implements CrawlerService {
         return sb.toString();
     }
 
+    /**
+     * Jsoup IkoaMovieDownloadLog
+     *
+     * @author lvzhiqiang
+     * 2022/10/10 15:37
+     */
+    //@Async
+    @Override
+    @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
+    public JSONObject jsoupIkoaMovieDownloadLog(Integer status, Integer isDel, Integer ignoreRetryCount) throws Exception {
+        log.warn("jsoupIkoaMovieDownloadLog 开始:status={},isDel={},ignoreRetryCount={}", status, isDel, ignoreRetryCount);
+        StopWatch stopWatch = new StopWatch();
+        stopWatch.start();
+        if (isDel == 1) {
+            crawlerIkoaDownloadLogMapper.deleteAll();
+        }
+
+        // 获取最新的一条
+        CrawlerIkoaDownloadLog latestInfo = crawlerIkoaDownloadLogMapper.findLatestInfo();
+        LocalDateTime latestDate = latestInfo == null ? LocalDateTime.of(1970, 1, 1, 0, 0, 0) : latestInfo.getCreateTime();
+
+        // 获取ikoa常量MAP
+        Map<String, String> ikoaConstantMap = dicCodeMapper.findAll().stream()
+                .filter(x -> "ikoa".equals(x.getCodeDesc()) && x.getEnv().contains(env))
+                .collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
+        // 代理及TOKEN设置
+        Proxy proxy;
+        if ("dev".equals(env)) {
+            proxy = new Proxy(Proxy.Type.SOCKS, new InetSocketAddress("127.0.0.1", 1080));
+        } else {
+            proxy = Proxy.NO_PROXY;
+        }
+        if (StringUtils.isEmpty(ikoaToken)) {
+            generateIkoaToken(ikoaConstantMap, proxy);
+        }
+
+        Connection.Response response;
+        StringBuffer sb = new StringBuffer();
+        JSONObject result = null;
+
+        // 查全部
+        JSONArray ja = new JSONArray();
+        // 查第一页
+        int totalPage = 0;
+        for (int i = 0; i < 3; i++) {
+            try {
+                response = Jsoup.connect(ikoaConstantMap.get("download_log_url"))
+                        .header("Authorization", "Token " + ikoaToken)
+                        .timeout(50000)
+                        .proxy(proxy)
+                        .data("page", "1")
+                        .ignoreContentType(true)
+                        .userAgent(getUserAgent())
+                        .header("referer", ikoaConstantMap.get("referer"))
+                        .method(Connection.Method.GET)
+                        .execute();
+                result = JSONObject.parseObject(response.body());
+                if (isDel == 1) {
+                    ja.addAll(result.getJSONArray("data"));
+                }
+                totalPage = result.getInteger("total_page");
+                break;
+            } catch (HttpStatusException hse) {
+                generateIkoaToken(ikoaConstantMap, proxy);
+            } catch (Exception e) {
+                log.error("jsoup IKOA DownloadLog异常,ikoaConstantMap={},result={}", ikoaConstantMap, result.toString(), e);
+                if (i == 2) {
+                    throw new Exception("jsoup IKOA DownloadLog异常!");
+                }
+            }
+        }
+        // 查后面的
+        if (totalPage > 1) {
+            outer:
+            for (int j = totalPage; j <= 2; j--) {
+                for (int k = 0; k < 3; k++) {
+                    try {
+                        response = Jsoup.connect(ikoaConstantMap.get("download_log_url"))
+                                .header("Authorization", "Token " + ikoaToken)
+                                .timeout(50000)
+                                .proxy(proxy)
+                                .data("page", String.valueOf(j))
+                                .ignoreContentType(true)
+                                .userAgent(getUserAgent())
+                                .header("referer", ikoaConstantMap.get("referer"))
+                                .method(Connection.Method.GET)
+                                .execute();
+                        result = JSONObject.parseObject(response.body());
+                        ja.addAll(result.getJSONArray("data"));
+
+                        LocalDateTime created = LocalDateTime.parse(result.getJSONArray("data").getJSONObject(0).getString("created"), DateUtils.dateTimeFormatter2);
+                        if (created.isBefore(latestDate) || created.isEqual(latestDate)) {
+                            break outer;
+                        } else {
+                            break;
+                        }
+                    } catch (HttpStatusException hse) {
+                        generateIkoaToken(ikoaConstantMap, proxy);
+                    } catch (Exception e) {
+                        log.error("jsoup IKOA DownloadLog异常,ikoaConstantMap={},result={}", ikoaConstantMap, result.toString(), e);
+                        if (k == 2) {
+                            throw new Exception("jsoup IKOA DownloadLog异常!");
+                        }
+                    }
+                }
+            }
+        }
+
+        List<CrawlerIkoaDownloadLog> ikoaDownloadLogList = new ArrayList<>();
+        int currentPageIndex = 0;
+        for (Object o : ja) {
+            JSONObject jo = (JSONObject) o;
+            ++currentPageIndex;
+
+            LocalDateTime created = LocalDateTime.parse(jo.getString("created"), DateUtils.dateTimeFormatter2);
+            if (created.isAfter(latestDate)) {
+                CrawlerIkoaDownloadLog crawlerIkoaDownloadLog = new CrawlerIkoaDownloadLog();
+                crawlerIkoaDownloadLog.setCid(jo.getString("product_id"));
+                crawlerIkoaDownloadLog.setPackageImage(jo.getString("package_image"));
+                crawlerIkoaDownloadLog.setUa(jo.getString("ua"));
+                crawlerIkoaDownloadLog.setStatus(jo.getInteger("status"));
+                crawlerIkoaDownloadLog.setCreateTime(LocalDateTime.parse(jo.getString("created"), DateUtils.dateTimeFormatter2));
+                crawlerIkoaDownloadLog.setModifyTime(LocalDateTime.parse(jo.getString("updated"), DateUtils.dateTimeFormatter2));
+                ikoaDownloadLogList.add(crawlerIkoaDownloadLog);
+                log.warn("jsoupIkoaMovieDownloadLog success:currentPageIndex={},cid={}", currentPageIndex, crawlerIkoaDownloadLog.getCid());
+            }
+        }
+
+        if (ikoaDownloadLogList.size() > 0) {
+            crawlerIkoaDownloadLogMapper.insertList(ikoaDownloadLogList);
+        }
+
+        stopWatch.stop();
+        log.warn("jsoupIkoaMovieDownloadLog 结束:insertTotalNum={},耗时={}", ikoaDownloadLogList.size(), stopWatch.getTotalTimeSeconds());
+
+        JSONObject jsonObject = new JSONObject();
+        jsonObject.put("insertTotalNum", ikoaDownloadLogList.size());
+        jsonObject.put("time", stopWatch.getTotalTimeSeconds());
+        return jsonObject;
+    }
+
     private void parseIkoaMovieDownloadLog(JSONArray result, StringBuffer sb, String sort) {
         sb.append("total:".concat(String.valueOf(result.size())));
         sb.append("<table border=\"1\" cellspacing=\"0\"><tr><th>序号</th><th>cid</th><th>封面</th><th>User-Agent</th><th>影片当前状态</th><th>下载记录创建时间</th><th>最后一次修改时间</th></tr>");

+ 2 - 0
src/main/java/top/lvzhiqiang/util/DateUtils.java

@@ -45,6 +45,8 @@ public class DateUtils {
 
 	public static final DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern(PATTERN_TO_SECONDS);
 
+	/**
+	 * ISO-8601 date-time formatter with offset,
+	 * e.g. {@code 2022-09-16T16:29:04.467000+08:00}.
+	 */
+	public static final DateTimeFormatter dateTimeFormatter2 = DateTimeFormatter.ISO_OFFSET_DATE_TIME;
+
 	/**
 	* @Title: getToday 
 	* @Description: 获取今天的日期

+ 12 - 0
src/main/resources/static/crawler.html

@@ -107,6 +107,18 @@
         </form>
     </div>
     <div style="margin-right:20px;">
+        <!--
+            Form that POSTs status / isDel / ignoreRetryCount to bg/crawler/jsoupIkoaMovieDownloadLog.
+            Placeholder hints: status 1 = not crawled, 3 = crawl failed;
+            isDel and ignoreRetryCount 1 = yes, 2 = no.
+        -->
+        <span class="font">jsoupIkoaMovieDownloadLog</span>
+        <form method="post" action="bg/crawler/jsoupIkoaMovieDownloadLog">
+            <span>status</span>
+            <input type="text" name="status" placeholder="1:未爬取,3:爬取失败。默认是未爬取" style="width: 300px;"/>
+            <span>isDel</span>
+            <input type="text" name="isDel" placeholder="1:是,2:否。默认否"/>
+            <span>ignoreRetryCount</span>
+            <input type="text" name="ignoreRetryCount" placeholder="1:是,2:否。默认否"/>
+            <input type="submit" value="提交">
+        </form>
+    </div>
+    <div style="margin-right:20px;">
         <span class="font">jsoupIkoaMovie4VideoInfo</span>
         <form method="post" action="bg/crawler/jsoupIkoaMovie4VideoInfo">
             <span>码率</span>

+ 5 - 0
src/test/java/Test.java

@@ -9,6 +9,7 @@ import top.lvzhiqiang.util.DateUtils;
 import java.io.*;
 import java.time.LocalDate;
 import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
 
 public class Test {
     public static void main(String[] args) {
@@ -53,5 +54,9 @@ public class Test {
 
 
         System.out.println((int)(0 + Math.random() * (2 - 0)));
+
+
+        LocalDateTime parse = LocalDateTime.parse("2022-09-16T16:29:04.467000+08:00", DateTimeFormatter.ISO_OFFSET_DATE_TIME);
+        System.out.println(parse);
     }
 }