|
|
@@ -12,10 +12,12 @@ import org.springframework.stereotype.Service;
|
|
|
import org.springframework.transaction.annotation.Propagation;
|
|
|
import org.springframework.transaction.annotation.Transactional;
|
|
|
import org.springframework.util.StopWatch;
|
|
|
+import top.lvzhiqiang.entity.CrawlerIkoaDownloadLog;
|
|
|
import top.lvzhiqiang.entity.CrawlerXiaoeknowCourse;
|
|
|
import top.lvzhiqiang.entity.DicCode;
|
|
|
import top.lvzhiqiang.entity.Temp4k;
|
|
|
import top.lvzhiqiang.exception.BusinessException;
|
|
|
+import top.lvzhiqiang.mapper.CrawlerIkoaDownloadLogMapper;
|
|
|
import top.lvzhiqiang.mapper.CrawlerXiaoeknowCourseMapper;
|
|
|
import top.lvzhiqiang.mapper.DicCodeMapper;
|
|
|
import top.lvzhiqiang.mapper.TempMapper;
|
|
|
@@ -45,6 +47,8 @@ public class CrawlerServiceImpl implements CrawlerService {
|
|
|
@Resource
|
|
|
private CrawlerXiaoeknowCourseMapper crawlerXiaoeknowCourseMapper;
|
|
|
@Resource
|
|
|
+ private CrawlerIkoaDownloadLogMapper crawlerIkoaDownloadLogMapper;
|
|
|
+ @Resource
|
|
|
private DicCodeMapper dicCodeMapper;
|
|
|
@Resource
|
|
|
private TempMapper tempMapper;
|
|
|
@@ -510,6 +514,147 @@ public class CrawlerServiceImpl implements CrawlerService {
|
|
|
return sb.toString();
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * Jsoup IkoaMovieDownloadLog
|
|
|
+ *
|
|
|
+ * @author lvzhiqiang
|
|
|
+ * 2022/10/10 15:37
|
|
|
+ */
|
|
|
+ //@Async
|
|
|
+ @Override
|
|
|
+ @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
|
|
|
+ public JSONObject jsoupIkoaMovieDownloadLog(Integer status, Integer isDel, Integer ignoreRetryCount) throws Exception {
|
|
|
+ log.warn("jsoupIkoaMovieDownloadLog 开始:status={},isDel={},ignoreRetryCount={}", status, isDel, ignoreRetryCount);
|
|
|
+ StopWatch stopWatch = new StopWatch();
|
|
|
+ stopWatch.start();
|
|
|
+ if (isDel == 1) {
|
|
|
+ crawlerIkoaDownloadLogMapper.deleteAll();
|
|
|
+ }
|
|
|
+
|
|
|
+ // 获取最新的一条
|
|
|
+ CrawlerIkoaDownloadLog latestInfo = crawlerIkoaDownloadLogMapper.findLatestInfo();
|
|
|
+ LocalDateTime latestDate = latestInfo == null ? LocalDateTime.of(1970, 1, 1, 0, 0, 0) : latestInfo.getCreateTime();
|
|
|
+
|
|
|
+ // 获取ikoa常量MAP
|
|
|
+ Map<String, String> ikoaConstantMap = dicCodeMapper.findAll().stream()
|
|
|
+ .filter(x -> "ikoa".equals(x.getCodeDesc()) && x.getEnv().contains(env))
|
|
|
+ .collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
|
|
|
+ // 代理及TOKEN设置
|
|
|
+ Proxy proxy;
|
|
|
+ if ("dev".equals(env)) {
|
|
|
+ proxy = new Proxy(Proxy.Type.SOCKS, new InetSocketAddress("127.0.0.1", 1080));
|
|
|
+ } else {
|
|
|
+ proxy = Proxy.NO_PROXY;
|
|
|
+ }
|
|
|
+ if (StringUtils.isEmpty(ikoaToken)) {
|
|
|
+ generateIkoaToken(ikoaConstantMap, proxy);
|
|
|
+ }
|
|
|
+
|
|
|
+ Connection.Response response;
|
|
|
+ StringBuffer sb = new StringBuffer();
|
|
|
+ JSONObject result = null;
|
|
|
+
|
|
|
+ // 查全部
|
|
|
+ JSONArray ja = new JSONArray();
|
|
|
+ // 查第一页
|
|
|
+ int totalPage = 0;
|
|
|
+ for (int i = 0; i < 3; i++) {
|
|
|
+ try {
|
|
|
+ response = Jsoup.connect(ikoaConstantMap.get("download_log_url"))
|
|
|
+ .header("Authorization", "Token " + ikoaToken)
|
|
|
+ .timeout(50000)
|
|
|
+ .proxy(proxy)
|
|
|
+ .data("page", "1")
|
|
|
+ .ignoreContentType(true)
|
|
|
+ .userAgent(getUserAgent())
|
|
|
+ .header("referer", ikoaConstantMap.get("referer"))
|
|
|
+ .method(Connection.Method.GET)
|
|
|
+ .execute();
|
|
|
+ result = JSONObject.parseObject(response.body());
|
|
|
+ if (isDel == 1) {
|
|
|
+ ja.addAll(result.getJSONArray("data"));
|
|
|
+ }
|
|
|
+ totalPage = result.getInteger("total_page");
|
|
|
+ break;
|
|
|
+ } catch (HttpStatusException hse) {
|
|
|
+ generateIkoaToken(ikoaConstantMap, proxy);
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("jsoup IKOA DownloadLog异常,ikoaConstantMap={},result={}", ikoaConstantMap, result.toString(), e);
|
|
|
+ if (i == 2) {
|
|
|
+ throw new Exception("jsoup IKOA DownloadLog异常!");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 查后面的
|
|
|
+ if (totalPage > 1) {
|
|
|
+ outer:
|
|
|
+ for (int j = totalPage; j <= 2; j--) {
|
|
|
+ for (int k = 0; k < 3; k++) {
|
|
|
+ try {
|
|
|
+ response = Jsoup.connect(ikoaConstantMap.get("download_log_url"))
|
|
|
+ .header("Authorization", "Token " + ikoaToken)
|
|
|
+ .timeout(50000)
|
|
|
+ .proxy(proxy)
|
|
|
+ .data("page", String.valueOf(j))
|
|
|
+ .ignoreContentType(true)
|
|
|
+ .userAgent(getUserAgent())
|
|
|
+ .header("referer", ikoaConstantMap.get("referer"))
|
|
|
+ .method(Connection.Method.GET)
|
|
|
+ .execute();
|
|
|
+ result = JSONObject.parseObject(response.body());
|
|
|
+ ja.addAll(result.getJSONArray("data"));
|
|
|
+
|
|
|
+ LocalDateTime created = LocalDateTime.parse(result.getJSONArray("data").getJSONObject(0).getString("created"), DateUtils.dateTimeFormatter2);
|
|
|
+ if (created.isBefore(latestDate) || created.isEqual(latestDate)) {
|
|
|
+ break outer;
|
|
|
+ } else {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ } catch (HttpStatusException hse) {
|
|
|
+ generateIkoaToken(ikoaConstantMap, proxy);
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("jsoup IKOA DownloadLog异常,ikoaConstantMap={},result={}", ikoaConstantMap, result.toString(), e);
|
|
|
+ if (k == 2) {
|
|
|
+ throw new Exception("jsoup IKOA DownloadLog异常!");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ List<CrawlerIkoaDownloadLog> ikoaDownloadLogList = new ArrayList<>();
|
|
|
+ int currentPageIndex = 0;
|
|
|
+ for (Object o : ja) {
|
|
|
+ JSONObject jo = (JSONObject) o;
|
|
|
+ ++currentPageIndex;
|
|
|
+
|
|
|
+ LocalDateTime created = LocalDateTime.parse(jo.getString("created"), DateUtils.dateTimeFormatter2);
|
|
|
+ if (created.isAfter(latestDate)) {
|
|
|
+ CrawlerIkoaDownloadLog crawlerIkoaDownloadLog = new CrawlerIkoaDownloadLog();
|
|
|
+ crawlerIkoaDownloadLog.setCid(jo.getString("product_id"));
|
|
|
+ crawlerIkoaDownloadLog.setPackageImage(jo.getString("package_image"));
|
|
|
+ crawlerIkoaDownloadLog.setUa(jo.getString("ua"));
|
|
|
+ crawlerIkoaDownloadLog.setStatus(jo.getInteger("status"));
|
|
|
+ crawlerIkoaDownloadLog.setCreateTime(LocalDateTime.parse(jo.getString("created"), DateUtils.dateTimeFormatter2));
|
|
|
+ crawlerIkoaDownloadLog.setModifyTime(LocalDateTime.parse(jo.getString("updated"), DateUtils.dateTimeFormatter2));
|
|
|
+ ikoaDownloadLogList.add(crawlerIkoaDownloadLog);
|
|
|
+ log.warn("jsoupIkoaMovieDownloadLog success:currentPageIndex={},cid={}", currentPageIndex, crawlerIkoaDownloadLog.getCid());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (ikoaDownloadLogList.size() > 0) {
|
|
|
+ crawlerIkoaDownloadLogMapper.insertList(ikoaDownloadLogList);
|
|
|
+ }
|
|
|
+
|
|
|
+ stopWatch.stop();
|
|
|
+ log.warn("jsoupIkoaMovieDownloadLog 结束:insertTotalNum={},耗时={}", ikoaDownloadLogList.size(), stopWatch.getTotalTimeSeconds());
|
|
|
+
|
|
|
+ JSONObject jsonObject = new JSONObject();
|
|
|
+ jsonObject.put("insertTotalNum", ikoaDownloadLogList.size());
|
|
|
+ jsonObject.put("time", stopWatch.getTotalTimeSeconds());
|
|
|
+ return jsonObject;
|
|
|
+ }
|
|
|
+
|
|
|
private void parseIkoaMovieDownloadLog(JSONArray result, StringBuffer sb, String sort) {
|
|
|
sb.append("total:".concat(String.valueOf(result.size())));
|
|
|
sb.append("<table border=\"1\" cellspacing=\"0\"><tr><th>序号</th><th>cid</th><th>封面</th><th>User-Agent</th><th>影片当前状态</th><th>下载记录创建时间</th><th>最后一次修改时间</th></tr>");
|