|
|
@@ -3,6 +3,7 @@ package top.lvzhiqiang.service.impl;
|
|
|
import com.alibaba.fastjson.JSONObject;
|
|
|
import com.github.pagehelper.PageHelper;
|
|
|
import com.github.pagehelper.PageInfo;
|
|
|
+import com.xxl.job.core.context.XxlJobHelper;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
import org.jsoup.Connection;
|
|
|
import org.jsoup.HttpStatusException;
|
|
|
@@ -13,12 +14,17 @@ import org.jsoup.select.Elements;
|
|
|
import org.springframework.beans.factory.annotation.Value;
|
|
|
import org.springframework.scheduling.annotation.Async;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
+import org.springframework.transaction.annotation.Propagation;
|
|
|
+import org.springframework.transaction.annotation.Transactional;
|
|
|
import org.springframework.util.StopWatch;
|
|
|
+import top.lvzhiqiang.config.WebAppConfig;
|
|
|
import top.lvzhiqiang.entity.CrawlerJavbusLog;
|
|
|
import top.lvzhiqiang.entity.CrawlerJavbusProfile;
|
|
|
import top.lvzhiqiang.entity.DicCode;
|
|
|
+import top.lvzhiqiang.entity.VideoSitePool;
|
|
|
import top.lvzhiqiang.mapper.CrawlerJavbusProfileMapper;
|
|
|
import top.lvzhiqiang.mapper.DicCodeMapper;
|
|
|
+import top.lvzhiqiang.mapper.VideoSitePoolMapper;
|
|
|
import top.lvzhiqiang.service.Crawler4JavbusService;
|
|
|
import top.lvzhiqiang.util.DateUtils;
|
|
|
import top.lvzhiqiang.util.JsoupUtil;
|
|
|
@@ -48,6 +54,8 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
|
|
|
@Resource
|
|
|
private DicCodeMapper dicCodeMapper;
|
|
|
@Resource
|
|
|
+ private VideoSitePoolMapper videoSitePoolMapper;
|
|
|
+ @Resource
|
|
|
private CrawlerJavbusProfileMapper crawlerJavbusProfileMapper;
|
|
|
@Value("${spring.profiles.active}")
|
|
|
private String env;
|
|
|
@@ -96,6 +104,88 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
|
|
|
return javbusCookiesMap;
|
|
|
}
|
|
|
|
|
|
+ @Override
|
|
|
+ @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
|
|
|
+ public void checkJavbusVideoSite() {
|
|
|
+ XxlJobHelper.log("checkVideoSite开始==============================");
|
|
|
+
|
|
|
+ // 获取javbus官方地址
|
|
|
+ DicCode dicCode = WebAppConfig.dicCodeList.stream().filter(x -> 2 == x.getType() && "javbus".equals(x.getCodeKey())).findFirst().get();
|
|
|
+ if (dicCode == null) {
|
|
|
+ XxlJobHelper.log("javbus官方站点为Null");
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 获取javbusUrlList
|
|
|
+ List<String> javbusUrlList = videoSitePoolMapper.findUrlByType(1);
|
|
|
+
|
|
|
+ // 获取javbusNewUrlList
|
|
|
+ Set<String> javbusNewUrlList = new HashSet<>();
|
|
|
+ try {
|
|
|
+ Document document = Jsoup.connect(dicCode.getCodeValue()).timeout(50000).ignoreContentType(true)
|
|
|
+ .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36")
|
|
|
+ .header("referer", "https://www.javbus.com/").get();
|
|
|
+
|
|
|
+ Elements ahrefList = document.select("strong:contains(防屏蔽地址)").next("a");
|
|
|
+ for (Element element : ahrefList) {
|
|
|
+ String text = element.text();
|
|
|
+ XxlJobHelper.log("Jsoup获取{}防屏蔽地址:{}", dicCode.getCodeValue(), text);
|
|
|
+ javbusNewUrlList.add(text);
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("Jsoup获取{}防屏蔽地址异常", dicCode.getCodeValue(), e);
|
|
|
+ XxlJobHelper.log("Jsoup获取{}防屏蔽地址异常", dicCode.getCodeValue());
|
|
|
+ XxlJobHelper.log(e);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (javbusNewUrlList.size() == 0) {
|
|
|
+ XxlJobHelper.log("javbusNewUrlList为空");
|
|
|
+ }
|
|
|
+ if (javbusNewUrlList.size() == 0 && javbusUrlList.size() == 0) {
|
|
|
+ XxlJobHelper.log("javbusUrlList和javbusNewUrlList为空");
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 校验新地址
|
|
|
+ List<String> javbusNewUrlFinalList = javbusNewUrlList.stream().filter(e -> !javbusUrlList.contains(e)).collect(Collectors.toList());
|
|
|
+ List<VideoSitePool> videoSitePoolList = new ArrayList<>();
|
|
|
+ VideoSitePool videoSitePool;
|
|
|
+ for (String javbusNewUrlFinal : javbusNewUrlFinalList) {
|
|
|
+ try {
|
|
|
+ Jsoup.connect(javbusNewUrlFinal).timeout(50000);
|
|
|
+
|
|
|
+ videoSitePool = new VideoSitePool();
|
|
|
+ videoSitePool.setUrl(javbusNewUrlFinal);
|
|
|
+ videoSitePool.setType(1);
|
|
|
+ videoSitePoolList.add(videoSitePool);
|
|
|
+ XxlJobHelper.log("javbusNewUrlFinalList:javbus防屏蔽地址有效!javbusUrl={}", javbusNewUrlFinal);
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("javbusNewUrlFinalList:javbus防屏蔽地址失效!javbusUrl={}", javbusNewUrlFinal, e);
|
|
|
+ XxlJobHelper.log("javbusNewUrlFinalList:javbus防屏蔽地址失效!javbusUrl={}", javbusNewUrlFinal);
|
|
|
+ XxlJobHelper.log(e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (videoSitePoolList.size() > 0) {
|
|
|
+ videoSitePoolMapper.insertList(videoSitePoolList);
|
|
|
+ }
|
|
|
+ // 校验存量地址
|
|
|
+ for (String javbusUrl : javbusUrlList) {
|
|
|
+ int deleteFlag = 1;
|
|
|
+ try {
|
|
|
+ Jsoup.connect(javbusUrl).timeout(50000);
|
|
|
+ XxlJobHelper.log("javbusUrlList:javbus防屏蔽地址有效!javbusUrl={}", javbusUrl);
|
|
|
+ } catch (Exception e) {
|
|
|
+ deleteFlag = 2;
|
|
|
+ log.error("javbusUrlList:javbus防屏蔽地址失效!javbusUrl={}", javbusUrl, e);
|
|
|
+ XxlJobHelper.log("javbusUrlList:javbus防屏蔽地址失效!javbusUrl={}", javbusUrl);
|
|
|
+ XxlJobHelper.log(e);
|
|
|
+ }
|
|
|
+ videoSitePoolMapper.updateDeleteFlag(javbusUrl, deleteFlag);
|
|
|
+ }
|
|
|
+
|
|
|
+ XxlJobHelper.log("checkVideoSite结束==============================");
|
|
|
+ }
|
|
|
+
|
|
|
@Async
|
|
|
@Override
|
|
|
public void jsoupJavbusProfile(Long start, Integer limit) throws Exception {
|