| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194 |
- package top.lvzhiqiang.config;
- import lombok.extern.slf4j.Slf4j;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Document;
- import org.jsoup.nodes.Element;
- import org.jsoup.select.Elements;
- import org.springframework.scheduling.annotation.Scheduled;
- import org.springframework.stereotype.Component;
- import org.springframework.transaction.annotation.Propagation;
- import org.springframework.transaction.annotation.Transactional;
- import top.lvzhiqiang.entity.DicCode;
- import top.lvzhiqiang.entity.VideoSitePool;
- import top.lvzhiqiang.mapper.VideoSitePoolMapper;
- import top.lvzhiqiang.service.*;
- import javax.annotation.Resource;
- import java.util.ArrayList;
- import java.util.HashSet;
- import java.util.List;
- import java.util.Set;
- import java.util.stream.Collectors;
- /**
- * 定时任务
- *
- * @author lvzhiqiang
- * 2022/4/28 15:49
- */
- @Component
- @Slf4j
- public class MyJavJobs {
- @Resource
- private VideoSitePoolMapper videoSitePoolMapper;
- @Resource
- private BgService bgService;
- @Resource
- private CrawlerService crawlerService;
- @Resource
- private Crawler4JavbusService crawler4JavbusService;
- @Resource
- private Crawler4JavdbService crawler4JavdbService;
- @Resource
- private Crawler4LoveFootService crawler4LoveFootService;
- private static final String SCHEDULED_ZONE = "Asia/Shanghai";
- /**
- * 每天06:00 校验站点有效性
- */
- @Scheduled(cron = "0 0 6 * * ?", zone = SCHEDULED_ZONE)
- //@Scheduled(cron = "0 10 19 * * ?",zone = SCHEDULED_ZONE)
- @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
- public void checkVideoSite() {
- log.warn("checkVideoSite开始==============================");
- // 获取javbus官方地址
- DicCode dicCode = WebAppConfig.dicCodeList.stream().filter(x -> 2 == x.getType() && "javbus".equals(x.getCodeKey())).findFirst().get();
- if (dicCode == null) {
- log.warn("javbus官方站点为Null");
- return;
- }
- // 获取javbusUrlList
- List<String> javbusUrlList = videoSitePoolMapper.findUrlByType(1);
- // 获取javbusNewUrlList
- Set<String> javbusNewUrlList = new HashSet<>();
- try {
- Document document = Jsoup.connect(dicCode.getCodeValue()).timeout(50000).ignoreContentType(true)
- .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36")
- .header("referer", "https://www.javbus.com/").get();
- Elements ahrefList = document.select("strong:contains(防屏蔽地址)").next("a");
- for (Element element : ahrefList) {
- String text = element.text();
- log.warn("Jsoup获取{}防屏蔽地址:{}", dicCode.getCodeValue(), text);
- javbusNewUrlList.add(text);
- }
- } catch (Exception e) {
- log.error("Jsoup获取{}防屏蔽地址异常", dicCode.getCodeValue(), e);
- }
- if (javbusNewUrlList.size() == 0) {
- log.warn("javbusNewUrlList为空");
- }
- if (javbusNewUrlList.size() == 0 && javbusUrlList.size() == 0) {
- log.warn("javbusUrlList和javbusNewUrlList为空");
- return;
- }
- // 校验新地址
- List<String> javbusNewUrlFinalList = javbusNewUrlList.stream().filter(e -> !javbusUrlList.contains(e)).collect(Collectors.toList());
- List<VideoSitePool> videoSitePoolList = new ArrayList<>();
- VideoSitePool videoSitePool;
- for (String javbusNewUrlFinal : javbusNewUrlFinalList) {
- try {
- Jsoup.connect(javbusNewUrlFinal).timeout(50000);
- videoSitePool = new VideoSitePool();
- videoSitePool.setUrl(javbusNewUrlFinal);
- videoSitePool.setType(1);
- videoSitePoolList.add(videoSitePool);
- log.warn("javbusNewUrlFinalList:javbus防屏蔽地址有效!javbusUrl={}", javbusNewUrlFinal);
- } catch (Exception e) {
- log.error("javbusNewUrlFinalList:javbus防屏蔽地址失效!javbusUrl={}", javbusNewUrlFinal, e);
- }
- }
- if (videoSitePoolList.size() > 0) {
- videoSitePoolMapper.insertList(videoSitePoolList);
- }
- // 校验存量地址
- for (String javbusUrl : javbusUrlList) {
- int deleteFlag = 1;
- try {
- Jsoup.connect(javbusUrl).timeout(50000);
- log.warn("javbusUrlList:javbus防屏蔽地址有效!javbusUrl={}", javbusUrl);
- } catch (Exception e) {
- deleteFlag = 2;
- log.error("javbusUrlList:javbus防屏蔽地址失效!javbusUrl={}", javbusUrl, e);
- }
- videoSitePoolMapper.updateDeleteFlag(javbusUrl, deleteFlag);
- }
- log.warn("checkVideoSite结束==============================");
- }
- /**
- * 每天20:00 Jsoup码池
- */
- @Scheduled(cron = "0 00 20 * * ?", zone = SCHEDULED_ZONE)
- public void jsoupIcodePool4CrawingNo() {
- log.warn("jsoupIcodePool4CrawingNo开始==============================");
- bgService.jsoupIcodePool("javbus", 1, 2, 2);
- }
- /**
- * 每天20:30 Jsoup码池
- */
- //@Scheduled(cron = "0 30 20 * * ?", zone = SCHEDULED_ZONE)
- public void jsoupIcodePool4CrawingFail() {
- log.warn("jsoupIcodePool4CrawingFail开始==============================");
- bgService.jsoupIcodePool("javdb", 3, 2, 2);
- }
- /**
- * 每天18:00 jsoupXiaoeknowCourse
- */
- @Scheduled(cron = "0 00 18 * * ?", zone = SCHEDULED_ZONE)
- @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
- public void jsoupXiaoeknowCourse() {
- log.warn("jsoupXiaoeknowCourse开始==============================");
- crawlerService.jsoupXiaoeknowCourse(null, 2, null);
- }
- /**
- * 每隔一小时执行一次 jsoupJavbusProfile
- */
- // @Scheduled(cron = "0 0 * * * ?", zone = SCHEDULED_ZONE)
- public void jsoupJavbusProfileJob() throws Exception {
- log.warn("jsoupJavbusProfileJob开始==============================");
- crawler4JavbusService.jsoupJavbusProfile(null, null);
- }
- /**
- * 每天23:50 jsoupLoveFoot
- */
- @Scheduled(cron = "0 50 23 * * ?", zone = SCHEDULED_ZONE)
- public void jsoupLoveFoot() throws Exception {
- log.warn("jsoupLoveFoot开始==============================");
- crawler4LoveFootService.jsoupLoveFoot4avnoashi(null, 2, 2);
- crawler4LoveFootService.jsoupLoveFoot4jpfoot(null, 2, 2);
- crawler4LoveFootService.jsoupLoveFoot4feetpassion(null, 2, 2);
- }
- /**
- * 每天23:55 jsoupLoveFoot4CrawingFail
- */
- //@Scheduled(cron = "0 55 23 * * ?", zone = SCHEDULED_ZONE)
- public void jsoupLoveFoot4CrawingFail() {
- log.warn("jsoupLoveFoot4CrawingFail开始==============================");
- crawler4LoveFootService.jsoupLoveFoot4CrawingFail(4, 2, "javbus", null);
- }
- }
|