MyJobs.java 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. package top.lvzhiqiang.config;
  2. import lombok.extern.slf4j.Slf4j;
  3. import org.jsoup.Jsoup;
  4. import org.jsoup.nodes.Document;
  5. import org.jsoup.nodes.Element;
  6. import org.jsoup.select.Elements;
  7. import org.springframework.scheduling.annotation.Scheduled;
  8. import org.springframework.stereotype.Component;
  9. import org.springframework.transaction.annotation.Propagation;
  10. import org.springframework.transaction.annotation.Transactional;
  11. import top.lvzhiqiang.entity.DicCode;
  12. import top.lvzhiqiang.entity.VideoSitePool;
  13. import top.lvzhiqiang.mapper.VideoSitePoolMapper;
  14. import top.lvzhiqiang.service.BgService;
  15. import top.lvzhiqiang.service.Crawler4JavbusService;
  16. import top.lvzhiqiang.service.Crawler4LoveFootService;
  17. import top.lvzhiqiang.service.CrawlerService;
  18. import javax.annotation.Resource;
  19. import java.util.ArrayList;
  20. import java.util.HashSet;
  21. import java.util.List;
  22. import java.util.Set;
  23. import java.util.stream.Collectors;
  24. /**
  25. * 定时任务
  26. *
  27. * @author lvzhiqiang
  28. * 2022/4/28 15:49
  29. */
  30. @Component
  31. @Slf4j
  32. public class MyJobs {
  33. @Resource
  34. private VideoSitePoolMapper videoSitePoolMapper;
  35. @Resource
  36. private BgService bgService;
  37. @Resource
  38. private CrawlerService crawlerService;
  39. @Resource
  40. private Crawler4JavbusService crawler4JavbusService;
  41. @Resource
  42. private Crawler4LoveFootService crawler4LoveFootService;
  43. private static final String SCHEDULED_ZONE = "Asia/Shanghai";
  44. /**
  45. * 每天06:00 校验站点有效性
  46. */
  47. @Scheduled(cron = "0 0 6 * * ?", zone = SCHEDULED_ZONE)
  48. //@Scheduled(cron = "0 10 19 * * ?",zone = SCHEDULED_ZONE)
  49. @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
  50. public void checkVideoSite() {
  51. log.warn("checkVideoSite开始==============================");
  52. // 获取javbus官方地址
  53. DicCode dicCode = WebAppConfig.dicCodeList.stream().filter(x -> 2 == x.getType() && "javbus".equals(x.getCodeKey())).findFirst().get();
  54. if (dicCode == null) {
  55. log.warn("javbus官方站点为Null");
  56. return;
  57. }
  58. // 获取javbusUrlList
  59. List<String> javbusUrlList = videoSitePoolMapper.findUrlByType(1);
  60. // 获取javbusNewUrlList
  61. Set<String> javbusNewUrlList = new HashSet<>();
  62. try {
  63. Document document = Jsoup.connect(dicCode.getCodeValue()).timeout(50000).ignoreContentType(true)
  64. .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36")
  65. .header("referer", "https://www.javbus.com/").get();
  66. Elements ahrefList = document.select("strong:contains(防屏蔽地址)").next("a");
  67. for (Element element : ahrefList) {
  68. String text = element.text();
  69. log.warn("Jsoup获取{}防屏蔽地址:{}", dicCode.getCodeValue(), text);
  70. javbusNewUrlList.add(text);
  71. }
  72. } catch (Exception e) {
  73. log.error("Jsoup获取{}防屏蔽地址异常", dicCode.getCodeValue(), e);
  74. }
  75. if (javbusNewUrlList.size() == 0 && javbusUrlList.size() > 0) {
  76. for (String javbusUrl : javbusUrlList) {
  77. try {
  78. Document document = Jsoup.connect(javbusUrl).timeout(50000).ignoreContentType(true)
  79. .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36")
  80. .header("referer", "https://www.javbus.com/").get();
  81. Elements ahrefList = document.select("strong:contains(防屏蔽地址)").next("a");
  82. for (Element element : ahrefList) {
  83. String text = element.text();
  84. log.info("Jsoup获取{}防屏蔽地址:{}", javbusUrl, text);
  85. javbusNewUrlList.add(text);
  86. }
  87. if (javbusNewUrlList.size() > 0) {
  88. break;
  89. }
  90. } catch (Exception e) {
  91. log.error("Jsoup获取{}防屏蔽地址异常", javbusUrl, e);
  92. }
  93. }
  94. }
  95. if (javbusNewUrlList.size() == 0) {
  96. log.warn("javbusNewUrlList为空");
  97. }
  98. if (javbusNewUrlList.size() == 0 && javbusUrlList.size() == 0) {
  99. log.warn("javbusUrlList和javbusNewUrlList为空");
  100. return;
  101. }
  102. // 校验新地址
  103. List<String> javbusNewUrlFinalList = javbusNewUrlList.stream().filter(e -> !javbusUrlList.contains(e)).collect(Collectors.toList());
  104. List<VideoSitePool> videoSitePoolList = new ArrayList<>();
  105. VideoSitePool videoSitePool;
  106. for (String javbusNewUrlFinal : javbusNewUrlFinalList) {
  107. try {
  108. Jsoup.connect(javbusNewUrlFinal).timeout(50000);
  109. videoSitePool = new VideoSitePool();
  110. videoSitePool.setUrl(javbusNewUrlFinal);
  111. videoSitePool.setType(1);
  112. videoSitePoolList.add(videoSitePool);
  113. log.warn("javbusNewUrlFinalList:javbus防屏蔽地址有效!javbusUrl={}", javbusNewUrlFinal);
  114. } catch (Exception e) {
  115. log.error("javbusNewUrlFinalList:javbus防屏蔽地址失效!javbusUrl={}", javbusNewUrlFinal, e);
  116. }
  117. }
  118. if (videoSitePoolList.size() > 0) {
  119. videoSitePoolMapper.insertList(videoSitePoolList);
  120. }
  121. // 校验存量地址
  122. for (String javbusUrl : javbusUrlList) {
  123. int deleteFlag = 1;
  124. try {
  125. Jsoup.connect(javbusUrl).timeout(50000);
  126. log.warn("javbusUrlList:javbus防屏蔽地址有效!javbusUrl={}", javbusUrl);
  127. } catch (Exception e) {
  128. deleteFlag = 2;
  129. log.error("javbusUrlList:javbus防屏蔽地址失效!javbusUrl={}", javbusUrl, e);
  130. }
  131. videoSitePoolMapper.updateDeleteFlag(javbusUrl, deleteFlag);
  132. }
  133. log.warn("checkVideoSite结束==============================");
  134. }
  135. /**
  136. * 每天20:00 Jsoup码池
  137. */
  138. @Scheduled(cron = "0 00 20 * * ?", zone = SCHEDULED_ZONE)
  139. public void jsoupIcodePool4CrawingNo() {
  140. log.warn("jsoupIcodePool4CrawingNo开始==============================");
  141. bgService.jsoupIcodePool(1, 2, 2);
  142. }
  143. /**
  144. * 每天20:30 Jsoup码池
  145. */
  146. @Scheduled(cron = "0 30 20 * * ?", zone = SCHEDULED_ZONE)
  147. public void jsoupIcodePool4CrawingFail() {
  148. log.warn("jsoupIcodePool4CrawingFail开始==============================");
  149. bgService.jsoupIcodePool(3, 2, 2);
  150. }
  151. /**
  152. * 每天18:00 jsoupXiaoeknowCourse
  153. */
  154. @Scheduled(cron = "0 00 18 * * ?", zone = SCHEDULED_ZONE)
  155. @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
  156. public void jsoupXiaoeknowCourse() {
  157. log.warn("jsoupXiaoeknowCourse开始==============================");
  158. crawlerService.jsoupXiaoeknowCourse(null, 2, null);
  159. }
  160. /**
  161. * 每隔一小时执行一次 jsoupJavbusProfile
  162. */
  163. @Scheduled(cron = "0 0 * * * ?", zone = SCHEDULED_ZONE)
  164. public void jsoupJavbusProfileJob() throws Exception {
  165. log.warn("jsoupJavbusProfileJob开始==============================");
  166. crawler4JavbusService.jsoupJavbusProfile(null, null);
  167. }
  168. /**
  169. * 每天23:50 jsoupLoveFoot
  170. */
  171. @Scheduled(cron = "0 50 23 * * ?", zone = SCHEDULED_ZONE)
  172. public void jsoupLoveFoot() throws Exception {
  173. log.warn("jsoupLoveFoot开始==============================");
  174. crawler4LoveFootService.jsoupLoveFoot4avnoashi(null, 2, 2);
  175. }
  176. /**
  177. * 每天23:55 jsoupLoveFoot4CrawingFail
  178. */
  179. // @Scheduled(cron = "0 55 23 * * ?", zone = SCHEDULED_ZONE)
  180. public void jsoupLoveFoot4CrawingFail() {
  181. log.warn("jsoupLoveFoot4CrawingFail开始==============================");
  182. crawler4LoveFootService.jsoupLoveFoot4CrawingFail(4, 2);
  183. }
  184. }