|
@@ -195,6 +195,63 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
|
|
|
log.warn("jjsoupLoveFoot4CrawingFail 结束:totalCount={},successCount={},time={}", loveFootList.size(), successCount, stopWatch.getTotalTimeSeconds());
|
|
log.warn("jjsoupLoveFoot4CrawingFail 结束:totalCount={},successCount={},time={}", loveFootList.size(), successCount, stopWatch.getTotalTimeSeconds());
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ @Async
|
|
|
|
|
+ @Override
|
|
|
|
|
+ @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
|
|
|
|
|
+ public void jsoupLoveFoot4CrawingFail2(Integer status, Integer ignoreRetryCount, String website, String identificationCode) {
|
|
|
|
|
+ log.warn("jsoupLoveFoot4CrawingFail2 开始");
|
|
|
|
|
+ StopWatch stopWatch = new StopWatch();
|
|
|
|
|
+ stopWatch.start();
|
|
|
|
|
+
|
|
|
|
|
+ // 获取待抓取码列表
|
|
|
|
|
+ List<CrawlerLoveFoot> loveFootList = crawlerLoveFootMapper.findByCodeAndType(identificationCode, null, null);
|
|
|
|
|
+
|
|
|
|
|
+ if (loveFootList.size() == 0) {
|
|
|
|
|
+ log.warn("loveFootList为空");
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ log.warn("jsoupLoveFoot4CrawingFail2 loveFootList size={}", loveFootList.size());
|
|
|
|
|
+
|
|
|
|
|
+ List<DicCode> dicCodeList = dicCodeMapper.findAll();
|
|
|
|
|
+ // 获取常量MAP
|
|
|
|
|
+ javbusConstantMap = dicCodeList.stream()
|
|
|
|
|
+ .filter(x -> x.getType() != null && 1 == x.getType() && x.getEnv().contains(env))
|
|
|
|
|
+ .collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
|
|
|
|
|
+ javdbConstantMap = dicCodeList.stream()
|
|
|
|
|
+ .filter(x -> x.getType() != null && 2 == x.getType() && x.getEnv().contains(env))
|
|
|
|
|
+ .collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
|
|
|
|
|
+
|
|
|
|
|
+ // 获取javbus防屏蔽地址
|
|
|
|
|
+ if ("javbus".equals(website)) {
|
|
|
|
|
+ javbusUrlList = videoSitePoolMapper.findUrlByTypeAndDeleteFlag(1, 1);
|
|
|
|
|
+ if (javbusUrlList.size() == 0) {
|
|
|
|
|
+ log.warn("javbusUrlList为空");
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 代理及TOKEN设置
|
|
|
|
|
+ beforeProxy();
|
|
|
|
|
+ // 解析原始站点
|
|
|
|
|
+
|
|
|
|
|
+ // 通过关键词获取识别码
|
|
|
|
|
+ CrawlerLoveFoot crawlerLoveFoot = loveFootList.get(0);
|
|
|
|
|
+ try {
|
|
|
|
|
+ String message = parseKeywordsToCode(crawlerLoveFoot, crawlerLoveFoot.getName(), "javdb");
|
|
|
|
|
+ if (StringUtils.isNotEmpty(message)) {
|
|
|
|
|
+ throw new Exception(message);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ crawlerLoveFootMapper.insertOrUpdate4jpfoot(crawlerLoveFoot);
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ log.error("jsoupLoveFoot4CrawingFail2 detail fail,sourceUrl={}", crawlerLoveFoot.getOrginJpfootUrl(), e);
|
|
|
|
|
+ crawlerLoveFoot.setFailureCause(e.getMessage());
|
|
|
|
|
+ crawlerLoveFootMapper.insertOrUpdate4jpfoot(crawlerLoveFoot);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ log.warn("jsoupLoveFoot4CrawingFail2 结束:totalCount={},time={}", loveFootList.size(), stopWatch.getTotalTimeSeconds());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
@Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
|
|
@Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
|
|
|
public int jsoupLoveFoot4CrawingFailSub(List<CrawlerLoveFoot> loveFootList, String website) {
|
|
public int jsoupLoveFoot4CrawingFailSub(List<CrawlerLoveFoot> loveFootList, String website) {
|
|
|
int successCount = 0;
|
|
int successCount = 0;
|