|
@@ -57,9 +57,11 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
|
|
|
|
|
|
|
|
Map<String, String> footConstantMap = null;
|
|
Map<String, String> footConstantMap = null;
|
|
|
Map<String, String> javbusConstantMap = null;
|
|
Map<String, String> javbusConstantMap = null;
|
|
|
|
|
+ Map<String, String> javdbConstantMap = null;
|
|
|
List<String> javbusUrlList = null;
|
|
List<String> javbusUrlList = null;
|
|
|
Map<String, String> headerMap = new HashMap<>();
|
|
Map<String, String> headerMap = new HashMap<>();
|
|
|
Map<String, String> header2Map = new HashMap<>();
|
|
Map<String, String> header2Map = new HashMap<>();
|
|
|
|
|
+ Map<String, String> header3Map = new HashMap<>();
|
|
|
Proxy proxy = null;
|
|
Proxy proxy = null;
|
|
|
|
|
|
|
|
public void beforeProxy() {
|
|
public void beforeProxy() {
|
|
@@ -104,6 +106,219 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
|
|
|
log.warn("jsoupFoot4avnoashi 结束:time={}", stopWatch.getTotalTimeSeconds());
|
|
log.warn("jsoupFoot4avnoashi 结束:time={}", stopWatch.getTotalTimeSeconds());
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ @Async
|
|
|
|
|
+ @Override
|
|
|
|
|
+ @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
|
|
|
|
|
+ public void jsoupLoveFoot4CrawingFail(Integer status, Integer ignoreRetryCount) {
|
|
|
|
|
+ log.warn("jjsoupLoveFoot4CrawingFail 开始");
|
|
|
|
|
+ StopWatch stopWatch = new StopWatch();
|
|
|
|
|
+ stopWatch.start();
|
|
|
|
|
+
|
|
|
|
|
+ // 获取待抓取码列表
|
|
|
|
|
+ List<CrawlerLoveFoot> loveFootList;
|
|
|
|
|
+ if (1 == ignoreRetryCount) {
|
|
|
|
|
+ loveFootList = crawlerLoveFootMapper.findInfoByStatus4IgnoreRetryCount(status);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ loveFootList = crawlerLoveFootMapper.findInfoByStatus(status);
|
|
|
|
|
+ }
|
|
|
|
|
+ if (loveFootList.size() == 0) {
|
|
|
|
|
+ log.warn("loveFootList为空");
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ log.warn("jsoupLoveFoot4CrawingFail loveFootList size={}", loveFootList.size());
|
|
|
|
|
+
|
|
|
|
|
+ List<DicCode> dicCodeList = dicCodeMapper.findAll();
|
|
|
|
|
+ // 获取常量MAP
|
|
|
|
|
+ javbusConstantMap = dicCodeList.stream()
|
|
|
|
|
+ .filter(x -> x.getType() != null && 1 == x.getType() && x.getEnv().contains(env))
|
|
|
|
|
+ .collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
|
|
|
|
|
+ javdbConstantMap = dicCodeList.stream()
|
|
|
|
|
+ .filter(x -> x.getType() != null && 2 == x.getType() && x.getEnv().contains(env))
|
|
|
|
|
+ .collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
|
|
|
|
|
+ // 代理及TOKEN设置
|
|
|
|
|
+ beforeProxy();
|
|
|
|
|
+ // 解析原始站点
|
|
|
|
|
+
|
|
|
|
|
+ jsoupLoveFoot4CrawingFailSub(loveFootList);
|
|
|
|
|
+ log.warn("jjsoupLoveFoot4CrawingFail 结束:time={}", stopWatch.getTotalTimeSeconds());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
|
|
|
|
|
+ public void jsoupLoveFoot4CrawingFailSub(List<CrawlerLoveFoot> loveFootList) {
|
|
|
|
|
+ Document javdbSearchDocument;
|
|
|
|
|
+ Document javdbCodeDocument;
|
|
|
|
|
+ for (CrawlerLoveFoot crawlerLoveFoot : loveFootList) {
|
|
|
|
|
+ String message = null;
|
|
|
|
|
+ int retryCount = 0;
|
|
|
|
|
+ while (retryCount <= 3) {
|
|
|
|
|
+ long start = System.currentTimeMillis();
|
|
|
|
|
+ String javdbSearchUrl = javdbConstantMap.get("javdb").concat("search?q=").concat(crawlerLoveFoot.getName()).concat("&f=all");
|
|
|
|
|
+ header3Map.put("referer", javdbSearchUrl);
|
|
|
|
|
+ try {
|
|
|
|
|
+ javdbSearchDocument = JsoupUtil.requestDocument(javdbSearchUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
|
|
|
|
|
+
|
|
|
|
|
+ Elements itembSelects = javdbSearchDocument.select("div.movie-list").select("div.item");
|
|
|
|
|
+ if (itembSelects.size() == 0) {
|
|
|
|
|
+ String newName = crawlerLoveFoot.getName().substring(crawlerLoveFoot.getName().length() / 2);
|
|
|
|
|
+ javdbSearchUrl = javdbConstantMap.get("javdb").concat("search?q=").concat(newName).concat("&f=all");
|
|
|
|
|
+ javdbSearchDocument = JsoupUtil.requestDocument(javdbSearchUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
|
|
|
|
|
+ itembSelects = javdbSearchDocument.select("div.movie-list").select("div.item");
|
|
|
|
|
+ }
|
|
|
|
|
+ if (itembSelects.size() == 0) {
|
|
|
|
|
+ throw new BusinessException(30000, "javdb search result null");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 获取codeUrl
|
|
|
|
|
+ String codeUrl = null;
|
|
|
|
|
+ String title;
|
|
|
|
|
+ for (Element itembSelect : itembSelects) {
|
|
|
|
|
+ title = itembSelect.select("a.box").get(0).attr("title");
|
|
|
|
|
+ if (title.contains(crawlerLoveFoot.getName())) {
|
|
|
|
|
+ codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ String newName = crawlerLoveFoot.getName().replace("●", "さ");
|
|
|
|
|
+ if (title.contains(newName)) {
|
|
|
|
|
+ codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
|
|
|
|
|
+ crawlerLoveFoot.setName(newName);
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if (StringUtils.isEmpty(codeUrl)) {
|
|
|
|
|
+ throw new BusinessException(30000, "javdb search result mismatch");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 解析codeUrl
|
|
|
|
|
+ javdbCodeDocument = JsoupUtil.requestDocument(codeUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
|
|
|
|
|
+ long picTime = parseJavdbCodeDocument(javdbCodeDocument, crawlerLoveFoot);
|
|
|
|
|
+
|
|
|
|
|
+ crawlerLoveFoot.setJavdbUrl(codeUrl);
|
|
|
|
|
+ crawlerLoveFoot.setRetryCount(retryCount);
|
|
|
|
|
+ crawlerLoveFoot.setType(2);
|
|
|
|
|
+ crawlerLoveFoot.setStatus(3);
|
|
|
|
|
+ log.warn("jsoupLoveFoot4CrawingFailSub parseKeywordsToCode success,keywords={},code={},picTime={},time={}", crawlerLoveFoot.getName(), crawlerLoveFoot.getIdentificationCode(), picTime, System.currentTimeMillis() - start);
|
|
|
|
|
+
|
|
|
|
|
+ break;
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ ++retryCount;
|
|
|
|
|
+
|
|
|
|
|
+ if (retryCount < 4) {
|
|
|
|
|
+ log.error("jsoupLoveFoot4CrawingFailSub error重试:,retryCount={},time={},javdbSearchUrl={}", retryCount, System.currentTimeMillis() - start, javdbSearchUrl, e);
|
|
|
|
|
+ } else if (retryCount == 4) {
|
|
|
|
|
+ message = e.getMessage().length() <= 200 ? e.getMessage() : e.getMessage().substring(0, 200);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if (e instanceof BusinessException) {
|
|
|
|
|
+ message = e.getMessage().length() <= 200 ? e.getMessage() : e.getMessage().substring(0, 200);
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if (StringUtils.isNotEmpty(message)) {
|
|
|
|
|
+ CrawlerLoveFoot crawlerLoveFoot2 = new CrawlerLoveFoot();
|
|
|
|
|
+ crawlerLoveFoot2.setId(crawlerLoveFoot.getId());
|
|
|
|
|
+ crawlerLoveFoot2.setFailureCause(message);
|
|
|
|
|
+ crawlerLoveFoot2.setRetryCount(retryCount);
|
|
|
|
|
+ crawlerLoveFootMapper.updateInfoById(crawlerLoveFoot2);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ crawlerLoveFoot.setFailureCause("");
|
|
|
|
|
+ crawlerLoveFootMapper.updateInfoById(crawlerLoveFoot);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private long parseJavdbCodeDocument(Document javdbCodeDocument, CrawlerLoveFoot crawlerLoveFoot) throws IOException {
|
|
|
|
|
+ Elements container = javdbCodeDocument.select("section.section > div.container");
|
|
|
|
|
+ if (container.size() == 0) {
|
|
|
|
|
+ throw new BusinessException(30000, "番号无效!");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ Elements videoDetail = container.select("div.video-detail");
|
|
|
|
|
+ // 名称
|
|
|
|
|
+ // crawlerLoveFoot.setName(videoDetail.select("h2.title").select("strong.current-title").text().trim());
|
|
|
|
|
+
|
|
|
|
|
+ Elements moviePanelInfos = videoDetail.select("nav.movie-panel-info");
|
|
|
|
|
+ Element pEle = moviePanelInfos.get(0);
|
|
|
|
|
+ // 识别码
|
|
|
|
|
+ String iCode = pEle.select("div:contains(番號)").select("span.value").first().text().replace(" ", "").replaceAll("\\s+", "");
|
|
|
|
|
+ crawlerLoveFoot.setIdentificationCode(iCode);
|
|
|
|
|
+ // 发行日期
|
|
|
|
|
+ String issueDate = pEle.select("div:contains(日期)").select("span.value").first().text().replace(" ", "").replaceAll("\\s+", "");
|
|
|
|
|
+ crawlerLoveFoot.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
|
|
|
|
|
+ // 长度
|
|
|
|
|
+ String length = pEle.select("div:contains(時長)").select("span.value").first().text().replace(" ", "").replaceAll("\\s+", "");
|
|
|
|
|
+ crawlerLoveFoot.setLength(length);
|
|
|
|
|
+ // 导演
|
|
|
|
|
+ Elements directorEles = pEle.select("div:contains(導演)").select("span.value");
|
|
|
|
|
+ if (directorEles.size() > 0) {
|
|
|
|
|
+ crawlerLoveFoot.setDirector(directorEles.first().select("a[href]").first().text().replace(" ", "").replaceAll("\\s+", ""));
|
|
|
|
|
+ }
|
|
|
|
|
+ // 制作商/片商
|
|
|
|
|
+ Elements markerEles = pEle.select("div:contains(片商)").select("span.value");
|
|
|
|
|
+ if (markerEles.size() > 0) {
|
|
|
|
|
+ crawlerLoveFoot.setMaker(markerEles.first().select("a[href]").first().text().replace(" ", "").replaceAll("\\s+", ""));
|
|
|
|
|
+ }
|
|
|
|
|
+ // 发行商
|
|
|
|
|
+ Elements issuerEles = pEle.select("div:contains(發行)").select("span.value");
|
|
|
|
|
+ if (issuerEles.size() > 0) {
|
|
|
|
|
+ crawlerLoveFoot.setIssuer(issuerEles.first().select("a[href]").first().text().replace(" ", "").replaceAll("\\s+", ""));
|
|
|
|
|
+ }
|
|
|
|
|
+ // 类别
|
|
|
|
|
+ Elements genresEles = pEle.select("div:contains(類別)").select("span.value");
|
|
|
|
|
+ if (genresEles.size() > 0) {
|
|
|
|
|
+ StringBuffer sb = new StringBuffer();
|
|
|
|
|
+ Elements ahrefEles = genresEles.first().select("a[href]");
|
|
|
|
|
+ for (Element ahrefEle : ahrefEles) {
|
|
|
|
|
+ sb.append(ahrefEle.text().replace(" ", "").replaceAll("\\s+", "")).append(",");
|
|
|
|
|
+ }
|
|
|
|
|
+ if (sb.length() > 0) {
|
|
|
|
|
+ sb = sb.deleteCharAt(sb.length() - 1);
|
|
|
|
|
+ }
|
|
|
|
|
+ crawlerLoveFoot.setGenres(sb.toString());
|
|
|
|
|
+ }
|
|
|
|
|
+ // 演员
|
|
|
|
|
+ Elements castEles = pEle.select("div:contains(演員)").select("span.value");
|
|
|
|
|
+ if (castEles.size() > 0) {
|
|
|
|
|
+ StringBuffer sb = new StringBuffer();
|
|
|
|
|
+ Elements ahrefEles = castEles.first().select("a[href]");
|
|
|
|
|
+ for (Element ahrefEle : ahrefEles) {
|
|
|
|
|
+ sb.append(ahrefEle.text().replace(" ", "").replaceAll("\\s+", "")).append(",");
|
|
|
|
|
+ }
|
|
|
|
|
+ if (sb.length() > 0) {
|
|
|
|
|
+ sb = sb.deleteCharAt(sb.length() - 1);
|
|
|
|
|
+ }
|
|
|
|
|
+ crawlerLoveFoot.setCast(sb.toString());
|
|
|
|
|
+ }
|
|
|
|
|
+ // 图片URL
|
|
|
|
|
+ Elements videoMetaPanel = videoDetail.select("div.column-video-cover");
|
|
|
|
|
+ String href = videoMetaPanel.select("a > img").first().attr("src");
|
|
|
|
|
+
|
|
|
|
|
+ long start = System.currentTimeMillis();
|
|
|
|
|
+ Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
|
|
|
|
|
+
|
|
|
|
|
+ String fileName = issueDate.concat(" ").concat(iCode).concat(" ").concat(crawlerLoveFoot.getName());
|
|
|
|
|
+ byte[] imgUrlBytes = fileName.getBytes(StandardCharsets.UTF_8);
|
|
|
|
|
+ if (imgUrlBytes.length > 251) {
|
|
|
|
|
+ byte[] imgUrlDestBytes = new byte[251];
|
|
|
|
|
+ System.arraycopy(imgUrlBytes, 0, imgUrlDestBytes, 0, 251);
|
|
|
|
|
+ fileName = new String(imgUrlDestBytes, StandardCharsets.UTF_8).replace("�", "");
|
|
|
|
|
+ }
|
|
|
|
|
+ fileName = fileName.concat(".jpg");
|
|
|
|
|
+
|
|
|
|
|
+ String machiImgUrl = "足舐/".concat(fileName);
|
|
|
|
|
+
|
|
|
|
|
+ saveFile(response.bodyStream(), javbusConstantMap.get("apics_path").concat(machiImgUrl));
|
|
|
|
|
+ long end = System.currentTimeMillis();
|
|
|
|
|
+
|
|
|
|
|
+ crawlerLoveFoot.setImgUrl(machiImgUrl);
|
|
|
|
|
+
|
|
|
|
|
+ return end - start;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
@Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
|
|
@Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
|
|
|
public void jsoupLoveFoot4avnoashiSub(Integer status, Integer ignoreRetryCount) throws Exception {
|
|
public void jsoupLoveFoot4avnoashiSub(Integer status, Integer ignoreRetryCount) throws Exception {
|
|
|
CrawlerLoveFoot latestLoveFoot = crawlerLoveFootMapper.findLatestInfo();
|
|
CrawlerLoveFoot latestLoveFoot = crawlerLoveFootMapper.findLatestInfo();
|
|
@@ -208,7 +423,7 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
|
|
|
|
|
|
|
|
Elements itembSelects = javbusSearchDocument.select("div#waterfall").select("div.item");
|
|
Elements itembSelects = javbusSearchDocument.select("div#waterfall").select("div.item");
|
|
|
if (itembSelects.size() == 0) {
|
|
if (itembSelects.size() == 0) {
|
|
|
- throw new BusinessException(30000, "search result null");
|
|
|
|
|
|
|
+ throw new BusinessException(30000, "javbus search result null");
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// 获取codeUrl
|
|
// 获取codeUrl
|
|
@@ -361,6 +576,12 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
|
|
|
bufferedOutputStream.close();
|
|
bufferedOutputStream.close();
|
|
|
bufferedInputStream.close();
|
|
bufferedInputStream.close();
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ public static void main(String[] args) {
|
|
|
|
|
+ String s = "嫉妬に狂った愛人のエグい杭打ちピストンにどハマり…都合の良いオンナのはずが快楽沼へ引きずり込まれた僕 七ツ森りり";
|
|
|
|
|
+ String newName = s.substring(s.length() / 2);
|
|
|
|
|
+ System.out.println(newName);
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|