|
@@ -126,6 +126,9 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
|
|
|
javbusConstantMap = dicCodeList.stream()
|
|
javbusConstantMap = dicCodeList.stream()
|
|
|
.filter(x -> x.getType() != null && 1 == x.getType() && x.getEnv().contains(env))
|
|
.filter(x -> x.getType() != null && 1 == x.getType() && x.getEnv().contains(env))
|
|
|
.collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
|
|
.collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
|
|
|
|
|
+ javdbConstantMap = dicCodeList.stream()
|
|
|
|
|
+ .filter(x -> x.getType() != null && 2 == x.getType() && x.getEnv().contains(env))
|
|
|
|
|
+ .collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
|
|
|
// 获取javbus防屏蔽地址
|
|
// 获取javbus防屏蔽地址
|
|
|
javbusUrlList = videoSitePoolMapper.findUrlByTypeAndDeleteFlag(1, 1);
|
|
javbusUrlList = videoSitePoolMapper.findUrlByTypeAndDeleteFlag(1, 1);
|
|
|
if (javbusUrlList.size() == 0) {
|
|
if (javbusUrlList.size() == 0) {
|
|
@@ -204,6 +207,8 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
|
|
|
String searchUrl = null;
|
|
String searchUrl = null;
|
|
|
Elements itembSelects = null;
|
|
Elements itembSelects = null;
|
|
|
try {
|
|
try {
|
|
|
|
|
+ Thread.sleep(3000);
|
|
|
|
|
+
|
|
|
String javbusCodeUrl = null;
|
|
String javbusCodeUrl = null;
|
|
|
if ("javbus".equals(website)) {
|
|
if ("javbus".equals(website)) {
|
|
|
String javbusUrl = javbusUrlList.get((int) (0 + Math.random() * (javbusUrlList.size())));
|
|
String javbusUrl = javbusUrlList.get((int) (0 + Math.random() * (javbusUrlList.size())));
|
|
@@ -362,7 +367,7 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
|
|
|
crawlerLoveFoot.setRetryCount(retryCount);
|
|
crawlerLoveFoot.setRetryCount(retryCount);
|
|
|
crawlerLoveFoot.setType(2);
|
|
crawlerLoveFoot.setType(2);
|
|
|
crawlerLoveFoot.setStatus(3);
|
|
crawlerLoveFoot.setStatus(3);
|
|
|
- log.warn("jsoupLoveFoot4CrawingFailSub parseKeywordsToCode success,keywords={},code={},picTime={},time={}", crawlerLoveFoot.getName(), crawlerLoveFoot.getIdentificationCode(), picTime, System.currentTimeMillis() - start);
|
|
|
|
|
|
|
+ log.warn("jsoupLoveFoot4CrawingFailSub parseKeywordsToCode success,num={},keywords={},code={},picTime={},time={}", successCount, crawlerLoveFoot.getName(), crawlerLoveFoot.getIdentificationCode(), picTime, System.currentTimeMillis() - start);
|
|
|
|
|
|
|
|
break;
|
|
break;
|
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
@@ -389,7 +394,14 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
|
|
|
crawlerLoveFootMapper.updateInfoById(crawlerLoveFoot2);
|
|
crawlerLoveFootMapper.updateInfoById(crawlerLoveFoot2);
|
|
|
} else {
|
|
} else {
|
|
|
crawlerLoveFoot.setFailureCause("");
|
|
crawlerLoveFoot.setFailureCause("");
|
|
|
- crawlerLoveFootMapper.updateInfoById(crawlerLoveFoot);
|
|
|
|
|
|
|
+
|
|
|
|
|
+ Integer exist = crawlerLoveFootMapper.existLoveFootByCode(crawlerLoveFoot.getIdentificationCode());
|
|
|
|
|
+ if (exist != null) {
|
|
|
|
|
+ crawlerLoveFootMapper.updateInfoByCode(crawlerLoveFoot);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ crawlerLoveFootMapper.updateInfoById(crawlerLoveFoot);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
successCount++;
|
|
successCount++;
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
@@ -495,6 +507,11 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
|
|
|
latestDate = latestLoveFoot.getUpdateDate();
|
|
latestDate = latestLoveFoot.getUpdateDate();
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ List<DicCode> dicCodeList = dicCodeMapper.findAll();
|
|
|
|
|
+ javdbConstantMap = dicCodeList.stream()
|
|
|
|
|
+ .filter(x -> x.getType() != null && 2 == x.getType() && x.getEnv().contains(env))
|
|
|
|
|
+ .collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
|
|
|
|
|
+
|
|
|
String avnoashiUrl = footConstantMap.get("avnoashi_url");
|
|
String avnoashiUrl = footConstantMap.get("avnoashi_url");
|
|
|
headerMap.put("referer", avnoashiUrl);
|
|
headerMap.put("referer", avnoashiUrl);
|
|
|
header2Map.put("referer", avnoashiUrl.concat("?sort=newer"));
|
|
header2Map.put("referer", avnoashiUrl.concat("?sort=newer"));
|
|
@@ -542,7 +559,9 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
|
|
|
crawlerLoveFoot.setType(2);
|
|
crawlerLoveFoot.setType(2);
|
|
|
crawlerLoveFoot.setStatus(3);
|
|
crawlerLoveFoot.setStatus(3);
|
|
|
crawlerLoveFoot.setCreateTime(LocalDateTime.now());
|
|
crawlerLoveFoot.setCreateTime(LocalDateTime.now());
|
|
|
- String message = parseKeywordsToCode(crawlerLoveFoot, keywords);
|
|
|
|
|
|
|
+
|
|
|
|
|
+ Thread.sleep(3000);
|
|
|
|
|
+ String message = parseKeywordsToCode(crawlerLoveFoot, keywords, "javdb");
|
|
|
if (StringUtils.isNotEmpty(message)) {
|
|
if (StringUtils.isNotEmpty(message)) {
|
|
|
statusInt = 4;
|
|
statusInt = 4;
|
|
|
throw new Exception(message);
|
|
throw new Exception(message);
|
|
@@ -634,7 +653,9 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
|
|
|
crawlerLoveFoot.setType(2);
|
|
crawlerLoveFoot.setType(2);
|
|
|
crawlerLoveFoot.setStatus(3);
|
|
crawlerLoveFoot.setStatus(3);
|
|
|
crawlerLoveFoot.setCreateTime(LocalDateTime.now());
|
|
crawlerLoveFoot.setCreateTime(LocalDateTime.now());
|
|
|
- String message = parseKeywordsToCode(crawlerLoveFoot, keywords);
|
|
|
|
|
|
|
+
|
|
|
|
|
+ Thread.sleep(3000);
|
|
|
|
|
+ String message = parseKeywordsToCode(crawlerLoveFoot, keywords, "javdb");
|
|
|
if (StringUtils.isNotEmpty(message)) {
|
|
if (StringUtils.isNotEmpty(message)) {
|
|
|
statusInt = 4;
|
|
statusInt = 4;
|
|
|
throw new Exception(message);
|
|
throw new Exception(message);
|
|
@@ -667,99 +688,149 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- private String parseKeywordsToCode(CrawlerLoveFoot crawlerLoveFoot, String keywords) {
|
|
|
|
|
|
|
+ private String parseKeywordsToCode(CrawlerLoveFoot crawlerLoveFoot, String keywords, String website) {
|
|
|
int retryCount = 0;
|
|
int retryCount = 0;
|
|
|
Document javbusSearchDocument = null;
|
|
Document javbusSearchDocument = null;
|
|
|
Document javbusCodeDocument;
|
|
Document javbusCodeDocument;
|
|
|
|
|
+ Document javdbSearchDocument;
|
|
|
|
|
+ Document javdbCodeDocument;
|
|
|
String message = null;
|
|
String message = null;
|
|
|
while (retryCount <= 3) {
|
|
while (retryCount <= 3) {
|
|
|
long start = System.currentTimeMillis();
|
|
long start = System.currentTimeMillis();
|
|
|
|
|
+ Elements itembSelects = null;
|
|
|
try {
|
|
try {
|
|
|
- String javbusUrl = javbusUrlList.get((int) (0 + Math.random() * (javbusUrlList.size())));
|
|
|
|
|
- String javbusSearchUrl = javbusUrl.concat("/search/").concat(keywords).concat("&parent=ce");
|
|
|
|
|
- try {
|
|
|
|
|
- javbusSearchDocument = JsoupUtil.requestDocument(javbusSearchUrl, JsoupUtil.HTTP_GET, proxy, null, null, null);
|
|
|
|
|
- } catch (Exception ee) {
|
|
|
|
|
- String newName = keywords.substring(keywords.length() / 2);
|
|
|
|
|
- javbusSearchUrl = javbusUrl.concat("/search/").concat(newName).concat("&parent=ce");
|
|
|
|
|
|
|
+ if ("javbus".equals(website)) {
|
|
|
|
|
+ String javbusUrl = javbusUrlList.get((int) (0 + Math.random() * (javbusUrlList.size())));
|
|
|
|
|
+ String javbusSearchUrl = javbusUrl.concat("/search/").concat(keywords).concat("&parent=ce");
|
|
|
try {
|
|
try {
|
|
|
javbusSearchDocument = JsoupUtil.requestDocument(javbusSearchUrl, JsoupUtil.HTTP_GET, proxy, null, null, null);
|
|
javbusSearchDocument = JsoupUtil.requestDocument(javbusSearchUrl, JsoupUtil.HTTP_GET, proxy, null, null, null);
|
|
|
- } catch (Exception eee) {
|
|
|
|
|
- newName = newName.substring(newName.length() / 2);
|
|
|
|
|
|
|
+ } catch (Exception ee) {
|
|
|
|
|
+ String newName = keywords.substring(keywords.length() / 2);
|
|
|
javbusSearchUrl = javbusUrl.concat("/search/").concat(newName).concat("&parent=ce");
|
|
javbusSearchUrl = javbusUrl.concat("/search/").concat(newName).concat("&parent=ce");
|
|
|
try {
|
|
try {
|
|
|
javbusSearchDocument = JsoupUtil.requestDocument(javbusSearchUrl, JsoupUtil.HTTP_GET, proxy, null, null, null);
|
|
javbusSearchDocument = JsoupUtil.requestDocument(javbusSearchUrl, JsoupUtil.HTTP_GET, proxy, null, null, null);
|
|
|
- } catch (Exception eeee) {
|
|
|
|
|
- // throw new BusinessException(30000, "javbus search result null");
|
|
|
|
|
|
|
+ } catch (Exception eee) {
|
|
|
|
|
+ newName = newName.substring(newName.length() / 2);
|
|
|
|
|
+ javbusSearchUrl = javbusUrl.concat("/search/").concat(newName).concat("&parent=ce");
|
|
|
|
|
+ try {
|
|
|
|
|
+ javbusSearchDocument = JsoupUtil.requestDocument(javbusSearchUrl, JsoupUtil.HTTP_GET, proxy, null, null, null);
|
|
|
|
|
+ } catch (Exception eeee) {
|
|
|
|
|
+ // throw new BusinessException(30000, "javbus search result null");
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
- }
|
|
|
|
|
|
|
|
|
|
- if (null == javbusSearchDocument) {
|
|
|
|
|
- String newName = keywords.replace("●", "");
|
|
|
|
|
- javbusSearchUrl = javbusUrl.concat("/search/").concat(newName).concat("&parent=ce");
|
|
|
|
|
- try {
|
|
|
|
|
- javbusSearchDocument = JsoupUtil.requestDocument(javbusSearchUrl, JsoupUtil.HTTP_GET, proxy, null, null, null);
|
|
|
|
|
- } catch (Exception ee) {
|
|
|
|
|
- newName = newName.substring(0, newName.length() / 2);
|
|
|
|
|
|
|
+ if (null == javbusSearchDocument) {
|
|
|
|
|
+ String newName = keywords.replace("●", "");
|
|
|
javbusSearchUrl = javbusUrl.concat("/search/").concat(newName).concat("&parent=ce");
|
|
javbusSearchUrl = javbusUrl.concat("/search/").concat(newName).concat("&parent=ce");
|
|
|
try {
|
|
try {
|
|
|
javbusSearchDocument = JsoupUtil.requestDocument(javbusSearchUrl, JsoupUtil.HTTP_GET, proxy, null, null, null);
|
|
javbusSearchDocument = JsoupUtil.requestDocument(javbusSearchUrl, JsoupUtil.HTTP_GET, proxy, null, null, null);
|
|
|
- } catch (Exception eee) {
|
|
|
|
|
|
|
+ } catch (Exception ee) {
|
|
|
newName = newName.substring(0, newName.length() / 2);
|
|
newName = newName.substring(0, newName.length() / 2);
|
|
|
javbusSearchUrl = javbusUrl.concat("/search/").concat(newName).concat("&parent=ce");
|
|
javbusSearchUrl = javbusUrl.concat("/search/").concat(newName).concat("&parent=ce");
|
|
|
try {
|
|
try {
|
|
|
javbusSearchDocument = JsoupUtil.requestDocument(javbusSearchUrl, JsoupUtil.HTTP_GET, proxy, null, null, null);
|
|
javbusSearchDocument = JsoupUtil.requestDocument(javbusSearchUrl, JsoupUtil.HTTP_GET, proxy, null, null, null);
|
|
|
- } catch (Exception eeee) {
|
|
|
|
|
- throw new BusinessException(30000, "javbus search result null");
|
|
|
|
|
|
|
+ } catch (Exception eee) {
|
|
|
|
|
+ newName = newName.substring(0, newName.length() / 2);
|
|
|
|
|
+ javbusSearchUrl = javbusUrl.concat("/search/").concat(newName).concat("&parent=ce");
|
|
|
|
|
+ try {
|
|
|
|
|
+ javbusSearchDocument = JsoupUtil.requestDocument(javbusSearchUrl, JsoupUtil.HTTP_GET, proxy, null, null, null);
|
|
|
|
|
+ } catch (Exception eeee) {
|
|
|
|
|
+ throw new BusinessException(30000, "javbus search result null");
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
- }
|
|
|
|
|
|
|
|
|
|
- Elements itembSelects = javbusSearchDocument.select("div#waterfall").select("div.item");
|
|
|
|
|
- if (itembSelects.size() == 0) {
|
|
|
|
|
- throw new BusinessException(30000, "javbus search result null");
|
|
|
|
|
|
|
+ itembSelects = javbusSearchDocument.select("div#waterfall").select("div.item");
|
|
|
|
|
+ if (itembSelects.size() == 0) {
|
|
|
|
|
+ throw new BusinessException(30000, "javbus search result null");
|
|
|
|
|
+ }
|
|
|
|
|
+ } else if ("javdb".equals(website)) {
|
|
|
|
|
+ String searchUrl = javdbConstantMap.get("javdb").concat("search?q=").concat(crawlerLoveFoot.getName()).concat("&f=all");
|
|
|
|
|
+ header3Map.put("referer", searchUrl);
|
|
|
|
|
+
|
|
|
|
|
+ javdbSearchDocument = JsoupUtil.requestDocument(searchUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
|
|
|
|
|
+
|
|
|
|
|
+ itembSelects = javdbSearchDocument.select("div.movie-list").select("div.item");
|
|
|
|
|
+ if (itembSelects.size() == 0) {
|
|
|
|
|
+ String newName = crawlerLoveFoot.getName().substring(crawlerLoveFoot.getName().length() / 2);
|
|
|
|
|
+ searchUrl = javdbConstantMap.get("javdb").concat("search?q=").concat(newName).concat("&f=all");
|
|
|
|
|
+ javdbSearchDocument = JsoupUtil.requestDocument(searchUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
|
|
|
|
|
+ itembSelects = javdbSearchDocument.select("div.movie-list").select("div.item");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if (itembSelects.size() == 0) {
|
|
|
|
|
+ throw new BusinessException(30000, "javdb search result null");
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// 获取codeUrl
|
|
// 获取codeUrl
|
|
|
String codeUrl = null;
|
|
String codeUrl = null;
|
|
|
String title;
|
|
String title;
|
|
|
- for (Element itembSelect : itembSelects) {
|
|
|
|
|
- title = itembSelect.select("a.movie-box").get(0).select("div.photo-frame > img").attr("title");
|
|
|
|
|
- if (title.contains(keywords)) {
|
|
|
|
|
- codeUrl = itembSelect.select("a.movie-box").get(0).attr("abs:href");
|
|
|
|
|
- break;
|
|
|
|
|
|
|
+
|
|
|
|
|
+ if ("javbus".equals(website)) {
|
|
|
|
|
+ for (Element itembSelect : itembSelects) {
|
|
|
|
|
+ title = itembSelect.select("a.movie-box").get(0).select("div.photo-frame > img").attr("title");
|
|
|
|
|
+ if (title.contains(keywords)) {
|
|
|
|
|
+ codeUrl = itembSelect.select("a.movie-box").get(0).attr("abs:href");
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ String newName = keywords.replace("●", "さ");
|
|
|
|
|
+ if (title.contains(newName)) {
|
|
|
|
|
+ codeUrl = itembSelect.select("a.movie-box").get(0).attr("abs:href");
|
|
|
|
|
+ crawlerLoveFoot.setName(newName);
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ String[] newNameArr = keywords.split("●");
|
|
|
|
|
+ int matchCount = 0;
|
|
|
|
|
+ for (String s : newNameArr) {
|
|
|
|
|
+ if (title.contains(s)) {
|
|
|
|
|
+ matchCount++;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ if (newNameArr.length == matchCount) {
|
|
|
|
|
+ codeUrl = itembSelect.select("a.movie-box").get(0).attr("abs:href");
|
|
|
|
|
+ crawlerLoveFoot.setName(title);
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- String newName = keywords.replace("●", "さ");
|
|
|
|
|
- if (title.contains(newName)) {
|
|
|
|
|
- codeUrl = itembSelect.select("a.movie-box").get(0).attr("abs:href");
|
|
|
|
|
- crawlerLoveFoot.setName(newName);
|
|
|
|
|
- break;
|
|
|
|
|
|
|
+ if (StringUtils.isEmpty(codeUrl)) {
|
|
|
|
|
+ throw new BusinessException(30000, "javbus search result mismatch");
|
|
|
}
|
|
}
|
|
|
|
|
+ } else if ("javdb".equals(website)) {
|
|
|
|
|
+ for (Element itembSelect : itembSelects) {
|
|
|
|
|
+ title = itembSelect.select("a.box").get(0).attr("title");
|
|
|
|
|
+ if (title.contains(crawlerLoveFoot.getName())) {
|
|
|
|
|
+ codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- String[] newNameArr = keywords.split("●");
|
|
|
|
|
- int matchCount = 0;
|
|
|
|
|
- for (String s : newNameArr) {
|
|
|
|
|
- if (title.contains(s)) {
|
|
|
|
|
- matchCount++;
|
|
|
|
|
|
|
+ String newName = crawlerLoveFoot.getName().replace("●", "さ");
|
|
|
|
|
+ if (title.contains(newName)) {
|
|
|
|
|
+ codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
|
|
|
|
|
+ crawlerLoveFoot.setName(newName);
|
|
|
|
|
+ break;
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
- if (newNameArr.length == matchCount) {
|
|
|
|
|
- codeUrl = itembSelect.select("a.movie-box").get(0).attr("abs:href");
|
|
|
|
|
- crawlerLoveFoot.setName(title);
|
|
|
|
|
- break;
|
|
|
|
|
|
|
+ if (StringUtils.isEmpty(codeUrl)) {
|
|
|
|
|
+ throw new BusinessException(30000, "javdb search result mismatch");
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- if (StringUtils.isEmpty(codeUrl)) {
|
|
|
|
|
- throw new BusinessException(30000, "javbus search result mismatch");
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
// 解析codeUrl
|
|
// 解析codeUrl
|
|
|
- javbusCodeDocument = JsoupUtil.requestDocument(codeUrl, JsoupUtil.HTTP_GET, proxy, null, null, null);
|
|
|
|
|
- long picTime = parseJavbusCodeDocument(javbusCodeDocument, crawlerLoveFoot);
|
|
|
|
|
|
|
+ long picTime = 999;
|
|
|
|
|
+ if ("javbus".equals(website)) {
|
|
|
|
|
+ javbusCodeDocument = JsoupUtil.requestDocument(codeUrl, JsoupUtil.HTTP_GET, proxy, null, null, null);
|
|
|
|
|
+ picTime = parseJavbusCodeDocument(javbusCodeDocument, crawlerLoveFoot);
|
|
|
|
|
+ } else if ("javdb".equals(website)) {
|
|
|
|
|
+ javdbCodeDocument = JsoupUtil.requestDocument(codeUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
|
|
|
|
|
+ picTime = parseJavdbCodeDocument(javdbCodeDocument, crawlerLoveFoot);
|
|
|
|
|
+ crawlerLoveFoot.setJavdbUrl(codeUrl);
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
crawlerLoveFoot.setRetryCount(retryCount);
|
|
crawlerLoveFoot.setRetryCount(retryCount);
|
|
|
log.warn("jsoupLoveFoot4avnoashiSub parseKeywordsToCode success,keywords={},code={},picTime={},time={}", keywords, crawlerLoveFoot.getIdentificationCode(), picTime, System.currentTimeMillis() - start);
|
|
log.warn("jsoupLoveFoot4avnoashiSub parseKeywordsToCode success,keywords={},code={},picTime={},time={}", keywords, crawlerLoveFoot.getIdentificationCode(), picTime, System.currentTimeMillis() - start);
|