|
|
@@ -205,8 +205,9 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
|
|
|
String message = null;
|
|
|
int retryCount = 0;
|
|
|
|
|
|
- if (StringUtils.isEmpty(crawlerLoveFoot.getName()) && crawlerLoveFoot.getOrginAvnoashiUrl().contains("avnoashi-1.com")) {
|
|
|
+ if (StringUtils.isEmpty(crawlerLoveFoot.getName()) && StringUtils.isNotEmpty(crawlerLoveFoot.getOrginAvnoashiUrl()) && crawlerLoveFoot.getOrginAvnoashiUrl().contains("avnoashi-1.com")) {
|
|
|
try {
|
|
|
+ headerMap.put("referer", crawlerLoveFoot.getOrginAvnoashiUrl());
|
|
|
loveFootDetailDocument = JsoupUtil.requestDocument(crawlerLoveFoot.getOrginAvnoashiUrl(), JsoupUtil.HTTP_GET, proxy, null, header2Map, null);
|
|
|
// 获取关键词
|
|
|
String keywords = loveFootDetailDocument.select("div.postContents").select("td:contains(タイトル)").next("td").text();
|
|
|
@@ -218,8 +219,20 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
|
|
|
} catch (Exception e) {
|
|
|
crawlerLoveFoot.setName("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab");
|
|
|
}
|
|
|
- } else if (StringUtils.isEmpty(crawlerLoveFoot.getName()) && crawlerLoveFoot.getOrginJpfootUrl().contains("jp-foot.net")) {
|
|
|
- crawlerLoveFoot.setName("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
|
|
|
+ } else if (StringUtils.isEmpty(crawlerLoveFoot.getName()) && StringUtils.isNotEmpty(crawlerLoveFoot.getOrginJpfootUrl()) && crawlerLoveFoot.getOrginJpfootUrl().contains("jp-foot.net")) {
|
|
|
+ try {
|
|
|
+ headerMap.put("referer", crawlerLoveFoot.getOrginJpfootUrl());
|
|
|
+ loveFootDetailDocument = JsoupUtil.requestDocument(crawlerLoveFoot.getOrginJpfootUrl(), JsoupUtil.HTTP_GET, proxy, null, header2Map, null);
|
|
|
+ // 获取关键词
|
|
|
+ String keywords = loveFootDetailDocument.select("div.avdetail_detailTop").select("p.avdetail_detailTopTitle").text().trim();
|
|
|
+ if (StringUtils.isNotEmpty(keywords)) {
|
|
|
+ crawlerLoveFoot.setName(keywords);
|
|
|
+ } else {
|
|
|
+ crawlerLoveFoot.setName("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ crawlerLoveFoot.setName("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
while (retryCount <= 3) {
|
|
|
@@ -585,7 +598,6 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
|
|
|
Document loveFootDetailDocument;
|
|
|
boolean tiaoguoFlag = true;
|
|
|
int pageNum = 0;
|
|
|
- outer:
|
|
|
while (true) {
|
|
|
loveFootDocument = JsoupUtil.requestDocument(avnoashiUrl, JsoupUtil.HTTP_GET, proxy, null, headerMap, null);
|
|
|
|