Ver Fonte

update:jsoupLoveFoot优化v111

tujidelv há 2 anos
pai
commit
897d5eb623

+ 2 - 2
src/main/java/top/lvzhiqiang/config/MyJobs.java

@@ -162,7 +162,7 @@ public class MyJobs {
     /**
      * 每天20:30 Jsoup码池
      */
-    @Scheduled(cron = "0 30 20 * * ?", zone = SCHEDULED_ZONE)
+    //@Scheduled(cron = "0 30 20 * * ?", zone = SCHEDULED_ZONE)
     public void jsoupIcodePool4CrawingFail() {
         log.warn("jsoupIcodePool4CrawingFail开始==============================");
 
@@ -209,6 +209,6 @@ public class MyJobs {
     public void jsoupLoveFoot4CrawingFail() {
         log.warn("jsoupLoveFoot4CrawingFail开始==============================");
 
-        crawler4LoveFootService.jsoupLoveFoot4CrawingFail(4, 2, "javbus");
+        crawler4LoveFootService.jsoupLoveFoot4CrawingFail(4, 2, "javbus", null);
     }
 }

+ 3 - 3
src/main/java/top/lvzhiqiang/controller/CrawlerController.java

@@ -238,7 +238,7 @@ public class CrawlerController {
      */
     @RequestMapping("/jsoupLoveFoot")
     @ResponseBody
-    public R jsoupLoveFoot(Integer status, Integer isDel, Integer ignoreRetryCount, String website) throws Exception {
+    public R jsoupLoveFoot(Integer status, Integer isDel, Integer ignoreRetryCount, String website, String identificationCode) throws Exception {
         if (null == isDel) {
             isDel = 2;
         }
@@ -246,8 +246,8 @@ public class CrawlerController {
             ignoreRetryCount = 2;
         }
 
-        if (4 == status) {
-            crawler4LoveFootService.jsoupLoveFoot4CrawingFail(status, ignoreRetryCount, website);
+        if (4 == status || 1 == status || 2 == status) {
+            crawler4LoveFootService.jsoupLoveFoot4CrawingFail(status, ignoreRetryCount, website, identificationCode);
         } else if (status > 10) {
             if (status == 11) {
                 crawler4LoveFootService.jsoupLoveFoot4avnoashi(status, isDel, ignoreRetryCount);

+ 6 - 0
src/main/java/top/lvzhiqiang/mapper/CrawlerLoveFootMapper.java

@@ -131,4 +131,10 @@ public interface CrawlerLoveFootMapper {
 
     @Select("select 1 from crawler_lovefoot_info where identification_code = #{code} limit 1")
     Integer existLoveFootByCode(String code);
+
+    @Select("select 1 from crawler_lovefoot_info where orgin_avnoashi_url = #{sourceUrl} limit 1")
+    Integer existLoveFootByOrginAvnoashiUrl(String sourceUrl);
+
+    @Select("SELECT * FROM crawler_lovefoot_info WHERE orgin_avnoashi_url = #{sourceUrl} limit 1")
+    CrawlerLoveFoot findLoveFootByOrginAvnoashiUrl(String sourceUrl);
 }

+ 1 - 1
src/main/java/top/lvzhiqiang/service/Crawler4LoveFootService.java

@@ -10,7 +10,7 @@ public interface Crawler4LoveFootService {
 
     void jsoupLoveFoot4avnoashi(Integer status, Integer isDel, Integer ignoreRetryCount) throws Exception;
 
-    void jsoupLoveFoot4CrawingFail(Integer status, Integer ignoreRetryCount, String website);
+    void jsoupLoveFoot4CrawingFail(Integer status, Integer ignoreRetryCount, String website, String identificationCode);
 
     void jsoupLoveFoot4jpfoot(Integer status, Integer isDel, Integer ignoreRetryCount) throws Exception;
 }

+ 201 - 70
src/main/java/top/lvzhiqiang/service/impl/Crawler4LoveFootServiceImpl.java

@@ -68,7 +68,7 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
     public void beforeProxy() {
         if (null == proxy) {
             if ("dev".equals(env)) {
-                proxy = new Proxy(Proxy.Type.SOCKS, new InetSocketAddress("127.0.0.1", 1080));
+                proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 1080));
             } else {
                 proxy = Proxy.NO_PROXY;
             }
@@ -145,18 +145,24 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
     @Async
     @Override
     @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
-    public void jsoupLoveFoot4CrawingFail(Integer status, Integer ignoreRetryCount, String website) {
+    public void jsoupLoveFoot4CrawingFail(Integer status, Integer ignoreRetryCount, String website, String identificationCode) {
         log.warn("jjsoupLoveFoot4CrawingFail 开始");
         StopWatch stopWatch = new StopWatch();
         stopWatch.start();
 
         // 获取待抓取码列表
         List<CrawlerLoveFoot> loveFootList;
-        if (1 == ignoreRetryCount) {
-            loveFootList = crawlerLoveFootMapper.findInfoByStatus4IgnoreRetryCount(status);
+
+        if (StringUtils.isNotEmpty(identificationCode)) {
+            loveFootList = crawlerLoveFootMapper.findByCodeAndType(identificationCode, null, null);
         } else {
-            loveFootList = crawlerLoveFootMapper.findInfoByStatus(status);
+            if (1 == ignoreRetryCount) {
+                loveFootList = crawlerLoveFootMapper.findInfoByStatus4IgnoreRetryCount(status);
+            } else {
+                loveFootList = crawlerLoveFootMapper.findInfoByStatus(status);
+            }
         }
+
         if (loveFootList.size() == 0) {
             log.warn("loveFootList为空");
             return;
@@ -192,13 +198,27 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
     @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
     public int jsoupLoveFoot4CrawingFailSub(List<CrawlerLoveFoot> loveFootList, String website) {
         int successCount = 0;
+        Document loveFootDetailDocument;
         for (CrawlerLoveFoot crawlerLoveFoot : loveFootList) {
             Document searchDocument = null;
             Document codeDocument;
             String message = null;
             int retryCount = 0;
 
-            if (StringUtils.isEmpty(crawlerLoveFoot.getName())) {
+            if (StringUtils.isEmpty(crawlerLoveFoot.getName()) && crawlerLoveFoot.getOrginAvnoashiUrl().contains("avnoashi-1.com")) {
+                try {
+                    loveFootDetailDocument = JsoupUtil.requestDocument(crawlerLoveFoot.getOrginAvnoashiUrl(), JsoupUtil.HTTP_GET, proxy, null, header2Map, null);
+                    // 获取关键词
+                    String keywords = loveFootDetailDocument.select("div.postContents").select("td:contains(タイトル)").next("td").text();
+                    if (StringUtils.isNotEmpty(keywords)) {
+                        crawlerLoveFoot.setName(keywords);
+                    } else {
+                        crawlerLoveFoot.setName("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab");
+                    }
+                } catch (Exception e) {
+                    crawlerLoveFoot.setName("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab");
+                }
+            } else if (StringUtils.isEmpty(crawlerLoveFoot.getName()) && crawlerLoveFoot.getOrginJpfootUrl().contains("jp-foot.net")) {
                 crawlerLoveFoot.setName("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
             }
 
@@ -210,6 +230,10 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
                     Thread.sleep(3000);
 
                     String javbusCodeUrl = null;
+
+                    // 获取codeUrl
+                    String codeUrl = null;
+                    String title;
                     if ("javbus".equals(website)) {
                         String javbusUrl = javbusUrlList.get((int) (0 + Math.random() * (javbusUrlList.size())));
                         searchUrl = javbusUrl.concat("/search/").concat(crawlerLoveFoot.getName()).concat("&parent=ce");
@@ -273,28 +297,84 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
                             }
                         }
                     } else if ("javdb".equals(website)) {
+                        crawlerLoveFoot.setName(crawlerLoveFoot.getName().replace("%", "％"));
+
                         searchUrl = javdbConstantMap.get("javdb").concat("search?q=").concat(crawlerLoveFoot.getName()).concat("&f=all");
                         header3Map.put("referer", searchUrl);
 
-                        searchDocument = JsoupUtil.requestDocument(searchUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
+                        subsearch:
+                        {
+                            searchDocument = JsoupUtil.requestDocument(searchUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
+                            itembSelects = searchDocument.select("div.movie-list").select("div.item");
+                            if (itembSelects.size() != 0) {
+                                for (Element itembSelect : itembSelects) {
+                                    title = itembSelect.select("a.box").get(0).attr("title");
+                                    if (title.contains(crawlerLoveFoot.getName())) {
+                                        codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                        break subsearch;
+                                    }
+
+                                    String newName = crawlerLoveFoot.getName().replace("●", "さ");
+                                    if (title.contains(newName)) {
+                                        codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                        crawlerLoveFoot.setName(newName);
+                                        break subsearch;
+                                    }
+
+                                    newName = crawlerLoveFoot.getName().replace("●", "這");
+                                    if (title.contains(newName)) {
+                                        codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                        crawlerLoveFoot.setName(newName);
+                                        break subsearch;
+                                    }
+
+                                    newName = crawlerLoveFoot.getName().replace("○", "〇");
+                                    if (title.contains(newName)) {
+                                        codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                        crawlerLoveFoot.setName(newName);
+                                        break subsearch;
+                                    }
+                                }
+
+                            }
 
-                        itembSelects = searchDocument.select("div.movie-list").select("div.item");
-                        if (itembSelects.size() == 0) {
                             String newName = crawlerLoveFoot.getName().substring(crawlerLoveFoot.getName().length() / 2);
                             searchUrl = javdbConstantMap.get("javdb").concat("search?q=").concat(newName).concat("&f=all");
                             searchDocument = JsoupUtil.requestDocument(searchUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
                             itembSelects = searchDocument.select("div.movie-list").select("div.item");
-                        }
+                            if (itembSelects.size() != 0) {
+                                for (Element itembSelect : itembSelects) {
+                                    title = itembSelect.select("a.box").get(0).attr("title");
+                                    if (title.contains(crawlerLoveFoot.getName())) {
+                                        codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                        break subsearch;
+                                    }
+
+                                    newName = crawlerLoveFoot.getName().replace("●", "さ");
+                                    if (title.contains(newName)) {
+                                        codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                        crawlerLoveFoot.setName(newName);
+                                        break subsearch;
+                                    }
+
+                                    newName = crawlerLoveFoot.getName().replace("●", "這");
+                                    if (title.contains(newName)) {
+                                        codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                        crawlerLoveFoot.setName(newName);
+                                        break subsearch;
+                                    }
 
-                        if (itembSelects.size() == 0) {
-                            throw new BusinessException(30000, "javdb search result null");
+                                    newName = crawlerLoveFoot.getName().replace("○", "〇");
+                                    if (title.contains(newName)) {
+                                        codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                        crawlerLoveFoot.setName(newName);
+                                        break subsearch;
+                                    }
+                                }
+                            }
                         }
                     }
 
-                    // 获取codeUrl
-                    String codeUrl = null;
-                    String title;
-
                     if ("javbus".equals(website)) {
                         for (Element itembSelect : itembSelects) {
                             title = itembSelect.select("a.movie-box").get(0).select("div.photo-frame > img").attr("title");
@@ -333,20 +413,6 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
                             throw new BusinessException(30000, "javbus search result mismatch");
                         }
                     } else if ("javdb".equals(website)) {
-                        for (Element itembSelect : itembSelects) {
-                            title = itembSelect.select("a.box").get(0).attr("title");
-                            if (title.contains(crawlerLoveFoot.getName())) {
-                                codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
-                                break;
-                            }
-
-                            String newName = crawlerLoveFoot.getName().replace("●", "さ");
-                            if (title.contains(newName)) {
-                                codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
-                                crawlerLoveFoot.setName(newName);
-                                break;
-                            }
-                        }
                         if (StringUtils.isEmpty(codeUrl)) {
                             throw new BusinessException(30000, "javdb search result mismatch");
                         }
@@ -477,7 +543,7 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
         long start = System.currentTimeMillis();
         Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
 
-        String fileName = issueDate.concat(" ").concat(iCode).concat(" ").concat(crawlerLoveFoot.getName());
+        String fileName = issueDate.concat(" ").concat(iCode).concat(" ").concat(StringUtils.escapeJavParam(crawlerLoveFoot.getName()));
         byte[] imgUrlBytes = fileName.getBytes(StandardCharsets.UTF_8);
         if (imgUrlBytes.length > 251) {
             byte[] imgUrlDestBytes = new byte[251];
@@ -517,15 +583,28 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
         header2Map.put("referer", avnoashiUrl.concat("?sort=newer"));
         Document loveFootDocument;
         Document loveFootDetailDocument;
+        boolean tiaoguoFlag = true;
+        int pageNum = 0;
         outer:
         while (true) {
             loveFootDocument = JsoupUtil.requestDocument(avnoashiUrl, JsoupUtil.HTTP_GET, proxy, null, headerMap, null);
+
+            pageNum++;
+            if (pageNum > 50) {
+                break;
+            }
+
             log.warn("jsoupLoveFoot4avnoashiSub page success:url={}", avnoashiUrl);
 
             Elements sourceSelects = loveFootDocument.select("div.dividerBottom > div.archive").select("div.archive__contents").select("h2");
             for (Element sourceSelect : sourceSelects) {
                 String sourceUrl = sourceSelect.select("a").attr("abs:href");
 
+                if (tiaoguoFlag) {
+                    tiaoguoFlag = false;
+                    continue;
+                }
+
                 Integer statusInt = 2;
                 Integer typeInt = 1;
                 LocalDate clockDate = null;
@@ -533,14 +612,14 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
                 String keywords = null;
                 try {
                     loveFootDetailDocument = JsoupUtil.requestDocument(sourceUrl, JsoupUtil.HTTP_GET, proxy, null, header2Map, null);
-                    String clockDateStr = loveFootDetailDocument.select("div.viral").select("li.icon-clock").text();
+                    /*String clockDateStr = loveFootDetailDocument.select("div.viral").select("li.icon-clock").text();
                     String updateDateStr = loveFootDetailDocument.select("div.viral").select("li.icon-update").text();
                     clockDate = LocalDate.parse(clockDateStr, DateUtils.dateFormatter3);
                     updateDate = LocalDate.parse(updateDateStr, DateUtils.dateFormatter3);
 
                     if (updateDate.isBefore(latestDate) || updateDate.isEqual(latestDate)) {
                         break outer;
-                    }
+                    }*/
 
                     // 获取关键词
                     keywords = loveFootDetailDocument.select("div.postContents").select("td:contains(タイトル)").next("td").text();
@@ -567,20 +646,26 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
                         throw new Exception(message);
                     }
 
-                    crawlerLoveFootMapper.insertOrUpdate4avnoashi(crawlerLoveFoot);
+                    CrawlerLoveFoot exist = crawlerLoveFootMapper.findLoveFootByOrginAvnoashiUrl(sourceUrl);
+                    if (exist == null || exist.getStatus() != 3) {
+                        crawlerLoveFootMapper.insertOrUpdate4avnoashi(crawlerLoveFoot);
+                    }
                 } catch (Exception e) {
-                    log.error("jsoupLoveFoot4avnoashiSub detail fail,sourceUrl={}", sourceUrl, e);
-                    CrawlerLoveFoot crawlerLoveFoot = new CrawlerLoveFoot();
-                    crawlerLoveFoot.setIdentificationCode(UUID.randomUUID().toString());
-                    crawlerLoveFoot.setOrginAvnoashiUrl(sourceUrl);
-                    crawlerLoveFoot.setClockDate(clockDate);
-                    crawlerLoveFoot.setUpdateDate(updateDate);
-                    crawlerLoveFoot.setName(keywords);
-                    crawlerLoveFoot.setType(typeInt);
-                    crawlerLoveFoot.setStatus(statusInt);
-                    crawlerLoveFoot.setCreateTime(LocalDateTime.now());
-                    crawlerLoveFoot.setFailureCause(e.getMessage());
-                    crawlerLoveFootMapper.insertOrUpdate4avnoashi(crawlerLoveFoot);
+                    Integer exist = crawlerLoveFootMapper.existLoveFootByOrginAvnoashiUrl(sourceUrl);
+                    if (exist == null) {
+                        log.error("jsoupLoveFoot4avnoashiSub detail fail,sourceUrl={}", sourceUrl, e);
+                        CrawlerLoveFoot crawlerLoveFoot = new CrawlerLoveFoot();
+                        crawlerLoveFoot.setIdentificationCode(UUID.randomUUID().toString());
+                        crawlerLoveFoot.setOrginAvnoashiUrl(sourceUrl);
+                        crawlerLoveFoot.setClockDate(clockDate);
+                        crawlerLoveFoot.setUpdateDate(updateDate);
+                        crawlerLoveFoot.setName(keywords);
+                        crawlerLoveFoot.setType(typeInt);
+                        crawlerLoveFoot.setStatus(statusInt);
+                        crawlerLoveFoot.setCreateTime(LocalDateTime.now());
+                        crawlerLoveFoot.setFailureCause(e.getMessage());
+                        crawlerLoveFootMapper.insertOrUpdate4avnoashi(crawlerLoveFoot);
+                    }
                 }
             }
 
@@ -699,6 +784,11 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
             long start = System.currentTimeMillis();
             Elements itembSelects = null;
             try {
+                Thread.sleep(3000);
+
+                // 获取codeUrl
+                String codeUrl = null;
+                String title;
                 if ("javbus".equals(website)) {
                     String javbusUrl = javbusUrlList.get((int) (0 + Math.random() * (javbusUrlList.size())));
                     String javbusSearchUrl = javbusUrl.concat("/search/").concat(keywords).concat("&parent=ce");
@@ -747,28 +837,83 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
                         throw new BusinessException(30000, "javbus search result null");
                     }
                 } else if ("javdb".equals(website)) {
+                    crawlerLoveFoot.setName(crawlerLoveFoot.getName().replace("%", "％").replace("#", "＃").replace("?", "？"));
+
                     String searchUrl = javdbConstantMap.get("javdb").concat("search?q=").concat(crawlerLoveFoot.getName()).concat("&f=all");
                     header3Map.put("referer", searchUrl);
 
-                    javdbSearchDocument = JsoupUtil.requestDocument(searchUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
+                    subsearch:
+                    {
+                        javdbSearchDocument = JsoupUtil.requestDocument(searchUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
+                        itembSelects = javdbSearchDocument.select("div.movie-list").select("div.item");
+                        if (itembSelects.size() != 0) {
+                            for (Element itembSelect : itembSelects) {
+                                title = itembSelect.select("a.box").get(0).attr("title");
+                                if (title.contains(crawlerLoveFoot.getName())) {
+                                    codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                    break subsearch;
+                                }
+
+                                String newName = crawlerLoveFoot.getName().replace("●", "さ");
+                                if (title.contains(newName)) {
+                                    codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                    crawlerLoveFoot.setName(newName);
+                                    break subsearch;
+                                }
+
+                                newName = crawlerLoveFoot.getName().replace("●", "這");
+                                if (title.contains(newName)) {
+                                    codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                    crawlerLoveFoot.setName(newName);
+                                    break subsearch;
+                                }
+
+                                newName = crawlerLoveFoot.getName().replace("○", "〇");
+                                if (title.contains(newName)) {
+                                    codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                    crawlerLoveFoot.setName(newName);
+                                    break subsearch;
+                                }
+                            }
+                        }
 
-                    itembSelects = javdbSearchDocument.select("div.movie-list").select("div.item");
-                    if (itembSelects.size() == 0) {
                         String newName = crawlerLoveFoot.getName().substring(crawlerLoveFoot.getName().length() / 2);
                         searchUrl = javdbConstantMap.get("javdb").concat("search?q=").concat(newName).concat("&f=all");
                         javdbSearchDocument = JsoupUtil.requestDocument(searchUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
                         itembSelects = javdbSearchDocument.select("div.movie-list").select("div.item");
-                    }
+                        if (itembSelects.size() != 0) {
+                            for (Element itembSelect : itembSelects) {
+                                title = itembSelect.select("a.box").get(0).attr("title");
+                                if (title.contains(crawlerLoveFoot.getName())) {
+                                    codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                    break subsearch;
+                                }
 
-                    if (itembSelects.size() == 0) {
-                        throw new BusinessException(30000, "javdb search result null");
+                                newName = crawlerLoveFoot.getName().replace("●", "さ");
+                                if (title.contains(newName)) {
+                                    codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                    crawlerLoveFoot.setName(newName);
+                                    break subsearch;
+                                }
+
+                                newName = crawlerLoveFoot.getName().replace("●", "這");
+                                if (title.contains(newName)) {
+                                    codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                    crawlerLoveFoot.setName(newName);
+                                    break subsearch;
+                                }
+
+                                newName = crawlerLoveFoot.getName().replace("○", "〇");
+                                if (title.contains(newName)) {
+                                    codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
+                                    crawlerLoveFoot.setName(newName);
+                                    break subsearch;
+                                }
+                            }
+                        }
                     }
                 }
 
-                // 获取codeUrl
-                String codeUrl = null;
-                String title;
-
                 if ("javbus".equals(website)) {
                     for (Element itembSelect : itembSelects) {
                         title = itembSelect.select("a.movie-box").get(0).select("div.photo-frame > img").attr("title");
@@ -802,20 +947,6 @@ public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
                         throw new BusinessException(30000, "javbus search result mismatch");
                     }
                 } else if ("javdb".equals(website)) {
-                    for (Element itembSelect : itembSelects) {
-                        title = itembSelect.select("a.box").get(0).attr("title");
-                        if (title.contains(crawlerLoveFoot.getName())) {
-                            codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
-                            break;
-                        }
-
-                        String newName = crawlerLoveFoot.getName().replace("●", "さ");
-                        if (title.contains(newName)) {
-                            codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
-                            crawlerLoveFoot.setName(newName);
-                            break;
-                        }
-                    }
                     if (StringUtils.isEmpty(codeUrl)) {
                         throw new BusinessException(30000, "javdb search result mismatch");
                     }

+ 3 - 0
src/main/resources/static/crawler.html

@@ -244,6 +244,7 @@
             <span>status</span>
             <select name="status" style="height: 21.43px;">
                 <option value="2">获取关键词失败</option>
+                <option value="1">获取关键词成功</option>
                 <option value="4">爬取失败</option>
                 <option value="11">从0开始avnoashi</option>
                 <option value="12">从0开始jpfoot</option>
@@ -257,6 +258,8 @@
             <input type="text" name="isDel" placeholder="1:是,2:否。默认否"/>
             <span>ignoreRetryCount</span>
             <input type="text" name="ignoreRetryCount" placeholder="1:是,2:否。默认否"/>
+            <span>identificationCode</span>
+            <input type="text" name="identificationCode"/>
             <input type="submit" value="提交">
         </form>
     </div>