|
|
@@ -1,13 +1,15 @@
|
|
|
package top.lvzhiqiang;
|
|
|
|
|
|
import org.jsoup.nodes.Document;
|
|
|
+import org.jsoup.nodes.Element;
|
|
|
import org.jsoup.select.Elements;
|
|
|
+import top.lvzhiqiang.entity.CrawlerLoveFoot;
|
|
|
import top.lvzhiqiang.util.DateUtils;
|
|
|
import top.lvzhiqiang.util.JsoupUtil;
|
|
|
+import top.lvzhiqiang.util.StringUtils;
|
|
|
|
|
|
import java.net.InetSocketAddress;
|
|
|
import java.net.Proxy;
|
|
|
-import java.net.URLDecoder;
|
|
|
import java.net.URLEncoder;
|
|
|
import java.nio.charset.StandardCharsets;
|
|
|
import java.time.LocalDate;
|
|
|
@@ -15,39 +17,242 @@ import java.util.HashMap;
|
|
|
import java.util.Map;
|
|
|
|
|
|
public class Test7 {
|
|
|
+ private static final Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 7897));
|
|
|
+
|
|
|
public static void main(String[] args) throws Exception {
|
|
|
- String keywords = "動画作品名:義父の濃厚な舌技で舐め堕ちした美人妻 小島みなみ ssni00953";
|
|
|
- keywords = "動画作品名:元地方局アナウンサーの人妻 初ドラマ作品!! 抱かれたくない男に死にたくなるほどイカされて… 田中なな実 jul00935";
|
|
|
- keywords ="動画作品名:義父の濃厚な舌技で舐め堕ちした美人妻 小島みなみ ssni00953";
|
|
|
- keywords = keywords.split(":")[1].trim();
|
|
|
- keywords = keywords.substring(0, keywords.lastIndexOf(" ")).trim();
|
|
|
+ boolean javdbFlag = true;
|
|
|
+ String source = "1";
|
|
|
+ String keywords = "";
|
|
|
+
|
|
|
+ if ("1".equals(source)) {
|
|
|
+ keywords = execFeetpassion();
|
|
|
+ } else if ("2".equals(source)) {
|
|
|
+ keywords = execJpfoot();
|
|
|
+ } else if ("3".equals(source)) {
|
|
|
+ keywords = execAvnoashi();
|
|
|
+ }
|
|
|
System.out.println(keywords);
|
|
|
|
|
|
+ if (javdbFlag && StringUtils.isNotEmpty(keywords)) {
|
|
|
+ parseJavdb(keywords, proxy);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private static String execAvnoashi() throws Exception {
|
|
|
+ Document loveFootDocument;
|
|
|
+ Document loveFootDetailDocument;
|
|
|
+ String avnoashiUrl = "https://avnoashi-1.com/category/ashi-av/";
|
|
|
+ Map<String, String> headerMap = new HashMap<>();
|
|
|
+ headerMap.put("referer", avnoashiUrl);
|
|
|
+ Map<String, String> header2Map = new HashMap<>();
|
|
|
+ header2Map.put("referer", avnoashiUrl.concat("?sort=newer"));
|
|
|
+ int pageNum = 0;
|
|
|
+ boolean tiaoguoFlag = true;
|
|
|
+ while (true) {
|
|
|
+ loveFootDocument = JsoupUtil.requestDocument(avnoashiUrl, JsoupUtil.HTTP_GET, proxy, null, headerMap, null);
|
|
|
+
|
|
|
+ pageNum++;
|
|
|
+ if (pageNum > 2) {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ Elements sourceSelects = loveFootDocument.select("div.dividerBottom > div.archive").select("div.archive__contents").select("h2");
|
|
|
+ for (Element sourceSelect : sourceSelects) {
|
|
|
+ String sourceUrl = sourceSelect.select("a").attr("abs:href");
|
|
|
+
|
|
|
+ if (tiaoguoFlag) {
|
|
|
+ tiaoguoFlag = false;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ Integer statusInt = 2;
|
|
|
+ Integer typeInt = 1;
|
|
|
+ LocalDate clockDate = null;
|
|
|
+ LocalDate updateDate = null;
|
|
|
+ String keywords = null;
|
|
|
+
|
|
|
+ loveFootDetailDocument = JsoupUtil.requestDocument(sourceUrl, JsoupUtil.HTTP_GET, proxy, null, header2Map, null);
|
|
|
+ /*String clockDateStr = loveFootDetailDocument.select("div.viral").select("li.icon-clock").text();
|
|
|
+ String updateDateStr = loveFootDetailDocument.select("div.viral").select("li.icon-update").text();
|
|
|
+ clockDate = LocalDate.parse(clockDateStr, DateUtils.dateFormatter3);
|
|
|
+ updateDate = LocalDate.parse(updateDateStr, DateUtils.dateFormatter3);
|
|
|
+
|
|
|
+ if (updateDate.isBefore(latestDate) || updateDate.isEqual(latestDate)) {
|
|
|
+ break outer;
|
|
|
+ }*/
|
|
|
+
|
|
|
+ // 获取关键词
|
|
|
+ keywords = loveFootDetailDocument.select("div.postContents").select("td:contains(タイトル)").next("td").text();
|
|
|
+ if (StringUtils.isNotEmpty(keywords)) {
|
|
|
+ statusInt = 1;
|
|
|
+ return keywords;
|
|
|
+ } else {
|
|
|
+ throw new Exception("keywords is null");
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ // 继续下一页
|
|
|
+ Elements nextSelects = loveFootDocument.select("ul.pager").select("a:contains(Next)");
|
|
|
+ if (nextSelects.size() > 0) {
|
|
|
+ avnoashiUrl = nextSelects.get(0).attr("abs:href");
|
|
|
+ } else {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ private static String execJpfoot() throws Exception {
|
|
|
+ String sourceUrl = "https://jp-foot.net/av/h_460mbmh00051/";
|
|
|
+ Map<String, String> headerMap = new HashMap<>();
|
|
|
+ headerMap.put("referer", sourceUrl);
|
|
|
+ Document loveFootDetailDocument = JsoupUtil.requestDocument(sourceUrl, JsoupUtil.HTTP_GET, proxy, null, headerMap, null);
|
|
|
+ String dateStr = loveFootDetailDocument.select("div.avdetail_date").select("span.avdetail_dateText").text();
|
|
|
+
|
|
|
+ // 获取关键词
|
|
|
+ String keywords = loveFootDetailDocument.select("div.avdetail_detailTop").select("p.avdetail_detailTopTitle").text().trim();
|
|
|
+ if (StringUtils.isNotEmpty(keywords)) {
|
|
|
+ return keywords;
|
|
|
+ } else {
|
|
|
+ throw new Exception("keywords is null");
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
+ private static String execFeetpassion() throws Exception {
|
|
|
String sourceUrl = "https://feetpassion.net/ashi-name/ 【新木希空】丸みを帯びた新木希空さんの足の/";
|
|
|
sourceUrl = "https://feetpassion.net/ashi-name/%e3%80%80%e3%80%90%e6%9d%b1%e5%87%9b%e3%80%91%e5%8f%b3%e8%b6%b3%e3%81%af%e3%83%94%e3%83%b3%e3%82%af%e3%80%81%e5%b7%a6%e8%b6%b3%e3%81%af%e3%82%a4%e3%82%a8%e3%83%ad%e3%83%bc%e3%81%ae%e3%83%95%e3%83%83/";
|
|
|
sourceUrl = "https://feetpassion.net/ashi-name/【川上ゆう】暑い夏の今だからこそ、ロングブー/";
|
|
|
+ sourceUrl = "https://feetpassion.net/ashi-name/【女子大生】足舐めしながら生挿入するという完/";
|
|
|
+ sourceUrl = "https://feetpassion.net/ashi-name/有名大学卒-一流企業勤務-夫は会社役員の勝ち組f/";
|
|
|
+ sourceUrl = "https://feetpassion.net/ashi-name/篠田ゆうさんのギリシャ足を間近で見られる動画/";
|
|
|
+ sourceUrl = "https://feetpassion.net/ashi-name/【足裏くすぐり後『ムラムラしてきちゃったから/";
|
|
|
+
|
|
|
|
|
|
- Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 7897));
|
|
|
Map<String, String> headerMap = new HashMap<>();
|
|
|
headerMap.put("referer", sourceUrl);
|
|
|
Document loveFootDetailDocument = JsoupUtil.requestDocument(sourceUrl, JsoupUtil.HTTP_GET, proxy, null, headerMap, null);
|
|
|
String clockDateStr = loveFootDetailDocument.select("div.dividerBottom > ul.dateList-main").select("li.icon-clock").text();
|
|
|
String updateDateStr = loveFootDetailDocument.select("div.dividerBottom > ul.dateList-main").select("li.icon-update").text();
|
|
|
- LocalDate clockDate = LocalDate.parse(clockDateStr, DateUtils.dateFormatter3);
|
|
|
- LocalDate updateDate = LocalDate.parse(updateDateStr, DateUtils.dateFormatter3);
|
|
|
-
|
|
|
+ LocalDate clockDate = null;
|
|
|
+ LocalDate updateDate = null;
|
|
|
+ if (StringUtils.isNotEmpty(clockDateStr)) {
|
|
|
+ clockDate = LocalDate.parse(clockDateStr, DateUtils.dateFormatter3);
|
|
|
+ }
|
|
|
+ if (StringUtils.isNotEmpty(updateDateStr)) {
|
|
|
+ updateDate = LocalDate.parse(updateDateStr, DateUtils.dateFormatter3);
|
|
|
+ }
|
|
|
+ if (clockDate == null) {
|
|
|
+ clockDate = updateDate;
|
|
|
+ }
|
|
|
+ if (updateDate == null) {
|
|
|
+ updateDate = clockDate;
|
|
|
+ }
|
|
|
|
|
|
// 获取关键词
|
|
|
Elements keywordsElements = loveFootDetailDocument.select("div.postContents").select("a > span");
|
|
|
+ String keywords = "";
|
|
|
if (!keywordsElements.isEmpty()) {
|
|
|
keywords = keywordsElements.get(0).text().trim();
|
|
|
} else {
|
|
|
keywordsElements = loveFootDetailDocument.select("div.postContents > section.content").select("p");
|
|
|
keywords = keywordsElements.get(0).text().trim();
|
|
|
keywords = keywords.split(":")[1].trim();
|
|
|
- keywords = keywords.substring(0, keywords.lastIndexOf(" ")).trim();
|
|
|
+
|
|
|
+ if (keywords.lastIndexOf(" ") >= 0) {
|
|
|
+ keywords = keywords.substring(0, keywords.lastIndexOf(" ")).trim();
|
|
|
+ }
|
|
|
}
|
|
|
- System.out.println(keywords);
|
|
|
+ return keywords;
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void parseJavdb(String name, Proxy proxy) throws Exception {
|
|
|
+ CrawlerLoveFoot crawlerLoveFoot = new CrawlerLoveFoot();
|
|
|
+ crawlerLoveFoot.setName(name);
|
|
|
+ crawlerLoveFoot.setName(crawlerLoveFoot.getName().replace("%", "%").replace("#", "#").replace("?", "?"));
|
|
|
+ crawlerLoveFoot.setName(URLEncoder.encode(crawlerLoveFoot.getName(), StandardCharsets.UTF_8.toString()));
|
|
|
+
|
|
|
+ Map<String, String> javdbConstantMap = new HashMap<>();
|
|
|
+ javdbConstantMap.put("javdb", "https://javdb.com/");
|
|
|
+ String searchUrl = javdbConstantMap.get("javdb").concat("search?q=").concat(crawlerLoveFoot.getName()).concat("&f=all");
|
|
|
+ Map<String, String> header3Map = new HashMap<>();
|
|
|
+ header3Map.put("referer", searchUrl);
|
|
|
+
|
|
|
+ String codeUrl = null;
|
|
|
+ subsearch:
|
|
|
+ {
|
|
|
+ Document javdbSearchDocument = JsoupUtil.requestDocument(searchUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
|
|
|
+ Elements itembSelects = javdbSearchDocument.select("div.movie-list").select("div.item");
|
|
|
+ String title;
|
|
|
+
|
|
|
+
|
|
|
+ if (itembSelects.size() != 0) {
|
|
|
+ for (Element itembSelect : itembSelects) {
|
|
|
+ title = itembSelect.select("a.box").get(0).attr("title");
|
|
|
+ if (title.contains(crawlerLoveFoot.getName())) {
|
|
|
+ codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
|
|
|
+ break subsearch;
|
|
|
+ }
|
|
|
+
|
|
|
+ String newName = crawlerLoveFoot.getName().replace("●", "さ");
|
|
|
+ if (title.contains(newName)) {
|
|
|
+ codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
|
|
|
+ crawlerLoveFoot.setName(newName);
|
|
|
+ break subsearch;
|
|
|
+ }
|
|
|
+
|
|
|
+ newName = crawlerLoveFoot.getName().replace("●", "這");
|
|
|
+ if (title.contains(newName)) {
|
|
|
+ codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
|
|
|
+ crawlerLoveFoot.setName(newName);
|
|
|
+ break subsearch;
|
|
|
+ }
|
|
|
+
|
|
|
+ newName = crawlerLoveFoot.getName().replace("○", "〇");
|
|
|
+ if (title.contains(newName)) {
|
|
|
+ codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
|
|
|
+ crawlerLoveFoot.setName(newName);
|
|
|
+ break subsearch;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ String newName = crawlerLoveFoot.getName().substring(crawlerLoveFoot.getName().length() / 2);
|
|
|
+ searchUrl = javdbConstantMap.get("javdb").concat("search?q=").concat(newName).concat("&f=all");
|
|
|
+ javdbSearchDocument = JsoupUtil.requestDocument(searchUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
|
|
|
+ itembSelects = javdbSearchDocument.select("div.movie-list").select("div.item");
|
|
|
+ if (itembSelects.size() != 0) {
|
|
|
+ for (Element itembSelect : itembSelects) {
|
|
|
+ title = itembSelect.select("a.box").get(0).attr("title");
|
|
|
+ if (title.contains(crawlerLoveFoot.getName())) {
|
|
|
+ codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
|
|
|
+ break subsearch;
|
|
|
+ }
|
|
|
+
|
|
|
+ newName = crawlerLoveFoot.getName().replace("●", "さ");
|
|
|
+ if (title.contains(newName)) {
|
|
|
+ codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
|
|
|
+ crawlerLoveFoot.setName(newName);
|
|
|
+ break subsearch;
|
|
|
+ }
|
|
|
+
|
|
|
+ newName = crawlerLoveFoot.getName().replace("●", "這");
|
|
|
+ if (title.contains(newName)) {
|
|
|
+ codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
|
|
|
+ crawlerLoveFoot.setName(newName);
|
|
|
+ break subsearch;
|
|
|
+ }
|
|
|
+
|
|
|
+ newName = crawlerLoveFoot.getName().replace("○", "〇");
|
|
|
+ if (title.contains(newName)) {
|
|
|
+ codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
|
|
|
+ crawlerLoveFoot.setName(newName);
|
|
|
+ break subsearch;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+ System.out.println("codeUrl:" + codeUrl);
|
|
|
}
|
|
|
}
|