Forráskód Böngészése

add:jsoupLoveFoot4avnoashiSub v1

tujidelv 3 éve
szülő
commit
f0c21caf95

+ 13 - 0
src/main/java/top/lvzhiqiang/config/MyJobs.java

@@ -14,6 +14,7 @@ import top.lvzhiqiang.entity.VideoSitePool;
 import top.lvzhiqiang.mapper.VideoSitePoolMapper;
 import top.lvzhiqiang.service.BgService;
 import top.lvzhiqiang.service.Crawler4JavbusService;
+import top.lvzhiqiang.service.Crawler4LoveFootService;
 import top.lvzhiqiang.service.CrawlerService;
 
 import javax.annotation.Resource;
@@ -42,6 +43,8 @@ public class MyJobs {
     private CrawlerService crawlerService;
     @Resource
     private Crawler4JavbusService crawler4JavbusService;
+    @Resource
+    private Crawler4LoveFootService crawler4LoveFootService;
 
     private static final String SCHEDULED_ZONE = "Asia/Shanghai";
 
@@ -187,4 +190,14 @@ public class MyJobs {
 
         crawler4JavbusService.jsoupJavbusProfile(null, null);
     }
+
+    /**
+     * Scheduled trigger for the LoveFoot crawl: fires every day at 21:00 in the
+     * {@code SCHEDULED_ZONE} time zone.
+     * <p>
+     * Calls the service with {@code status = null}, {@code isDel = 2} and
+     * {@code ignoreRetryCount = 2}; the manual crawler endpoint uses the same
+     * values (2) as its defaults for the latter two.
+     *
+     * @throws Exception whatever the crawler service call propagates
+     */
+    @Scheduled(cron = "0 00 21 * * ?", zone = SCHEDULED_ZONE)
+    public void jsoupLoveFoot() throws Exception {
+        log.warn("jsoupLoveFoot开始==============================");
+
+        crawler4LoveFootService.jsoupLoveFoot4avnoashi(null, 2, 2);
+    }
 }

+ 23 - 0
src/main/java/top/lvzhiqiang/controller/CrawlerController.java

@@ -7,6 +7,7 @@ import top.lvzhiqiang.dto.R;
 import top.lvzhiqiang.exception.ParameterException;
 import top.lvzhiqiang.service.Crawler4FacebookService;
 import top.lvzhiqiang.service.Crawler4JavbusService;
+import top.lvzhiqiang.service.Crawler4LoveFootService;
 import top.lvzhiqiang.service.CrawlerService;
 import top.lvzhiqiang.util.StringUtils;
 
@@ -28,6 +29,8 @@ public class CrawlerController {
     private Crawler4FacebookService crawler4FacebookService;
     @Resource
     private Crawler4JavbusService crawler4JavbusService;
+    @Resource
+    private Crawler4LoveFootService crawler4LoveFootService;
 
     /**
      * findXiaoeknowCourse
@@ -226,4 +229,24 @@ public class CrawlerController {
 
         return crawler4JavbusService.findJavbusProfile(keyword, timeDay, pic, orderField, order, pageNo, pageSize);
     }
+
+    /**
+     * Jsoup LoveFoot
+     *
+     * @author lvzhiqiang
+     * 2022/11/26 15:50
+     */
+    @RequestMapping("/jsoupLoveFoot")
+    @ResponseBody
+    public R jsoupLoveFoot(Integer status, Integer isDel, Integer ignoreRetryCount) throws Exception {
+        if (null == isDel) {
+            isDel = 2;
+        }
+        if (null == ignoreRetryCount) {
+            ignoreRetryCount = 2;
+        }
+
+        crawler4LoveFootService.jsoupLoveFoot4avnoashi(status, isDel, ignoreRetryCount);
+        return R.ok();
+    }
 }

+ 129 - 0
src/main/java/top/lvzhiqiang/entity/CrawlerLoveFoot.java

@@ -0,0 +1,129 @@
+package top.lvzhiqiang.entity;
+
+import com.fasterxml.jackson.annotation.JsonFormat;
+import lombok.Data;
+import top.lvzhiqiang.util.DateUtils;
+
+import java.io.Serializable;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+
+/**
+ * Row of the LoveFoot information table ({@code crawler_lovefoot_info}).
+ * <p>
+ * Accessors, {@code equals}/{@code hashCode} and {@code toString} are generated by
+ * Lombok's {@code @Data}.
+ *
+ * @author lvzhiqiang
+ * 2022/11/26 15:50
+ */
+@Data
+public class CrawlerLoveFoot implements Serializable {
+
+    /**
+     * Primary key.
+     */
+    private Long id;
+
+    /**
+     * Name (title).
+     */
+    private String name;
+
+    /**
+     * Identification code.
+     */
+    private String identificationCode;
+
+    /**
+     * Issue (release) date.
+     */
+    @JsonFormat(pattern = DateUtils.PATTERN_TO_DAYS)
+    private LocalDate issueDate;
+
+    /**
+     * Length, kept as the raw text scraped from the page.
+     */
+    private String length;
+
+    /**
+     * Director.
+     */
+    private String director;
+
+    /**
+     * Maker (production studio).
+     */
+    private String maker;
+
+    /**
+     * Issuer (publisher).
+     */
+    private String issuer;
+
+    /**
+     * Genres.
+     */
+    private String genres;
+
+    /**
+     * Cast.
+     */
+    private String cast;
+
+    /**
+     * Image URL.
+     */
+    private String imgUrl;
+
+    /**
+     * Original (source) URL.
+     */
+    private String orginUrl;
+
+    /**
+     * Type {1: pending review, 2: review passed, 3: review rejected}.
+     */
+    private Integer type;
+
+    /**
+     * Status (1: identification code obtained, 2: failed to obtain identification code,
+     * 3: crawl succeeded, 4: crawl failed).
+     */
+    private Integer status;
+
+    /**
+     * Failure cause.
+     */
+    private String failureCause;
+
+    /**
+     * Number of retries already performed.
+     */
+    private Integer retryCount;
+
+    /**
+     * Delete flag {1: normal, 2: deleted}.
+     */
+    private Integer deleteFlag;
+
+    /**
+     * clock_date
+     */
+    @JsonFormat(pattern = DateUtils.PATTERN_TO_DAYS)
+    private LocalDate clockDate;
+
+    /**
+     * update_date
+     */
+    @JsonFormat(pattern = DateUtils.PATTERN_TO_DAYS)
+    private LocalDate updateDate;
+
+    /**
+     * Creation time.
+     */
+    @JsonFormat(pattern = DateUtils.PATTERN_TO_SECONDS)
+    private LocalDateTime createTime;
+
+    /**
+     * Last modification time.
+     */
+    @JsonFormat(pattern = DateUtils.PATTERN_TO_SECONDS)
+    private LocalDateTime modifyTime;
+}

+ 76 - 0
src/main/java/top/lvzhiqiang/mapper/CrawlerLoveFootMapper.java

@@ -0,0 +1,76 @@
+package top.lvzhiqiang.mapper;
+
+import org.apache.ibatis.annotations.*;
+import top.lvzhiqiang.entity.CrawlerLoveFoot;
+import top.lvzhiqiang.entity.VideoGenres;
+
+import java.util.List;
+
+/**
+ * MyBatis mapper for the {@code crawler_lovefoot_info} table.
+ *
+ * @author lvzhiqiang
+ * 2022/11/26 15:50
+ */
+public interface CrawlerLoveFootMapper {
+
+    /**
+     * Physically deletes every row of the table.
+     */
+    @Delete("DELETE FROM crawler_lovefoot_info where 1=1")
+    void deleteAll();
+
+    /**
+     * Returns the non-deleted row with the greatest {@code update_date}.
+     *
+     * @return the latest row, or {@code null} when no row matches
+     */
+    @Select("SELECT * FROM crawler_lovefoot_info WHERE delete_flag = 1 order by update_date desc limit 1")
+    CrawlerLoveFoot findLatestInfo();
+
+    /**
+     * Inserts a row, or updates the existing one when the insert hits a duplicate key.
+     * {@code create_time} is only written on insert; {@code modify_time} is always set to {@code now()}.
+     *
+     * @param crawlerLoveFoot the row to persist
+     * @return the affected-row count reported by the driver
+     */
+    @Insert("INSERT INTO crawler_lovefoot_info(name, identification_code, issue_date, length, director, maker, issuer, genres, cast, img_url, orgin_url, type, status, failure_cause, clock_date, update_date, create_time, modify_time) " +
+            "VALUES (#{name}, #{identificationCode}, #{issueDate}, #{length}, #{director}, #{maker}, #{issuer}, #{genres}, #{cast}, #{imgUrl}, #{orginUrl}, #{type}, #{status}, #{failureCause}, #{clockDate}, #{updateDate}, #{createTime}, now()) " +
+            "ON DUPLICATE KEY UPDATE name=values(name),issue_date=values(issue_date),length=values(length),director=values(director),maker=values(maker),issuer=values(issuer),genres=values(genres),cast=values(cast)," +
+            "img_url=values(img_url),orgin_url=values(orgin_url),type=values(type),status=values(status),failure_cause=values(failure_cause),clock_date=values(clock_date),update_date=values(update_date),modify_time=now()")
+    int insertOrUpdate(CrawlerLoveFoot crawlerLoveFoot);
+
+    /**
+     * Returns the fixed list of review-state labels as {@link VideoGenres} rows.
+     * The labels are SQL literals; no table is read.
+     */
+    @Select("select '待审查' name union select '审查中' name union select '审查已通过' name union select '审查未通过' name")
+    List<VideoGenres> findGenres();
+
+    /**
+     * Returns every non-deleted row.
+     */
+    @Select("SELECT * FROM crawler_lovefoot_info WHERE delete_flag = 1")
+    List<CrawlerLoveFoot> findAll();
+
+    /**
+     * Searches non-deleted rows by identification code (substring match) and type,
+     * left-joined with {@code video_info_other} for score and comment.
+     * <p>
+     * The sort direction is whitelisted instead of being spliced into the SQL text:
+     * {@code "desc"} (case-insensitive, surrounding whitespace ignored) sorts descending,
+     * any other non-empty value sorts ascending, and a {@code null}/empty value adds no
+     * {@code order by} clause at all.
+     *
+     * @param identificationCode optional substring of the identification code
+     * @param type               optional type filter
+     * @param order              optional sort direction for {@code issue_date}
+     * @return the matching rows
+     */
+    @Select({"<script>" +
+            "select vip.*,vio.score,vio.comment  from crawler_lovefoot_info vip left join video_info_other vio on vip.identification_code = vio.identification_code and vio.delete_flag = 1 WHERE vip.delete_flag = 1" +
+            "<if test=\"identificationCode != null and identificationCode != ''\">" +
+            "   and vip.identification_code like concat('%',#{identificationCode},'%')" +
+            "</if>" +
+            "<if test=\"type != null and type != ''\">" +
+            "   and vip.type = #{type}" +
+            "</if>" +
+            "<if test=\"order != null and order != ''\">" +
+            "   order by vip.issue_date" +
+            "   <choose>" +
+            "       <when test=\"order.trim().equalsIgnoreCase('desc')\"> desc</when>" +
+            "       <otherwise> asc</otherwise>" +
+            "   </choose>" +
+            "</if>" +
+            "</script>"})
+    List<CrawlerLoveFoot> findByCodeAndType(String identificationCode, Integer type, String order);
+
+    /**
+     * Sets the type of the row with the given identification code.
+     */
+    @Update("update crawler_lovefoot_info set type = #{type},modify_time = now() where identification_code = #{identificationCode}")
+    void updateTypeByCode(String identificationCode, Integer type);
+
+    /**
+     * Soft-deletes (delete_flag = 2) the row with the given identification code.
+     */
+    @Update("update crawler_lovefoot_info set delete_flag = 2,modify_time = now() where identification_code = #{identificationCode}")
+    void delByCode(String identificationCode);
+}

+ 12 - 0
src/main/java/top/lvzhiqiang/service/Crawler4LoveFootService.java

@@ -0,0 +1,12 @@
+package top.lvzhiqiang.service;
+
+/**
+ * Crawler LoveFoot Service
+ *
+ * @author lvzhiqiang
+ * 2022/11/26 15:50
+ */
+public interface Crawler4LoveFootService {
+
+    /**
+     * Runs the LoveFoot crawl against the avnoashi source site.
+     *
+     * @param status           status selector supplied by the caller; may be {@code null}
+     * @param isDel            delete switch supplied by the caller (callers in this commit pass 1 or 2)
+     * @param ignoreRetryCount retry-count switch supplied by the caller (callers in this commit pass 1 or 2)
+     * @throws Exception if the crawl fails
+     */
+    void jsoupLoveFoot4avnoashi(Integer status, Integer isDel, Integer ignoreRetryCount) throws Exception;
+}

+ 359 - 0
src/main/java/top/lvzhiqiang/service/impl/Crawler4LoveFootServiceImpl.java

@@ -0,0 +1,359 @@
+package top.lvzhiqiang.service.impl;
+
+import lombok.extern.slf4j.Slf4j;
+import org.jsoup.Connection;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.scheduling.annotation.Async;
+import org.springframework.stereotype.Service;
+import org.springframework.transaction.annotation.Propagation;
+import org.springframework.transaction.annotation.Transactional;
+import org.springframework.util.StopWatch;
+import top.lvzhiqiang.entity.CrawlerLoveFoot;
+import top.lvzhiqiang.entity.DicCode;
+import top.lvzhiqiang.exception.BusinessException;
+import top.lvzhiqiang.mapper.CrawlerLoveFootMapper;
+import top.lvzhiqiang.mapper.DicCodeMapper;
+import top.lvzhiqiang.mapper.VideoSitePoolMapper;
+import top.lvzhiqiang.service.Crawler4LoveFootService;
+import top.lvzhiqiang.util.DateUtils;
+import top.lvzhiqiang.util.JsoupUtil;
+import top.lvzhiqiang.util.StringUtils;
+
+import javax.annotation.Resource;
+import java.io.*;
+import java.net.InetSocketAddress;
+import java.net.Proxy;
+import java.nio.charset.StandardCharsets;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import java.util.stream.Collectors;
+
+/**
+ * Crawler LoveFoot ServiceImpl
+ *
+ * @author lvzhiqiang
+ * 2022/10/17 14:47
+ */
+@Service
+@Slf4j
+public class Crawler4LoveFootServiceImpl implements Crawler4LoveFootService {
+
+    @Resource
+    private DicCodeMapper dicCodeMapper;
+    @Resource
+    private CrawlerLoveFootMapper crawlerLoveFootMapper;
+    @Resource
+    private VideoSitePoolMapper videoSitePoolMapper;
+    @Value("${spring.profiles.active}")
+    private String env;
+
+    Map<String, String> footConstantMap = null;
+    Map<String, String> javbusConstantMap = null;
+    List<String> javbusUrlList = null;
+    Map<String, String> headerMap = new HashMap<>();
+    Map<String, String> header2Map = new HashMap<>();
+    Proxy proxy = null;
+
+    public void beforeProxy() {
+        if (null == proxy) {
+            if ("dev".equals(env)) {
+                proxy = new Proxy(Proxy.Type.SOCKS, new InetSocketAddress("127.0.0.1", 1080));
+            } else {
+                proxy = Proxy.NO_PROXY;
+            }
+        }
+    }
+
+    @Async
+    @Override
+    @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
+    public void jsoupLoveFoot4avnoashi(Integer status, Integer isDel, Integer ignoreRetryCount) throws Exception {
+        log.warn("jsoupFoot4avnoashi 开始:status={},isDel={},ignoreRetryCount={}", status, isDel, ignoreRetryCount);
+        StopWatch stopWatch = new StopWatch();
+        stopWatch.start();
+        if (isDel == 1) {
+            crawlerLoveFootMapper.deleteAll();
+        }
+
+        List<DicCode> dicCodeList = dicCodeMapper.findAll();
+        // 获取常量MAP
+        footConstantMap = dicCodeList.stream()
+                .filter(x -> "foot".equals(x.getCodeDesc()) && x.getEnv().contains(env))
+                .collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
+        javbusConstantMap = dicCodeList.stream()
+                .filter(x -> x.getType() != null && 1 == x.getType() && x.getEnv().contains(env))
+                .collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
+        // 获取javbus防屏蔽地址
+        javbusUrlList = videoSitePoolMapper.findUrlByTypeAndDeleteFlag(1, 1);
+        if (javbusUrlList.size() == 0) {
+            log.warn("javbusUrlList为空");
+            return;
+        }
+        // 代理及TOKEN设置
+        beforeProxy();
+        // 解析原始站点
+        jsoupLoveFoot4avnoashiSub(status, ignoreRetryCount);
+        log.warn("jsoupFoot4avnoashi 结束:time={}", stopWatch.getTotalTimeSeconds());
+    }
+
+    @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
+    public void jsoupLoveFoot4avnoashiSub(Integer status, Integer ignoreRetryCount) throws Exception {
+        CrawlerLoveFoot latestLoveFoot = crawlerLoveFootMapper.findLatestInfo();
+
+        LocalDate latestDate;
+        if (latestLoveFoot == null) {
+            latestDate = LocalDate.of(1970, 1, 1);
+        } else {
+            latestDate = latestLoveFoot.getUpdateDate();
+        }
+
+        String avnoashiUrl = footConstantMap.get("avnoashi_url");
+        headerMap.put("referer", avnoashiUrl);
+        header2Map.put("referer", avnoashiUrl.concat("?sort=newer"));
+        Document loveFootDocument;
+        Document loveFootDetailDocument;
+        outer:
+        while (true) {
+            loveFootDocument = JsoupUtil.requestDocument(avnoashiUrl, JsoupUtil.HTTP_GET, proxy, null, headerMap, null);
+            log.warn("jsoupLoveFoot4avnoashiSub page success:url={}", avnoashiUrl);
+
+            Elements sourceSelects = loveFootDocument.select("div.dividerBottom > div.archive").select("div.archive__contents").select("h2");
+            for (Element sourceSelect : sourceSelects) {
+                String sourceUrl = sourceSelect.select("a").attr("abs:href");
+
+                Integer statusInt = 2;
+                try {
+                    loveFootDetailDocument = JsoupUtil.requestDocument(sourceUrl, JsoupUtil.HTTP_GET, proxy, null, header2Map, null);
+                    String clockDateStr = loveFootDetailDocument.select("div.viral").select("li.icon-clock").text();
+                    String updateDateStr = loveFootDetailDocument.select("div.viral").select("li.icon-update").text();
+                    LocalDate clockDate = LocalDate.parse(clockDateStr, DateUtils.dateFormatter3);
+                    LocalDate updateDate = LocalDate.parse(updateDateStr, DateUtils.dateFormatter3);
+
+                    if (updateDate.isBefore(latestDate) || updateDate.isEqual(latestDate)) {
+                        break outer;
+                    }
+
+                    // 获取关键词
+                    String keywords = loveFootDetailDocument.select("div.postContents").select("td:contains(タイトル)").next("td").text();
+                    if (StringUtils.isNotEmpty(keywords)) {
+                        statusInt = 1;
+                        log.warn("jsoupLoveFoot4avnoashiSub parseDetailToKeywords success,sourceUrl={},keywords={}", sourceUrl, keywords);
+                    } else {
+                        throw new Exception("keywords is null");
+                    }
+
+                    // 通过关键词获取识别码
+                    CrawlerLoveFoot crawlerLoveFoot = new CrawlerLoveFoot();
+                    crawlerLoveFoot.setClockDate(clockDate);
+                    crawlerLoveFoot.setUpdateDate(updateDate);
+                    crawlerLoveFoot.setOrginUrl(sourceUrl);
+                    crawlerLoveFoot.setType(1);
+                    crawlerLoveFoot.setStatus(3);
+                    String message = parseKeywordsToCode(crawlerLoveFoot, keywords);
+                    if (StringUtils.isNotEmpty(message)) {
+                        statusInt = 4;
+                        throw new Exception(message);
+                    }
+
+                    crawlerLoveFootMapper.insertOrUpdate(crawlerLoveFoot);
+                } catch (Exception e) {
+                    log.error("jsoupLoveFoot4avnoashiSub detail fail,sourceUrl={}", sourceUrl, e);
+                    CrawlerLoveFoot crawlerLoveFoot = new CrawlerLoveFoot();
+                    crawlerLoveFoot.setIdentificationCode(UUID.randomUUID().toString());
+                    crawlerLoveFoot.setOrginUrl(sourceUrl);
+                    crawlerLoveFoot.setType(1);
+                    crawlerLoveFoot.setStatus(statusInt);
+                    crawlerLoveFoot.setCreateTime(LocalDateTime.now());
+                    crawlerLoveFoot.setFailureCause(e.getMessage());
+                    crawlerLoveFootMapper.insertOrUpdate(crawlerLoveFoot);
+                }
+            }
+
+            // 继续下一页
+            Elements nextSelects = loveFootDocument.select("ul.pager").select("a:contains(Next)");
+            if (nextSelects.size() > 0) {
+                avnoashiUrl = nextSelects.get(0).attr("abs:href");
+            } else {
+                break;
+            }
+        }
+    }
+
+    private String parseKeywordsToCode(CrawlerLoveFoot crawlerLoveFoot, String keywords) {
+        int retryCount = 0;
+        Document javbusSearchDocument;
+        Document javbusCodeDocument;
+        String message = null;
+        while (retryCount <= 3) {
+            long start = System.currentTimeMillis();
+            String javbusUrl = javbusUrlList.get((int) (0 + Math.random() * (javbusUrlList.size())));
+            String javbusSearchUrl = javbusUrl.concat("/search/").concat(keywords).concat("&parent=ce");
+            try {
+                javbusSearchDocument = JsoupUtil.requestDocument(javbusSearchUrl, JsoupUtil.HTTP_GET, proxy, null, null, null);
+
+                Elements itembSelects = javbusSearchDocument.select("div#waterfall").select("div.item");
+                if (itembSelects.size() == 0) {
+                    throw new BusinessException(30000, "search result null");
+                }
+
+                // 获取codeUrl
+                String codeUrl = itembSelects.select("a.movie-box").get(0).attr("abs:href");
+                // 解析codeUrl
+                javbusCodeDocument = JsoupUtil.requestDocument(codeUrl, JsoupUtil.HTTP_GET, proxy, null, null, null);
+                long picTime = parseJavbusCodeDocument(javbusCodeDocument, crawlerLoveFoot);
+
+                crawlerLoveFoot.setRetryCount(retryCount);
+                log.warn("jsoupLoveFoot4avnoashiSub parseKeywordsToCode success,keywords={},code={},picTime={},time={}", keywords, crawlerLoveFoot.getIdentificationCode(), picTime, System.currentTimeMillis() - start);
+
+                break;
+            } catch (Exception e) {
+                ++retryCount;
+
+                if (retryCount < 4) {
+                    log.error("javbusSearch error重试:,retryCount={},time={},keywords={}", retryCount, System.currentTimeMillis() - start, keywords, e);
+                } else if (retryCount == 4) {
+                    message = e.getMessage().length() <= 200 ? e.getMessage() : e.getMessage().substring(0, 200);
+                }
+
+                if (e instanceof BusinessException) {
+                    message = e.getMessage().length() <= 200 ? e.getMessage() : e.getMessage().substring(0, 200);
+                    break;
+                }
+            }
+        }
+        return message;
+    }
+
+    private long parseJavbusCodeDocument(Document document, CrawlerLoveFoot crawlerLoveFoot) throws Exception {
+        Elements container = document.select("div.container");
+        if (container.size() == 0) {
+            throw new BusinessException(30000, "番号无效!");
+        }
+
+        // 名称
+        String h3 = container.select("h3").first().text();
+        String[] nameArr = h3.split("\\s+");
+        if (nameArr.length > 1) {
+            crawlerLoveFoot.setName(h3.substring(nameArr[0].length()).trim());
+        } else {
+            crawlerLoveFoot.setName(nameArr[0]);
+        }
+
+        Elements pEles = container.select("div.info > p");
+        // 识别码
+        Element pEle = pEles.get(0);
+        String iCode = pEle.select("span[style]").first().text();
+        crawlerLoveFoot.setIdentificationCode(iCode);
+        // 发行日期
+        pEle = pEles.get(1);
+        String issueDate = pEle.text().split(":")[1].replace("\"", "").trim();
+        crawlerLoveFoot.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
+        // 长度
+        pEle = pEles.get(2);
+        String length = pEle.text().split(":")[1].replace("\"", "").trim();
+        crawlerLoveFoot.setLength(length);
+        // 导演
+        Elements directorEles = container.select("div.info").select("p:contains(導演)");
+        if (directorEles.size() > 0) {
+            pEle = directorEles.first().select("a[href]").first();
+            crawlerLoveFoot.setDirector(pEle.text());
+        }
+        // 制作商
+        Elements markerEles = container.select("div.info").select("p:contains(製作商)");
+        if (markerEles.size() > 0) {
+            pEle = markerEles.first().select("a[href]").first();
+            crawlerLoveFoot.setMaker(pEle.text());
+        }
+        // 发行商
+        Elements issuerEles = container.select("div.info").select("p:contains(發行商)");
+        if (issuerEles.size() > 0) {
+            pEle = issuerEles.first().select("a[href]").first();
+            crawlerLoveFoot.setIssuer(pEle.text());
+        }
+        // 类别
+        Elements genresEles = container.select("div.info").select("p:contains(類別)");
+        if (genresEles.size() > 0) {
+            StringBuffer sb = new StringBuffer();
+            Elements ahrefEles = genresEles.first().nextElementSibling().select("a[href]");
+            for (Element ahrefEle : ahrefEles) {
+                sb.append(ahrefEle.text()).append(",");
+            }
+            if (sb.length() > 0) {
+                sb = sb.deleteCharAt(sb.length() - 1);
+            }
+            crawlerLoveFoot.setGenres(sb.toString());
+        }
+        // 演员
+        Elements castEles = container.select("div.info").select("p.star-show:contains(演員)");
+        if (castEles.size() > 0) {
+            Elements castElesTemp = container.select("div.info:contains(暫無出演者資訊)");
+            if (castElesTemp.size() == 0) {
+                StringBuffer sb = new StringBuffer();
+                Elements ahrefEles = castEles.first().nextElementSibling().nextElementSibling().select("a[href]");
+                for (Element ahrefEle : ahrefEles) {
+                    sb.append(ahrefEle.text()).append(",");
+                }
+                if (sb.length() > 0) {
+                    sb = sb.deleteCharAt(sb.length() - 1);
+                }
+                crawlerLoveFoot.setCast(sb.toString());
+            }
+        }
+        // 图片URL
+        String href = container.select("a.bigImage").first().attr("abs:href");
+
+        long start = System.currentTimeMillis();
+        Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
+
+        String fileName = issueDate.concat(" ").concat(h3);
+        byte[] imgUrlBytes = fileName.getBytes(StandardCharsets.UTF_8);
+        if (imgUrlBytes.length > 251) {
+            byte[] imgUrlDestBytes = new byte[251];
+            System.arraycopy(imgUrlBytes, 0, imgUrlDestBytes, 0, 251);
+            fileName = new String(imgUrlDestBytes, StandardCharsets.UTF_8).replace("�", "");
+        }
+        fileName = fileName.concat(".jpg");
+
+        String machiImgUrl = "足舐/".concat(fileName);
+
+        saveFile(response.bodyStream(), javbusConstantMap.get("apics_path").concat(machiImgUrl));
+        long end = System.currentTimeMillis();
+
+        crawlerLoveFoot.setImgUrl(machiImgUrl);
+        crawlerLoveFoot.setCreateTime(LocalDateTime.now());
+
+        return end - start;
+    }
+
+    /**
+     * 保存文件到本地
+     *
+     * @param bufferedInputStream
+     * @param savePath
+     */
+    private void saveFile(BufferedInputStream bufferedInputStream, String savePath) throws IOException {
+        //一次最多读取1k
+        byte[] buffer = new byte[1024];
+        //实际读取的长度
+        int readLenghth;
+        //创建的一个写出的缓冲流
+        BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(new FileOutputStream(new File(savePath)));
+        //文件逐步写入本地
+        while ((readLenghth = bufferedInputStream.read(buffer, 0, 1024)) != -1) {//先读出来,保存在buffer数组中
+            bufferedOutputStream.write(buffer, 0, readLenghth);//再从buffer中取出来保存到本地
+        }
+        //关闭缓冲流
+        bufferedOutputStream.close();
+        bufferedInputStream.close();
+    }
+}
+
+

+ 3 - 0
src/main/java/top/lvzhiqiang/util/DateUtils.java

@@ -36,6 +36,8 @@ public class DateUtils {
     public static final String PATTERN_TO_DAYS = "yyyy-MM-dd";
     public static final String PATTERN_TO_DAYS2 = "yyyy.MM.dd";
 
+    public static final String PATTERN_TO_DAYS3 = "yyyy年M月d日";
+
     /**
      * (精确到秒的)日期样式
      */
@@ -44,6 +46,7 @@ public class DateUtils {
 
     public static final DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern(PATTERN_TO_DAYS);
     public static final DateTimeFormatter dateFormatter2 = DateTimeFormatter.ofPattern(PATTERN_TO_DAYS2);
+    public static final DateTimeFormatter dateFormatter3 = DateTimeFormatter.ofPattern(PATTERN_TO_DAYS3);
 
     public static final DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern(PATTERN_TO_SECONDS);
     public static final DateTimeFormatter dateTimeFormatter3 = DateTimeFormatter.ofPattern(PATTERN_TO_MINUTES);

+ 17 - 0
src/main/resources/static/crawler.html

@@ -237,6 +237,23 @@
             <input type="submit" value="提交">
         </form>
     </div>
+    <hr/>
+    <div style="margin-right:20px;">
+        <span class="font">jsoupLoveFoot</span>
+        <form method="post" action="bg/crawler/jsoupLoveFoot">
+            <span>status</span>
+            <select name="status" style="height: 21.43px;">
+                <option value="2">获取关键词失败</option>
+                <option value="4">爬取失败</option>
+                <option value="">从0开始</option>
+            </select>
+            <span>isDel</span>
+            <input type="text" name="isDel" placeholder="1:是,2:否。默认否"/>
+            <span>ignoreRetryCount</span>
+            <input type="text" name="ignoreRetryCount" placeholder="1:是,2:否。默认否"/>
+            <input type="submit" value="提交">
+        </form>
+    </div>
 </div>
 </body>
 </html>