|
@@ -4,19 +4,28 @@ import com.alibaba.fastjson.JSONObject;
|
|
|
import com.github.pagehelper.PageInfo;
|
|
import com.github.pagehelper.PageInfo;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
import net.coobird.thumbnailator.Thumbnails;
|
|
import net.coobird.thumbnailator.Thumbnails;
|
|
|
|
|
+import org.jsoup.Connection;
|
|
|
|
|
+import org.jsoup.Jsoup;
|
|
|
|
|
+import org.jsoup.nodes.Document;
|
|
|
|
|
+import org.jsoup.nodes.Element;
|
|
|
|
|
+import org.jsoup.select.Elements;
|
|
|
import org.springframework.beans.factory.annotation.Value;
|
|
import org.springframework.beans.factory.annotation.Value;
|
|
|
|
|
+import org.springframework.scheduling.annotation.Async;
|
|
|
import org.springframework.stereotype.Service;
|
|
import org.springframework.stereotype.Service;
|
|
|
|
|
+import org.springframework.transaction.annotation.Propagation;
|
|
|
|
|
+import org.springframework.transaction.annotation.Transactional;
|
|
|
|
|
+import org.springframework.util.StopWatch;
|
|
|
import org.springframework.web.multipart.MultipartFile;
|
|
import org.springframework.web.multipart.MultipartFile;
|
|
|
import top.lvzhiqiang.config.InitRunner;
|
|
import top.lvzhiqiang.config.InitRunner;
|
|
|
import top.lvzhiqiang.dto.R;
|
|
import top.lvzhiqiang.dto.R;
|
|
|
|
|
+import top.lvzhiqiang.entity.FileCrawlerImage;
|
|
|
|
|
+import top.lvzhiqiang.entity.FileCrawlerImageLog;
|
|
|
import top.lvzhiqiang.entity.FileImage;
|
|
import top.lvzhiqiang.entity.FileImage;
|
|
|
import top.lvzhiqiang.enumeration.ResultCodeEnum;
|
|
import top.lvzhiqiang.enumeration.ResultCodeEnum;
|
|
|
import top.lvzhiqiang.exception.BusinessException;
|
|
import top.lvzhiqiang.exception.BusinessException;
|
|
|
import top.lvzhiqiang.mapper.PictureInfoMapper;
|
|
import top.lvzhiqiang.mapper.PictureInfoMapper;
|
|
|
import top.lvzhiqiang.service.PictureInfoService;
|
|
import top.lvzhiqiang.service.PictureInfoService;
|
|
|
-import top.lvzhiqiang.util.DateUtils;
|
|
|
|
|
-import top.lvzhiqiang.util.FtpUtil;
|
|
|
|
|
-import top.lvzhiqiang.util.StringUtils;
|
|
|
|
|
|
|
+import top.lvzhiqiang.util.*;
|
|
|
|
|
|
|
|
import javax.annotation.Resource;
|
|
import javax.annotation.Resource;
|
|
|
import javax.imageio.ImageIO;
|
|
import javax.imageio.ImageIO;
|
|
@@ -27,8 +36,11 @@ import java.io.IOException;
|
|
|
import java.io.InputStream;
|
|
import java.io.InputStream;
|
|
|
import java.math.BigDecimal;
|
|
import java.math.BigDecimal;
|
|
|
import java.math.RoundingMode;
|
|
import java.math.RoundingMode;
|
|
|
|
|
+import java.net.Proxy;
|
|
|
|
|
+import java.net.URLDecoder;
|
|
|
import java.time.LocalDate;
|
|
import java.time.LocalDate;
|
|
|
import java.util.ArrayList;
|
|
import java.util.ArrayList;
|
|
|
|
|
+import java.util.HashMap;
|
|
|
import java.util.List;
|
|
import java.util.List;
|
|
|
import java.util.Map;
|
|
import java.util.Map;
|
|
|
|
|
|
|
@@ -46,6 +58,7 @@ public class PictureInfoServiceImpl extends BaseServiceImpl<Object> implements P
|
|
|
private PictureInfoMapper pictureInfoMapper;
|
|
private PictureInfoMapper pictureInfoMapper;
|
|
|
@Value("${spring.profiles.active}")
|
|
@Value("${spring.profiles.active}")
|
|
|
private String env;
|
|
private String env;
|
|
|
|
|
+    private final String parentPath = LocalDate.now().format(DateUtils.dateFormatter5); // NOTE(review): LocalDate.now() is evaluated once, when this instance is constructed; the value keeps that date for the lifetime of the instance
|
|
|
|
|
|
|
|
@Override
|
|
@Override
|
|
|
public PageInfo<FileImage> getPictureInfoPage(Map<String, Object> params) {
|
|
public PageInfo<FileImage> getPictureInfoPage(Map<String, Object> params) {
|
|
@@ -207,4 +220,137 @@ public class PictureInfoServiceImpl extends BaseServiceImpl<Object> implements P
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ @Override
|
|
|
|
|
+ @Async
|
|
|
|
|
+ public void jsoupFulibaPic(Object o, Object o1) throws Exception {
|
|
|
|
|
+ log.warn("jsoupFulibaPic 开始:");
|
|
|
|
|
+ StopWatch stopWatch = new StopWatch();
|
|
|
|
|
+ stopWatch.start();
|
|
|
|
|
+
|
|
|
|
|
+ FileCrawlerImage latestFileCrawlerImage = pictureInfoMapper.findLatestCrawlerImage(1);
|
|
|
|
|
+ LocalDate latestDate;
|
|
|
|
|
+ if (latestFileCrawlerImage == null) {
|
|
|
|
|
+ latestDate = LocalDate.of(1970, 1, 1);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ latestDate = latestFileCrawlerImage.getPublishTime();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ String crawlerFulibaUrl = InitRunner.dicCodeMap.get("crawler_fuliba_url").getCodeValue();
|
|
|
|
|
+ Map<String, String> headerMap = new HashMap<>();
|
|
|
|
|
+ headerMap.put("referer", crawlerFulibaUrl);
|
|
|
|
|
+ Document fulibaDocument;
|
|
|
|
|
+
|
|
|
|
|
+ outer:
|
|
|
|
|
+ while (true) {
|
|
|
|
|
+ fulibaDocument = JsoupUtil.requestDocument(crawlerFulibaUrl, JsoupUtil.HTTP_GET, Proxy.NO_PROXY, null, headerMap, null);
|
|
|
|
|
+ log.warn("jsoupFulibaPic page success:url={}", crawlerFulibaUrl);
|
|
|
|
|
+
|
|
|
|
|
+ Elements sourceSelects = fulibaDocument.select(".content").select("article.excerpt");
|
|
|
|
|
+ for (Element sourceSelect : sourceSelects) {
|
|
|
|
|
+ Thread.sleep(2000L);
|
|
|
|
|
+ String mainUrl = sourceSelect.select("header").select("a").attr("abs:href");
|
|
|
|
|
+ mainUrl = URLDecoder.decode(mainUrl, "UTF-8");
|
|
|
|
|
+
|
|
|
|
|
+ String publishTimeStr = sourceSelect.select("div.meta").select("time").text();
|
|
|
|
|
+ LocalDate publishTime = LocalDate.parse(publishTimeStr, DateUtils.dateFormatter);
|
|
|
|
|
+ if (publishTime.isBefore(latestDate) || publishTime.isEqual(latestDate)) {
|
|
|
|
|
+ break outer;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ FileCrawlerImageLog crawlerImageLog = new FileCrawlerImageLog();
|
|
|
|
|
+ crawlerImageLog.setId(UUIDUtils.getUUID());
|
|
|
|
|
+ crawlerImageLog.setMainUrl(mainUrl);
|
|
|
|
|
+ crawlerImageLog.setStatus(1);
|
|
|
|
|
+ try {
|
|
|
|
|
+ String mainTitle = SpringUtils.getBean(PictureInfoServiceImpl.class).jsoupFulibaPicSub(mainUrl, headerMap, publishTime, crawlerImageLog.getId());
|
|
|
|
|
+ crawlerImageLog.setMainTitle(mainTitle);
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ crawlerImageLog.setFailureCause(e.getMessage());
|
|
|
|
|
+ crawlerImageLog.setStatus(2);
|
|
|
|
|
+ } finally {
|
|
|
|
|
+ pictureInfoMapper.insertOrUpdateFileCrawlerImageLog(crawlerImageLog);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 继续下一页
|
|
|
|
|
+ Elements nextSelects = fulibaDocument.select("div.pagination > ul").select("li.next-page");
|
|
|
|
|
+ if (!nextSelects.isEmpty()) {
|
|
|
|
|
+ crawlerFulibaUrl = nextSelects.get(0).select("a").attr("abs:href");
|
|
|
|
|
+ } else {
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ log.warn("jsoupFulibaPic 结束:time={}", stopWatch.getTotalTimeSeconds());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ @Override
|
|
|
|
|
+ @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
|
|
|
|
|
+ public String jsoupFulibaPicSub(String mainUrl, Map<String, String> headerMap, LocalDate publishTime, String logId) {
|
|
|
|
|
+ String newName;
|
|
|
|
|
+ String imageUrl;
|
|
|
|
|
+ String imageSize;
|
|
|
|
|
+ String mainTitle;
|
|
|
|
|
+ Document fulibaDetailDocument;
|
|
|
|
|
+ String ftpImageCrawlerBasePath = InitRunner.dicCodeMap.get("ftp_image_crawler_basepath").getCodeValue();
|
|
|
|
|
+ String ftpThumbnailCrawlerBasePath = InitRunner.dicCodeMap.get("ftp_thumbnail_crawler_basepath").getCodeValue();
|
|
|
|
|
+ List<String> delPathList = new ArrayList<>();
|
|
|
|
|
+ String srcUrl = "";
|
|
|
|
|
+ try {
|
|
|
|
|
+ fulibaDetailDocument = JsoupUtil.requestDocument(mainUrl, JsoupUtil.HTTP_GET, Proxy.NO_PROXY, null, headerMap, null);
|
|
|
|
|
+ log.warn("jsoupFulibaPicSub detail success:url={},logId={}", mainUrl, logId);
|
|
|
|
|
+ Elements imgEles = fulibaDetailDocument.select("div.content > article.article-content").select("img");
|
|
|
|
|
+ mainTitle = fulibaDetailDocument.select("div.content > header.article-header > h1.article-title").select("a").text();
|
|
|
|
|
+ for (Element imgEle : imgEles) {
|
|
|
|
|
+
|
|
|
|
|
+ srcUrl = imgEle.attr("src");
|
|
|
|
|
+ String altTitle = imgEle.attr("alt");
|
|
|
|
|
+
|
|
|
|
|
+ newName = FtpUtil.genImageName();
|
|
|
|
|
+ String prefx = srcUrl.substring(srcUrl.lastIndexOf("."));
|
|
|
|
|
+ newName = newName + prefx;
|
|
|
|
|
+
|
|
|
|
|
+ Connection.Response response = Jsoup.connect(srcUrl).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
|
|
|
|
|
+
|
|
|
|
|
+ FileCrawlerImage fileCrawlerImage = new FileCrawlerImage();
|
|
|
|
|
+ fileCrawlerImage.setOldName(altTitle);
|
|
|
|
|
+ fileCrawlerImage.setNewName(newName);
|
|
|
|
|
+
|
|
|
|
|
+ byte[] imageBytes = response.bodyAsBytes();
|
|
|
|
|
+ imageSize = BigDecimal.valueOf(imageBytes.length).divide(new BigDecimal("1024")).setScale(0, RoundingMode.UP).toPlainString().concat("KB");
|
|
|
|
|
+ fileCrawlerImage.setSize(imageSize);
|
|
|
|
|
+
|
|
|
|
|
+ imageUrl = parentPath + "/" + newName;
|
|
|
|
|
+ fileCrawlerImage.setPath(imageUrl);
|
|
|
|
|
+
|
|
|
|
|
+ fileCrawlerImage.setRemark("");
|
|
|
|
|
+ fileCrawlerImage.setCategoryId(1L);
|
|
|
|
|
+ fileCrawlerImage.setOrginUrl(srcUrl);
|
|
|
|
|
+ fileCrawlerImage.setLogId(logId);
|
|
|
|
|
+ fileCrawlerImage.setPublishTime(publishTime);
|
|
|
|
|
+ int count = pictureInfoMapper.insertIgnoreFileImage(fileCrawlerImage);
|
|
|
|
|
+ if (count > 0) {
|
|
|
|
|
+ InputStream imageStream1 = new ByteArrayInputStream(imageBytes);
|
|
|
|
|
+ FtpUtil.uploadFile(ftpImageCrawlerBasePath, parentPath, newName, imageStream1);
|
|
|
|
|
+ delPathList.add(ftpImageCrawlerBasePath + imageUrl);
|
|
|
|
|
+
|
|
|
|
|
+ InputStream imageStream2 = new ByteArrayInputStream(imageBytes);
|
|
|
|
|
+ ByteArrayOutputStream thumbnailOutputStream = new ByteArrayOutputStream();
|
|
|
|
|
+ Thumbnails.of(imageStream2).size(300, 200).toOutputStream(thumbnailOutputStream);
|
|
|
|
|
+ ByteArrayInputStream thumbnailInputStream = new ByteArrayInputStream(thumbnailOutputStream.toByteArray());
|
|
|
|
|
+ FtpUtil.uploadFile(ftpThumbnailCrawlerBasePath, parentPath, newName, thumbnailInputStream);
|
|
|
|
|
+ delPathList.add(ftpThumbnailCrawlerBasePath + imageUrl);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ // 异常,删除已经上传的文件
|
|
|
|
|
+ if (delPathList.size() > 0) {
|
|
|
|
|
+ delPathList.stream().forEach(delPath -> FtpUtil.delFile(delPath));
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ log.error("jsoupFulibaPicSub exception,mainUrl={},publishTime={},srcUrl={}", mainUrl, publishTime, srcUrl, e);
|
|
|
|
|
+ throw new BusinessException(30000, e.getMessage());
|
|
|
|
|
+ }
|
|
|
|
|
+ return mainTitle;
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|