|
|
@@ -30,10 +30,7 @@ import top.lvzhiqiang.util.*;
|
|
|
import javax.annotation.Resource;
|
|
|
import javax.imageio.ImageIO;
|
|
|
import java.awt.image.BufferedImage;
|
|
|
-import java.io.ByteArrayInputStream;
|
|
|
-import java.io.ByteArrayOutputStream;
|
|
|
-import java.io.IOException;
|
|
|
-import java.io.InputStream;
|
|
|
+import java.io.*;
|
|
|
import java.math.BigDecimal;
|
|
|
import java.math.RoundingMode;
|
|
|
import java.net.Proxy;
|
|
|
@@ -58,7 +55,6 @@ public class PictureInfoServiceImpl extends BaseServiceImpl<Object> implements P
|
|
|
private PictureInfoMapper pictureInfoMapper;
|
|
|
@Value("${spring.profiles.active}")
|
|
|
private String env;
|
|
|
- private final String parentPath = LocalDate.now().format(DateUtils.dateFormatter5);
|
|
|
|
|
|
@Override
|
|
|
public Object getPictureInfoPage(Map<String, Object> params) {
|
|
|
@@ -331,23 +327,63 @@ public class PictureInfoServiceImpl extends BaseServiceImpl<Object> implements P
|
|
|
}
|
|
|
|
|
|
@Override
|
|
|
- @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
|
|
|
- public String jsoupFulibaPicSub(String mainUrl, Map<String, String> headerMap, LocalDate publishTime, String logId) {
|
|
|
- /*try {
|
|
|
- String mainTitle = SpringUtils.getBean(PictureInfoServiceImpl.class).jsoupFulibaPicSub(mainUrl, headerMap, publishTime, crawlerImageLog.getId());
|
|
|
- crawlerImageLog.setMainTitle(mainTitle);
|
|
|
- } catch (Exception e) {
|
|
|
- crawlerImageLog.setFailureCause(e.getMessage());
|
|
|
- crawlerImageLog.setStatus(2);
|
|
|
- } finally {
|
|
|
+    // Asynchronously crawls the detail page of every stored crawler-log record that matches
+    // the given filter, and writes the outcome of each attempt back to that record.
+    //
+    // The lookup is always restricted to categoryId = 1 and uses exactly one extra filter,
+    // chosen in this order of priority: id, then mainUrl, then status.
+    //
+    // status  - record status to select; only used when id and mainUrl are both empty
+    // mainUrl - detail-page URL to select; only used when id is empty
+    // id      - record id to select; takes precedence over the other two filters
+    //
+    // Throws BusinessException (code 30000) when all three filters are empty.
+    @Async
+    public void jsoupFulibaPicDetail(Integer status, String mainUrl, String id) {
+        log.warn("jsoupFulibaPicDetail 开始:status={},mainUrl={},id={}", status, mainUrl, id);
+
+        StopWatch stopWatch = new StopWatch();
+        stopWatch.start();
+
+        Map<String, Object> params = new HashMap<>();
+        params.put("categoryId", 1);
+        if (StringUtils.isNotEmpty(id)) {
+            params.put("id", id);
+        } else if (StringUtils.isNotEmpty(mainUrl)) {
+            params.put("mainUrl", mainUrl);
+        } else if (status != null) {
+            params.put("status", status);
+        } else {
+            throw new BusinessException(30000, "参数错误!");
+        }
+
+        List<FileCrawlerImageLog> fileCrawlerImageLogList = pictureInfoMapper.findJsoupFulibaPicDetailListByParams(params);
+        if (fileCrawlerImageLogList.isEmpty()) {
+            log.warn("jsoupFulibaPicDetail 结束:fileCrawlerImageLogList is empty");
+            return;
+        }

- }*/
+        // The configured site URL is sent as the referer header of every detail request.
+        String crawlerFulibaUrl = InitRunner.dicCodeMap.get("crawler_fuliba_url").getCodeValue();
+        Map<String, String> headerMap = new HashMap<>();
+        headerMap.put("referer", crawlerFulibaUrl);
+        int successCount = 0;
+        int failCount = 0;
+        for (FileCrawlerImageLog fileCrawlerImageLog : fileCrawlerImageLogList) {
+            try {
+                // Wait 5 seconds before each detail request.
+                Thread.sleep(5000L);
+            } catch (InterruptedException e) {
+                // Keep the interrupt visible to the executor and stop here: the current and the
+                // remaining records were never attempted, so they must not be stored as failed.
+                Thread.currentThread().interrupt();
+                log.warn("jsoupFulibaPicDetail interrupted:mainUrl={}", fileCrawlerImageLog.getMainUrl());
+                break;
+            }
+
+            try {
+                // Called through the bean obtained from SpringUtils rather than via this,
+                // presumably so the @Transactional proxy of jsoupFulibaPicDetailSub applies.
+                SpringUtils.getBean(PictureInfoServiceImpl.class).jsoupFulibaPicDetailSub(fileCrawlerImageLog.getMainUrl(), headerMap, fileCrawlerImageLog.getPublishTime(), fileCrawlerImageLog.getId());
+                fileCrawlerImageLog.setStatus(1);
+                successCount++;
+            } catch (Exception e) {
+                fileCrawlerImageLog.setFailureCause(e.getMessage());
+                fileCrawlerImageLog.setStatus(2);
+                failCount++;
+            } finally {
+                // Store the outcome of this attempt (status 1 = success, 2 = failure).
+                pictureInfoMapper.insertOrUpdateFileCrawlerImageLog(fileCrawlerImageLog);
+                // Log the record's own URL: the mainUrl argument is empty when selecting by id or status.
+                log.warn("jsoupFulibaPicDetail update status:mainUrl={},status={}", fileCrawlerImageLog.getMainUrl(), fileCrawlerImageLog.getStatus());
+            }
+        }

+        stopWatch.stop();
+        log.warn("jsoupFulibaPicDetail 结束:totalSize={},successCount={},failCount={},time={}", fileCrawlerImageLogList.size(), successCount, failCount, stopWatch.getTotalTimeMillis());
+    }
|
|
|
|
|
|
+ @Override
|
|
|
+ @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
|
|
|
+ public String jsoupFulibaPicDetailSub(String mainUrl, Map<String, String> headerMap, LocalDate publishTime, String logId) {
|
|
|
String newName;
|
|
|
String imageUrl;
|
|
|
String imageSize;
|
|
|
- String mainTitle;
|
|
|
Document fulibaDetailDocument;
|
|
|
String ftpImageCrawlerBasePath = InitRunner.dicCodeMap.get("ftp_image_crawler_basepath").getCodeValue();
|
|
|
String ftpThumbnailCrawlerBasePath = InitRunner.dicCodeMap.get("ftp_thumbnail_crawler_basepath").getCodeValue();
|
|
|
@@ -355,11 +391,11 @@ public class PictureInfoServiceImpl extends BaseServiceImpl<Object> implements P
|
|
|
String srcUrl = "";
|
|
|
try {
|
|
|
fulibaDetailDocument = JsoupUtil.requestDocument(mainUrl, JsoupUtil.HTTP_GET, Proxy.NO_PROXY, null, headerMap, null);
|
|
|
- log.warn("jsoupFulibaPicSub detail success:url={},logId={}", mainUrl, logId);
|
|
|
+ log.warn("jsoupFulibaPicDetailSub start:mainUrl={},logId={}", mainUrl, logId);
|
|
|
Elements imgEles = fulibaDetailDocument.select("div.content > article.article-content").select("img");
|
|
|
- mainTitle = fulibaDetailDocument.select("div.content > header.article-header > h1.article-title").select("a").text();
|
|
|
+ int i = 0;
|
|
|
+ String parentPath = "1" + File.separator + publishTime.format(DateUtils.dateFormatter5);
|
|
|
for (Element imgEle : imgEles) {
|
|
|
-
|
|
|
srcUrl = imgEle.attr("src");
|
|
|
String altTitle = imgEle.attr("alt");
|
|
|
|
|
|
@@ -377,14 +413,14 @@ public class PictureInfoServiceImpl extends BaseServiceImpl<Object> implements P
|
|
|
imageSize = BigDecimal.valueOf(imageBytes.length).divide(new BigDecimal("1024")).setScale(0, RoundingMode.UP).toPlainString().concat("KB");
|
|
|
fileCrawlerImage.setSize(imageSize);
|
|
|
|
|
|
- imageUrl = parentPath + "/" + newName;
|
|
|
+ imageUrl = parentPath + File.separator + newName;
|
|
|
fileCrawlerImage.setPath(imageUrl);
|
|
|
|
|
|
fileCrawlerImage.setRemark("");
|
|
|
fileCrawlerImage.setCategoryId(1L);
|
|
|
fileCrawlerImage.setOrginUrl(srcUrl);
|
|
|
fileCrawlerImage.setLogId(logId);
|
|
|
- fileCrawlerImage.setPublishTime(publishTime);
|
|
|
+ fileCrawlerImage.setSort(++i);
|
|
|
int count = pictureInfoMapper.insertIgnoreFileImage(fileCrawlerImage);
|
|
|
if (count > 0) {
|
|
|
InputStream imageStream1 = new ByteArrayInputStream(imageBytes);
|
|
|
@@ -401,13 +437,13 @@ public class PictureInfoServiceImpl extends BaseServiceImpl<Object> implements P
|
|
|
}
|
|
|
} catch (Exception e) {
|
|
|
// 异常,删除已经上传的文件
|
|
|
- if (delPathList.size() > 0) {
|
|
|
- delPathList.stream().forEach(delPath -> FtpUtil.delFile(delPath));
|
|
|
+ if (!delPathList.isEmpty()) {
|
|
|
+ delPathList.forEach(FtpUtil::delFile);
|
|
|
}
|
|
|
|
|
|
- log.error("jsoupFulibaPicSub exception,mainUrl={},publishTime={},srcUrl={}", mainUrl, publishTime, srcUrl, e);
|
|
|
+ log.error("jsoupFulibaPicDetailSub exception,mainUrl={},publishTime={},srcUrl={}", mainUrl, publishTime, srcUrl, e);
|
|
|
throw new BusinessException(30000, e.getMessage());
|
|
|
}
|
|
|
- return mainTitle;
|
|
|
+ return "";
|
|
|
}
|
|
|
}
|