| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561 |
- package top.lvzhiqiang.service.impl;
- import com.alibaba.fastjson.JSONObject;
- import com.github.pagehelper.PageHelper;
- import com.github.pagehelper.PageInfo;
- import lombok.extern.slf4j.Slf4j;
- import net.coobird.thumbnailator.Thumbnails;
- import net.coobird.thumbnailator.tasks.UnsupportedFormatException;
- import org.jsoup.Connection;
- import org.jsoup.HttpStatusException;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Document;
- import org.jsoup.nodes.Element;
- import org.jsoup.select.Elements;
- import org.springframework.beans.factory.annotation.Value;
- import org.springframework.scheduling.annotation.Async;
- import org.springframework.stereotype.Service;
- import org.springframework.transaction.annotation.Propagation;
- import org.springframework.transaction.annotation.Transactional;
- import org.springframework.util.ObjectUtils;
- import org.springframework.util.StopWatch;
- import org.springframework.web.multipart.MultipartFile;
- import top.lvzhiqiang.config.InitRunner;
- import top.lvzhiqiang.dto.R;
- import top.lvzhiqiang.entity.FileCrawlerImage;
- import top.lvzhiqiang.entity.FileCrawlerImageLog;
- import top.lvzhiqiang.entity.FileImage;
- import top.lvzhiqiang.enumeration.ResultCodeEnum;
- import top.lvzhiqiang.exception.BusinessException;
- import top.lvzhiqiang.mapper.PictureInfoMapper;
- import top.lvzhiqiang.service.PictureInfoService;
- import top.lvzhiqiang.util.*;
- import javax.annotation.Resource;
- import javax.imageio.ImageIO;
- import java.awt.image.BufferedImage;
- import java.io.*;
- import java.math.BigDecimal;
- import java.math.RoundingMode;
- import java.net.Proxy;
- import java.net.SocketTimeoutException;
- import java.net.URLDecoder;
- import java.time.LocalDate;
- import java.time.LocalDateTime;
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.List;
- import java.util.Map;
- /**
- * 照片信息ServiceImpl
- *
- * @author lvzhiqiang
- * 2024/8/26 11:02
- */
- @Service
- @Slf4j
- public class PictureInfoServiceImpl extends BaseServiceImpl<Object> implements PictureInfoService {
- @Resource
- private PictureInfoMapper pictureInfoMapper;
- @Value("${spring.profiles.active}")
- private String env;
- @Override
- public Object getPictureInfoPage(Map<String, Object> params) {
- Object bigType = params.get("bigType");
- // 转换成like
- paramsToLike(params, "keyword");
- // 分页
- paramsToPagination(params);
- if ("上传".equals(bigType)) {
- // 排序
- paramsToSort(params);
- List<FileImage> pictureInfoList = pictureInfoMapper.getUploadImageInfoList(params);
- String bpicsUrl = InitRunner.dicCodeMap.get("bpics_url").getCodeValue();
- String ftpBasePath = InitRunner.dicCodeMap.get("ftp_basepath").getCodeValue();
- String ftpThumbnailBasePath = InitRunner.dicCodeMap.get("ftp_thumbnail_basepath").getCodeValue();
- pictureInfoList.stream().forEach(e -> {
- String path = e.getPath();
- e.setPath(bpicsUrl + ftpBasePath + path);
- e.setThumbnailPath(bpicsUrl + ftpThumbnailBasePath + path);
- });
- return new PageInfo<>(pictureInfoList);
- } else if ("爬虫".equals(bigType)) {
- // 排序
- if (params.containsKey(ORDER_FIELD) && params.containsKey(ORDER)
- && !ObjectUtils.isEmpty(params.get(ORDER_FIELD)) && !ObjectUtils.isEmpty(params.get(ORDER))) {
- PageHelper.orderBy(params.get(ORDER_FIELD) + " " + params.get(ORDER) + ",fi.sort asc");
- }
- List<FileCrawlerImage> crawlerImageList = pictureInfoMapper.getCrawlerImageInfoList(params);
- String bpicsUrl = InitRunner.dicCodeMap.get("bpics_url").getCodeValue();
- String ftpImageCrawlerBasePath = InitRunner.dicCodeMap.get("ftp_image_crawler_basepath").getCodeValue();
- String ftpThumbnailCrawlerBasePath = InitRunner.dicCodeMap.get("ftp_thumbnail_crawler_basepath").getCodeValue();
- crawlerImageList.stream().forEach(e -> {
- String path = e.getPath();
- e.setPath(bpicsUrl + ftpImageCrawlerBasePath + path);
- e.setThumbnailPath(bpicsUrl + ftpThumbnailCrawlerBasePath + path);
- e.setRemark(StringUtils.isEmpty(e.getRemark()) ? e.getOldName() : e.getRemark());
- });
- return new PageInfo<>(crawlerImageList);
- } else {
- throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "不支持的类型!");
- }
- }
- @Override
- public R insertOrUpdateImg(MultipartFile file, String remark, String createDate, Long categoryId, String id) {
- String imageUrl = "";
- String imageSize = "";
- String ftpBasePath = InitRunner.dicCodeMap.get("ftp_basepath").getCodeValue();
- String ftpThumbnailBasePath = InitRunner.dicCodeMap.get("ftp_thumbnail_basepath").getCodeValue();
- String ftpBaseUrl = InitRunner.dicCodeMap.get("ftp_baseurl").getCodeValue();
- List<String> delPathList = new ArrayList<>();
- if (StringUtils.isEmpty(id)) {
- String parentPath;
- LocalDateTime createTime = LocalDateTime.now();
- if (StringUtils.isNotEmpty(createDate)) {
- createTime = LocalDateTime.parse(createDate, DateUtils.dateTimeFormatter);
- parentPath = createTime.format(DateUtils.dateFormatter6);
- } else {
- parentPath = LocalDate.now().format(DateUtils.dateFormatter6);
- }
- try {
- // 1、给上传的图片生成新的文件名
- // 1.1获取原始文件名
- String oldName = file.getOriginalFilename();
- // 1.2使用FtpUtil工具类生成新的文件名,新文件名 = newName + 文件后缀
- String newName = FtpUtil.genImageName();
- newName = newName + oldName.substring(oldName.lastIndexOf("."));
- // 2、把图片上传到图片服务器
- // 2.1获取上传的io流
- InputStream input = file.getInputStream();
- // 2.2调用FtpUtil工具类进行上传
- boolean result = FtpUtil.uploadFile(ftpBasePath, parentPath, newName, input);
- delPathList.add(ftpBasePath + parentPath + "/" + newName);
- // 2.3缩略图
- BufferedImage originalImage = ImageIO.read(file.getInputStream());
- ByteArrayOutputStream thumbnailOutputStream = new ByteArrayOutputStream();
- Thumbnails.of(originalImage).size(300, 200).outputFormat(oldName.substring(oldName.lastIndexOf(".") + 1)).toOutputStream(thumbnailOutputStream);
- ByteArrayInputStream thumbnailInputStream = new ByteArrayInputStream(thumbnailOutputStream.toByteArray());
- FtpUtil.uploadFile(ftpThumbnailBasePath, parentPath, newName, thumbnailInputStream);
- delPathList.add(ftpThumbnailBasePath + parentPath + "/" + newName);
- thumbnailOutputStream.close();
- thumbnailInputStream.close();
- if (result) {
- //返回给前端图片访问路径
- imageUrl = parentPath + "/" + newName;
- imageSize = BigDecimal.valueOf(file.getSize()).divide(new BigDecimal("1024")).setScale(0, RoundingMode.UP).toPlainString().concat("KB");
- FileImage fileImage = new FileImage();
- fileImage.setOldName(oldName);
- fileImage.setNewName(newName);
- fileImage.setSize(imageSize);
- fileImage.setPath(imageUrl);
- fileImage.setRemark(remark);
- fileImage.setCategoryId(categoryId);
- fileImage.setCreateTime(createTime);
- pictureInfoMapper.insertFileImage(fileImage);
- }
- } catch (Exception e) {
- // 异常,删除已经上传的文件
- if (!delPathList.isEmpty()) {
- delPathList.forEach(FtpUtil::delFile);
- }
- log.error("insertOrUpdateImg Exception,", e);
- throw new BusinessException(30000, e.getMessage());
- }
- JSONObject result = new JSONObject();
- result.put("imageUrl", ftpBaseUrl + ftpBasePath + imageUrl);
- result.put("imageSize", imageSize);
- return R.ok().data(result);
- } else {
- FileImage fileImage = pictureInfoMapper.findFileImageById(Long.valueOf(id));
- if (fileImage == null) {
- throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "ID 不存在!");
- }
- fileImage.setCategoryId(categoryId);
- if (StringUtils.isNotEmpty(remark)) {
- fileImage.setRemark(remark);
- }
- String parentPath;
- if (StringUtils.isNotEmpty(createDate)) {
- LocalDateTime createTime = LocalDateTime.parse(createDate, DateUtils.dateTimeFormatter);
- parentPath = createTime.format(DateUtils.dateFormatter6);
- fileImage.setCreateTime(createTime);
- } else {
- parentPath = fileImage.getCreateTime().format(DateUtils.dateFormatter6);
- }
- if (file != null && file.getSize() > 0) {
- try {
- // 1、给上传的图片生成新的文件名
- // 1.1获取原始文件名
- String oldName = file.getOriginalFilename();
- String newName = FtpUtil.genImageName();
- // 1.2使用FtpUtil工具类生成新的文件名,新文件名 = newName + 文件后缀
- newName = newName + oldName.substring(oldName.lastIndexOf("."));
- // 2、把图片上传到图片服务器
- // 2.1获取上传的io流
- InputStream input = file.getInputStream();
- // 2.2调用FtpUtil工具类进行上传
- boolean result1 = FtpUtil.uploadFile(ftpBasePath, parentPath, newName, input);
- delPathList.add(ftpBasePath + parentPath + "/" + newName);
- // 2.3缩略图
- BufferedImage originalImage = ImageIO.read(file.getInputStream());
- ByteArrayOutputStream thumbnailOutputStream = new ByteArrayOutputStream();
- Thumbnails.of(originalImage).size(300, 200).outputFormat(oldName.substring(oldName.lastIndexOf(".") + 1)).toOutputStream(thumbnailOutputStream);
- ByteArrayInputStream thumbnailInputStream = new ByteArrayInputStream(thumbnailOutputStream.toByteArray());
- boolean result2 = FtpUtil.uploadFile(ftpThumbnailBasePath, parentPath, newName, thumbnailInputStream);
- delPathList.add(ftpThumbnailBasePath + parentPath + "/" + newName);
- thumbnailOutputStream.close();
- thumbnailInputStream.close();
- if (result2 && result2) {
- String oriPath = fileImage.getPath();
- imageUrl = parentPath + "/" + newName;
- imageSize = BigDecimal.valueOf(file.getSize()).divide(new BigDecimal("1024")).setScale(0, RoundingMode.UP).toPlainString().concat("KB");
- fileImage.setOldName(oldName);
- fileImage.setNewName(newName);
- fileImage.setSize(imageSize);
- fileImage.setPath(imageUrl);
- pictureInfoMapper.updateFileImage(fileImage);
- FtpUtil.delFile(ftpBasePath + oriPath);
- FtpUtil.delFile(ftpThumbnailBasePath + oriPath);
- } else {
- delPathList.forEach(FtpUtil::delFile);
- throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "上传新文件失败!");
- }
- } catch (Exception e) {
- // 异常,删除已经上传的文件
- if (!delPathList.isEmpty()) {
- delPathList.forEach(FtpUtil::delFile);
- }
- log.error("insertOrUpdateImg Exception,", e);
- throw new BusinessException(30000, e.getMessage());
- }
- } else {
- pictureInfoMapper.updateFileImage(fileImage);
- }
- return R.ok().data("success");
- }
- }
- @Override
- public R deleteImgs(Long imageId) {
- FileImage fileImage = pictureInfoMapper.findFileImageById(imageId);
- if (fileImage == null) {
- throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "ID 不存在!");
- }
- try {
- String ftpBasePath = InitRunner.dicCodeMap.get("ftp_basepath").getCodeValue();
- String ftpThumbnailBasePath = InitRunner.dicCodeMap.get("ftp_thumbnail_basepath").getCodeValue();
- boolean flag = FtpUtil.delFile(ftpBasePath + fileImage.getPath());
- FtpUtil.delFile(ftpThumbnailBasePath + fileImage.getPath());
- if (flag) {
- pictureInfoMapper.deleteFileImageById(imageId);
- return R.ok();
- } else {
- return R.error().message("删除失败");
- }
- } catch (Exception e) {
- e.printStackTrace();
- return R.error().message(e.getMessage());
- }
- }
- @Override
- @Async
- public void jsoupFulibaPic(String startPageUrl, Boolean ignoreTimeCompare) throws Exception {
- log.warn("jsoupFulibaPic 开始:startPageUrl={},ignoreTimeCompare={}", startPageUrl, ignoreTimeCompare);
- StopWatch stopWatch = new StopWatch();
- stopWatch.start();
- FileCrawlerImageLog latestFileCrawlerImageLog = pictureInfoMapper.findLatestCrawlerImage(1);
- LocalDate latestDate;
- if (latestFileCrawlerImageLog == null) {
- latestDate = LocalDate.of(1970, 1, 1);
- } else {
- latestDate = latestFileCrawlerImageLog.getPublishTime();
- }
- String crawlerFulibaUrl = InitRunner.dicCodeMap.get("crawler_fuliba_url").getCodeValue();
- if (StringUtils.isNotEmpty(startPageUrl)) {
- crawlerFulibaUrl = startPageUrl;
- }
- if (ignoreTimeCompare == null) {
- ignoreTimeCompare = false;
- }
- Map<String, String> headerMap = new HashMap<>();
- headerMap.put("referer", crawlerFulibaUrl);
- Document fulibaDocument = null;
- Elements sourceSelects = null;
- int findCount = 0;
- outer:
- while (true) {
- for (int i = 0; i < 10; i++) {
- try {
- fulibaDocument = JsoupUtil.requestDocument(crawlerFulibaUrl, JsoupUtil.HTTP_GET, Proxy.NO_PROXY, null, headerMap, null);
- sourceSelects = fulibaDocument.select(".content").select("article.excerpt");
- if (!sourceSelects.isEmpty()) {
- log.warn("jsoupFulibaPic page success:i={},url={}", i, crawlerFulibaUrl);
- break;
- } else {
- log.warn("jsoupFulibaPic page fail:i={},url={}", i, crawlerFulibaUrl);
- }
- } catch (Exception | Error e) {
- log.warn("jsoupFulibaPic page fail:i={},url={}", i, crawlerFulibaUrl, e);
- } finally {
- Thread.sleep(5000L);
- }
- }
- if (sourceSelects == null || sourceSelects.isEmpty()) {
- log.warn("jsoupFulibaPic page empty break:url={}", crawlerFulibaUrl);
- break;
- }
- sourceSelects = fulibaDocument.select(".content").select("article.excerpt");
- for (Element sourceSelect : sourceSelects) {
- String mainUrl = sourceSelect.select("header").select("a").attr("abs:href");
- mainUrl = URLDecoder.decode(mainUrl, "UTF-8");
- String mainTitle = sourceSelect.select("header").select("a").attr("title");
- mainTitle = mainTitle.replace("-福利吧", "");
- String publishTimeStr = sourceSelect.select("div.meta").select("time").text();
- LocalDate publishTime = LocalDate.parse(publishTimeStr, DateUtils.dateFormatter);
- if (!ignoreTimeCompare && (publishTime.isBefore(latestDate) || publishTime.isEqual(latestDate))) {
- log.warn("jsoupFulibaPic page publishTime isbefore latestDate break:mainUrl={},mainTitle={},publishTime={},latestDate={}", mainUrl, mainTitle, publishTimeStr, latestDate.format(DateUtils.dateFormatter));
- break outer;
- }
- FileCrawlerImageLog crawlerImageLog = new FileCrawlerImageLog();
- crawlerImageLog.setId(UUIDUtils.getUUID());
- crawlerImageLog.setMainUrl(mainUrl);
- crawlerImageLog.setMainTitle(mainTitle);
- crawlerImageLog.setCategoryId(1L);
- crawlerImageLog.setStatus(3);
- crawlerImageLog.setPublishTime(publishTime);
- int count = pictureInfoMapper.insertIgnoreFileCrawlerImageLog(crawlerImageLog);
- findCount += count;
- log.warn("jsoupFulibaPic item success:publishTime={},mainTitle={}", publishTime, mainTitle);
- }
- // 继续下一页
- Elements nextSelects = fulibaDocument.select("div.pagination > ul").select("li.next-page").select("a");
- if (!nextSelects.isEmpty()) {
- crawlerFulibaUrl = nextSelects.get(0).attr("abs:href");
- if (StringUtils.isEmpty(crawlerFulibaUrl)) {
- break;
- }
- } else {
- break;
- }
- }
- stopWatch.stop();
- log.warn("jsoupFulibaPic 结束:findCount={},time={}", findCount, stopWatch.getTotalTimeMillis());
- }
- @Override
- @Async
- public void jsoupFulibaPicDetail(Integer status, String mainUrl, String id) {
- log.warn("jsoupFulibaPicDetail 开始:status={},mainUrl={},id={}", status, mainUrl, id);
- StopWatch stopWatch = new StopWatch();
- stopWatch.start();
- Map<String, Object> params = new HashMap<>();
- params.put("categoryId", 1);
- if (StringUtils.isNotEmpty(id)) {
- params.put("id", id);
- } else if (StringUtils.isNotEmpty(mainUrl)) {
- params.put("mainUrl", mainUrl);
- } else if (status != null) {
- params.put("status", status);
- } else {
- throw new BusinessException(30000, "参数错误!");
- }
- List<FileCrawlerImageLog> fileCrawlerImageLogList = pictureInfoMapper.findJsoupFulibaPicDetailListByParams(params);
- if (fileCrawlerImageLogList.isEmpty()) {
- log.warn("jsoupFulibaPicDetail 结束:fileCrawlerImageLogList is empty");
- return;
- }
- String crawlerFulibaUrl = InitRunner.dicCodeMap.get("crawler_fuliba_url").getCodeValue();
- Map<String, String> headerMap = new HashMap<>();
- headerMap.put("referer", crawlerFulibaUrl);
- int successCount = 0;
- int failCount = 0;
- for (FileCrawlerImageLog fileCrawlerImageLog : fileCrawlerImageLogList) {
- try {
- Thread.sleep(5000L);
- SpringUtils.getBean(PictureInfoServiceImpl.class).jsoupFulibaPicDetailSub(fileCrawlerImageLog.getMainUrl(), headerMap, fileCrawlerImageLog.getPublishTime(), fileCrawlerImageLog.getId());
- if (2 == fileCrawlerImageLog.getStatus()) {
- fileCrawlerImageLog.setFailureCause("");
- }
- fileCrawlerImageLog.setStatus(1);
- successCount++;
- } catch (Exception e) {
- fileCrawlerImageLog.setFailureCause(e.getMessage().length() > 200 ? e.getMessage().substring(0, 200) : e.getMessage());
- if (e.getMessage().contains("timeoutCount equal imgEles size")) {
- fileCrawlerImageLog.setStatus(4);
- } else {
- fileCrawlerImageLog.setStatus(2);
- }
- failCount++;
- } finally {
- pictureInfoMapper.insertOrUpdateFileCrawlerImageLog(fileCrawlerImageLog);
- log.warn("jsoupFulibaPicDetail update status:mainUrl={},status={}", fileCrawlerImageLog.getMainUrl(), fileCrawlerImageLog.getStatus());
- }
- }
- stopWatch.stop();
- log.warn("jsoupFulibaPicDetail 结束:totalSize={},successCount={},failCount={},time={}", fileCrawlerImageLogList.size(), successCount, failCount, stopWatch.getTotalTimeMillis());
- }
- @Override
- @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
- public String jsoupFulibaPicDetailSub(String mainUrl, Map<String, String> headerMap, LocalDate publishTime, String logId) {
- String newName;
- String imageUrl;
- String imageSize;
- Document fulibaDetailDocument;
- String ftpImageCrawlerBasePath = InitRunner.dicCodeMap.get("ftp_image_crawler_basepath").getCodeValue();
- String ftpThumbnailCrawlerBasePath = InitRunner.dicCodeMap.get("ftp_thumbnail_crawler_basepath").getCodeValue();
- List<String> delPathList = new ArrayList<>();
- String srcUrl = "";
- List<FileCrawlerImage> fileCrawlerImageList = new ArrayList<>();
- try {
- fulibaDetailDocument = JsoupUtil.requestDocument(mainUrl, JsoupUtil.HTTP_GET, Proxy.NO_PROXY, null, headerMap, null);
- log.warn("jsoupFulibaPicDetailSub start:mainUrl={},publishTime={},logId={}", mainUrl, publishTime, logId);
- Elements imgEles = fulibaDetailDocument.select("div.content > article.article-content").select("img");
- int i = 0;
- String parentPath = "1" + File.separator + publishTime.format(DateUtils.dateFormatter5);
- Connection.Response response;
- int timeoutCount = 0;
- for (Element imgEle : imgEles) {
- srcUrl = imgEle.attr("src");
- String altTitle = imgEle.attr("alt");
- newName = FtpUtil.genImageName();
- String prefx = srcUrl.substring(srcUrl.lastIndexOf("."));
- newName = newName + prefx;
- try {
- response = Jsoup.connect(srcUrl).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
- } catch (SocketTimeoutException ioex) {
- timeoutCount++;
- continue;
- } catch (HttpStatusException ioex) {
- try {
- response = Jsoup.connect(srcUrl).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
- } catch (Exception e) {
- timeoutCount++;
- continue;
- }
- }
- byte[] imageBytes = response.bodyAsBytes();
- if (imageBytes.length == 0) {
- // 过滤掉失效的图片链接
- continue;
- }
- FileCrawlerImage fileCrawlerImage = new FileCrawlerImage();
- fileCrawlerImage.setOldName(altTitle);
- fileCrawlerImage.setNewName(newName);
- imageSize = BigDecimal.valueOf(imageBytes.length).divide(new BigDecimal("1024")).setScale(0, RoundingMode.UP).toPlainString().concat("KB");
- fileCrawlerImage.setSize(imageSize);
- imageUrl = parentPath + File.separator + newName;
- fileCrawlerImage.setPath(imageUrl);
- fileCrawlerImage.setRemark("");
- fileCrawlerImage.setCategoryId(1L);
- fileCrawlerImage.setOrginUrl(srcUrl);
- fileCrawlerImage.setLogId(logId);
- if (true) {
- InputStream imageStream2 = new ByteArrayInputStream(imageBytes);
- ByteArrayOutputStream thumbnailOutputStream = new ByteArrayOutputStream();
- try {
- Thumbnails.of(imageStream2).size(300, 200).toOutputStream(thumbnailOutputStream);
- } catch (UnsupportedFormatException unsupportedFormatException) {
- imageStream2.close();
- thumbnailOutputStream.close();
- continue;
- }
- InputStream imageStream1 = new ByteArrayInputStream(imageBytes);
- FtpUtil.uploadFile(ftpImageCrawlerBasePath, parentPath, newName, imageStream1);
- delPathList.add(ftpImageCrawlerBasePath + imageUrl);
- ByteArrayInputStream thumbnailInputStream = new ByteArrayInputStream(thumbnailOutputStream.toByteArray());
- FtpUtil.uploadFile(ftpThumbnailCrawlerBasePath, parentPath, newName, thumbnailInputStream);
- delPathList.add(ftpThumbnailCrawlerBasePath + imageUrl);
- imageStream2.close();
- thumbnailOutputStream.close();
- }
- fileCrawlerImage.setSort(++i);
- fileCrawlerImageList.add(fileCrawlerImage);
- }
- if (!imgEles.isEmpty() && timeoutCount == imgEles.size()) {
- log.warn("jsoupFulibaPicDetailSub timeoutCount is equals imgEles size,mainUrl={},publishTime={},timeoutCount={}", mainUrl, publishTime, timeoutCount);
- throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "timeoutCount equal imgEles size");
- }
- if (!imgEles.isEmpty() && timeoutCount > 4) {
- log.warn("jsoupFulibaPicDetailSub timeoutCount is Too many,mainUrl={},publishTime={},timeoutCount={}", mainUrl, publishTime, timeoutCount);
- throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "timeoutCount is Too many");
- }
- int count = pictureInfoMapper.insertIgnoreFileImageList(fileCrawlerImageList);
- } catch (Exception e) {
- // 异常,删除已经上传的文件
- if (!delPathList.isEmpty()) {
- delPathList.forEach(FtpUtil::delFile);
- }
- log.error("jsoupFulibaPicDetailSub exception,mainUrl={},publishTime={},srcUrl={}", mainUrl, publishTime, srcUrl, e);
- throw new BusinessException(30000, e.getMessage());
- }
- return "";
- }
- public static void main(String[] args) throws IOException {
- String srcUrl = "https://image.baidu.com/search/down?thumburl=https://baidu.com&url=https://tva1.sinaimg.cn/mw690/007Y7SRMly1gmays3w173j30ol16fh8x.jpg";
- Connection.Response response = Jsoup.connect(srcUrl).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
- byte[] imageBytes = response.bodyAsBytes();
- System.out.println(imageBytes.length);
- }
- }
|