package top.lvzhiqiang.service.impl; import com.alibaba.fastjson.JSONObject; import com.github.pagehelper.PageHelper; import com.github.pagehelper.PageInfo; import lombok.extern.slf4j.Slf4j; import net.coobird.thumbnailator.Thumbnails; import net.coobird.thumbnailator.tasks.UnsupportedFormatException; import org.jsoup.Connection; import org.jsoup.HttpStatusException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.springframework.beans.factory.annotation.Value; import org.springframework.scheduling.annotation.Async; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Propagation; import org.springframework.transaction.annotation.Transactional; import org.springframework.util.ObjectUtils; import org.springframework.util.StopWatch; import org.springframework.web.multipart.MultipartFile; import top.lvzhiqiang.config.InitRunner; import top.lvzhiqiang.dto.R; import top.lvzhiqiang.entity.FileCrawlerImage; import top.lvzhiqiang.entity.FileCrawlerImageLog; import top.lvzhiqiang.entity.FileImage; import top.lvzhiqiang.enumeration.ResultCodeEnum; import top.lvzhiqiang.exception.BusinessException; import top.lvzhiqiang.mapper.PictureInfoMapper; import top.lvzhiqiang.service.PictureInfoService; import top.lvzhiqiang.util.*; import javax.annotation.Resource; import javax.imageio.ImageIO; import java.awt.image.BufferedImage; import java.io.*; import java.math.BigDecimal; import java.math.RoundingMode; import java.net.Proxy; import java.net.SocketTimeoutException; import java.net.URLDecoder; import java.time.LocalDate; import java.time.LocalDateTime; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** * 照片信息ServiceImpl * * @author lvzhiqiang * 2024/8/26 11:02 */ @Service @Slf4j public class PictureInfoServiceImpl extends BaseServiceImpl implements PictureInfoService { @Resource private PictureInfoMapper pictureInfoMapper; @Value("${spring.profiles.active}") private String env; @Override public Object getPictureInfoPage(Map params) { Object bigType = params.get("bigType"); // 转换成like paramsToLike(params, "keyword"); // 分页 paramsToPagination(params); if ("上传".equals(bigType)) { // 排序 paramsToSort(params); List pictureInfoList = pictureInfoMapper.getUploadImageInfoList(params); String bpicsUrl = InitRunner.dicCodeMap.get("bpics_url").getCodeValue(); String ftpBasePath = InitRunner.dicCodeMap.get("ftp_basepath").getCodeValue(); String ftpThumbnailBasePath = InitRunner.dicCodeMap.get("ftp_thumbnail_basepath").getCodeValue(); pictureInfoList.stream().forEach(e -> { String path = e.getPath(); e.setPath(bpicsUrl + ftpBasePath + path); e.setThumbnailPath(bpicsUrl + ftpThumbnailBasePath + path); }); return new PageInfo<>(pictureInfoList); } else if ("爬虫".equals(bigType)) { // 排序 if (params.containsKey(ORDER_FIELD) && params.containsKey(ORDER) && !ObjectUtils.isEmpty(params.get(ORDER_FIELD)) && !ObjectUtils.isEmpty(params.get(ORDER))) { PageHelper.orderBy(params.get(ORDER_FIELD) + " " + params.get(ORDER) + ",fi.sort asc"); } List crawlerImageList = pictureInfoMapper.getCrawlerImageInfoList(params); String bpicsUrl = InitRunner.dicCodeMap.get("bpics_url").getCodeValue(); String ftpImageCrawlerBasePath = InitRunner.dicCodeMap.get("ftp_image_crawler_basepath").getCodeValue(); String ftpThumbnailCrawlerBasePath = InitRunner.dicCodeMap.get("ftp_thumbnail_crawler_basepath").getCodeValue(); crawlerImageList.stream().forEach(e -> { String path = e.getPath(); e.setPath(bpicsUrl + ftpImageCrawlerBasePath + path); e.setThumbnailPath(bpicsUrl + ftpThumbnailCrawlerBasePath + path); e.setRemark(StringUtils.isEmpty(e.getRemark()) ? e.getOldName() : e.getRemark()); }); return new PageInfo<>(crawlerImageList); } else { throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "不支持的类型!"); } } @Override public R insertOrUpdateImg(MultipartFile file, String remark, String createDate, Long categoryId, String id) { String imageUrl = ""; String imageSize = ""; String ftpBasePath = InitRunner.dicCodeMap.get("ftp_basepath").getCodeValue(); String ftpThumbnailBasePath = InitRunner.dicCodeMap.get("ftp_thumbnail_basepath").getCodeValue(); String ftpBaseUrl = InitRunner.dicCodeMap.get("ftp_baseurl").getCodeValue(); List delPathList = new ArrayList<>(); if (StringUtils.isEmpty(id)) { String parentPath; LocalDateTime createTime = LocalDateTime.now(); if (StringUtils.isNotEmpty(createDate)) { createTime = LocalDateTime.parse(createDate, DateUtils.dateTimeFormatter); parentPath = createTime.format(DateUtils.dateFormatter6); } else { parentPath = LocalDate.now().format(DateUtils.dateFormatter6); } try { // 1、给上传的图片生成新的文件名 // 1.1获取原始文件名 String oldName = file.getOriginalFilename(); // 1.2使用FtpUtil工具类生成新的文件名,新文件名 = newName + 文件后缀 String newName = FtpUtil.genImageName(); newName = newName + oldName.substring(oldName.lastIndexOf(".")); // 2、把图片上传到图片服务器 // 2.1获取上传的io流 InputStream input = file.getInputStream(); // 2.2调用FtpUtil工具类进行上传 boolean result = FtpUtil.uploadFile(ftpBasePath, parentPath, newName, input); delPathList.add(ftpBasePath + parentPath + "/" + newName); // 2.3缩略图 BufferedImage originalImage = ImageIO.read(file.getInputStream()); ByteArrayOutputStream thumbnailOutputStream = new ByteArrayOutputStream(); Thumbnails.of(originalImage).size(300, 200).outputFormat(oldName.substring(oldName.lastIndexOf(".") + 1)).toOutputStream(thumbnailOutputStream); ByteArrayInputStream thumbnailInputStream = new ByteArrayInputStream(thumbnailOutputStream.toByteArray()); FtpUtil.uploadFile(ftpThumbnailBasePath, parentPath, newName, thumbnailInputStream); delPathList.add(ftpThumbnailBasePath + parentPath + "/" + newName); thumbnailOutputStream.close(); thumbnailInputStream.close(); if (result) { //返回给前端图片访问路径 imageUrl = parentPath + "/" + newName; imageSize = BigDecimal.valueOf(file.getSize()).divide(new BigDecimal("1024")).setScale(0, RoundingMode.UP).toPlainString().concat("KB"); FileImage fileImage = new FileImage(); fileImage.setOldName(oldName); fileImage.setNewName(newName); fileImage.setSize(imageSize); fileImage.setPath(imageUrl); fileImage.setRemark(remark); fileImage.setCategoryId(categoryId); fileImage.setCreateTime(createTime); pictureInfoMapper.insertFileImage(fileImage); } } catch (Exception e) { // 异常,删除已经上传的文件 if (!delPathList.isEmpty()) { delPathList.forEach(FtpUtil::delFile); } log.error("insertOrUpdateImg Exception,", e); throw new BusinessException(30000, e.getMessage()); } JSONObject result = new JSONObject(); result.put("imageUrl", ftpBaseUrl + ftpBasePath + imageUrl); result.put("imageSize", imageSize); return R.ok().data(result); } else { FileImage fileImage = pictureInfoMapper.findFileImageById(Long.valueOf(id)); if (fileImage == null) { throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "ID 不存在!"); } fileImage.setCategoryId(categoryId); if (StringUtils.isNotEmpty(remark)) { fileImage.setRemark(remark); } String parentPath; if (StringUtils.isNotEmpty(createDate)) { LocalDateTime createTime = LocalDateTime.parse(createDate, DateUtils.dateTimeFormatter); parentPath = createTime.format(DateUtils.dateFormatter6); fileImage.setCreateTime(createTime); } else { parentPath = fileImage.getCreateTime().format(DateUtils.dateFormatter6); } if (file != null && file.getSize() > 0) { try { // 1、给上传的图片生成新的文件名 // 1.1获取原始文件名 String oldName = file.getOriginalFilename(); String newName = FtpUtil.genImageName(); // 1.2使用FtpUtil工具类生成新的文件名,新文件名 = newName + 文件后缀 newName = newName + oldName.substring(oldName.lastIndexOf(".")); // 2、把图片上传到图片服务器 // 2.1获取上传的io流 InputStream input = file.getInputStream(); // 2.2调用FtpUtil工具类进行上传 boolean result1 = FtpUtil.uploadFile(ftpBasePath, parentPath, newName, input); delPathList.add(ftpBasePath + parentPath + "/" + newName); // 2.3缩略图 BufferedImage originalImage = ImageIO.read(file.getInputStream()); ByteArrayOutputStream thumbnailOutputStream = new ByteArrayOutputStream(); Thumbnails.of(originalImage).size(300, 200).outputFormat(oldName.substring(oldName.lastIndexOf(".") + 1)).toOutputStream(thumbnailOutputStream); ByteArrayInputStream thumbnailInputStream = new ByteArrayInputStream(thumbnailOutputStream.toByteArray()); boolean result2 = FtpUtil.uploadFile(ftpThumbnailBasePath, parentPath, newName, thumbnailInputStream); delPathList.add(ftpThumbnailBasePath + parentPath + "/" + newName); thumbnailOutputStream.close(); thumbnailInputStream.close(); if (result2 && result2) { String oriPath = fileImage.getPath(); imageUrl = parentPath + "/" + newName; imageSize = BigDecimal.valueOf(file.getSize()).divide(new BigDecimal("1024")).setScale(0, RoundingMode.UP).toPlainString().concat("KB"); fileImage.setOldName(oldName); fileImage.setNewName(newName); fileImage.setSize(imageSize); fileImage.setPath(imageUrl); pictureInfoMapper.updateFileImage(fileImage); FtpUtil.delFile(ftpBasePath + oriPath); FtpUtil.delFile(ftpThumbnailBasePath + oriPath); } else { delPathList.forEach(FtpUtil::delFile); throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "上传新文件失败!"); } } catch (Exception e) { // 异常,删除已经上传的文件 if (!delPathList.isEmpty()) { delPathList.forEach(FtpUtil::delFile); } log.error("insertOrUpdateImg Exception,", e); throw new BusinessException(30000, e.getMessage()); } } else { pictureInfoMapper.updateFileImage(fileImage); } return R.ok().data("success"); } } @Override public R deleteImgs(Long imageId) { FileImage fileImage = pictureInfoMapper.findFileImageById(imageId); if (fileImage == null) { throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "ID 不存在!"); } try { String ftpBasePath = InitRunner.dicCodeMap.get("ftp_basepath").getCodeValue(); String ftpThumbnailBasePath = InitRunner.dicCodeMap.get("ftp_thumbnail_basepath").getCodeValue(); boolean flag = FtpUtil.delFile(ftpBasePath + fileImage.getPath()); FtpUtil.delFile(ftpThumbnailBasePath + fileImage.getPath()); if (flag) { pictureInfoMapper.deleteFileImageById(imageId); return R.ok(); } else { return R.error().message("删除失败"); } } catch (Exception e) { e.printStackTrace(); return R.error().message(e.getMessage()); } } @Override @Async public void jsoupFulibaPic(String startPageUrl, Boolean ignoreTimeCompare) throws Exception { log.warn("jsoupFulibaPic 开始:startPageUrl={},ignoreTimeCompare={}", startPageUrl, ignoreTimeCompare); StopWatch stopWatch = new StopWatch(); stopWatch.start(); FileCrawlerImageLog latestFileCrawlerImageLog = pictureInfoMapper.findLatestCrawlerImage(1); LocalDate latestDate; if (latestFileCrawlerImageLog == null) { latestDate = LocalDate.of(1970, 1, 1); } else { latestDate = latestFileCrawlerImageLog.getPublishTime(); } String crawlerFulibaUrl = InitRunner.dicCodeMap.get("crawler_fuliba_url").getCodeValue(); if (StringUtils.isNotEmpty(startPageUrl)) { crawlerFulibaUrl = startPageUrl; } if (ignoreTimeCompare == null) { ignoreTimeCompare = false; } Map headerMap = new HashMap<>(); headerMap.put("referer", crawlerFulibaUrl); Document fulibaDocument = null; Elements sourceSelects = null; int findCount = 0; outer: while (true) { for (int i = 0; i < 10; i++) { try { fulibaDocument = JsoupUtil.requestDocument(crawlerFulibaUrl, JsoupUtil.HTTP_GET, Proxy.NO_PROXY, null, headerMap, null); sourceSelects = fulibaDocument.select(".content").select("article.excerpt"); if (!sourceSelects.isEmpty()) { log.warn("jsoupFulibaPic page success:i={},url={}", i, crawlerFulibaUrl); break; } else { log.warn("jsoupFulibaPic page fail:i={},url={}", i, crawlerFulibaUrl); } } catch (Exception | Error e) { log.warn("jsoupFulibaPic page fail:i={},url={}", i, crawlerFulibaUrl, e); } finally { Thread.sleep(5000L); } } if (sourceSelects == null || sourceSelects.isEmpty()) { log.warn("jsoupFulibaPic page empty break:url={}", crawlerFulibaUrl); break; } sourceSelects = fulibaDocument.select(".content").select("article.excerpt"); for (Element sourceSelect : sourceSelects) { String mainUrl = sourceSelect.select("header").select("a").attr("abs:href"); mainUrl = URLDecoder.decode(mainUrl, "UTF-8"); String mainTitle = sourceSelect.select("header").select("a").attr("title"); mainTitle = mainTitle.replace("-福利吧", ""); String publishTimeStr = sourceSelect.select("div.meta").select("time").text(); LocalDate publishTime = LocalDate.parse(publishTimeStr, DateUtils.dateFormatter); if (!ignoreTimeCompare && (publishTime.isBefore(latestDate) || publishTime.isEqual(latestDate))) { log.warn("jsoupFulibaPic page publishTime isbefore latestDate break:mainUrl={},mainTitle={},publishTime={},latestDate={}", mainUrl, mainTitle, publishTimeStr, latestDate.format(DateUtils.dateFormatter)); break outer; } FileCrawlerImageLog crawlerImageLog = new FileCrawlerImageLog(); crawlerImageLog.setId(UUIDUtils.getUUID()); crawlerImageLog.setMainUrl(mainUrl); crawlerImageLog.setMainTitle(mainTitle); crawlerImageLog.setCategoryId(1L); crawlerImageLog.setStatus(3); crawlerImageLog.setPublishTime(publishTime); int count = pictureInfoMapper.insertIgnoreFileCrawlerImageLog(crawlerImageLog); findCount += count; log.warn("jsoupFulibaPic item success:publishTime={},mainTitle={}", publishTime, mainTitle); } // 继续下一页 Elements nextSelects = fulibaDocument.select("div.pagination > ul").select("li.next-page").select("a"); if (!nextSelects.isEmpty()) { crawlerFulibaUrl = nextSelects.get(0).attr("abs:href"); if (StringUtils.isEmpty(crawlerFulibaUrl)) { break; } } else { break; } } stopWatch.stop(); log.warn("jsoupFulibaPic 结束:findCount={},time={}", findCount, stopWatch.getTotalTimeMillis()); } @Override @Async public void jsoupFulibaPicDetail(Integer status, String mainUrl, String id) { log.warn("jsoupFulibaPicDetail 开始:status={},mainUrl={},id={}", status, mainUrl, id); StopWatch stopWatch = new StopWatch(); stopWatch.start(); Map params = new HashMap<>(); params.put("categoryId", 1); if (StringUtils.isNotEmpty(id)) { params.put("id", id); } else if (StringUtils.isNotEmpty(mainUrl)) { params.put("mainUrl", mainUrl); } else if (status != null) { params.put("status", status); } else { throw new BusinessException(30000, "参数错误!"); } List fileCrawlerImageLogList = pictureInfoMapper.findJsoupFulibaPicDetailListByParams(params); if (fileCrawlerImageLogList.isEmpty()) { log.warn("jsoupFulibaPicDetail 结束:fileCrawlerImageLogList is empty"); return; } String crawlerFulibaUrl = InitRunner.dicCodeMap.get("crawler_fuliba_url").getCodeValue(); Map headerMap = new HashMap<>(); headerMap.put("referer", crawlerFulibaUrl); int successCount = 0; int failCount = 0; for (FileCrawlerImageLog fileCrawlerImageLog : fileCrawlerImageLogList) { try { Thread.sleep(5000L); SpringUtils.getBean(PictureInfoServiceImpl.class).jsoupFulibaPicDetailSub(fileCrawlerImageLog.getMainUrl(), headerMap, fileCrawlerImageLog.getPublishTime(), fileCrawlerImageLog.getId()); if (2 == fileCrawlerImageLog.getStatus()) { fileCrawlerImageLog.setFailureCause(""); } fileCrawlerImageLog.setStatus(1); successCount++; } catch (Exception e) { fileCrawlerImageLog.setFailureCause(e.getMessage().length() > 200 ? e.getMessage().substring(0, 200) : e.getMessage()); if (e.getMessage().contains("timeoutCount equal imgEles size")) { fileCrawlerImageLog.setStatus(4); } else { fileCrawlerImageLog.setStatus(2); } failCount++; } finally { pictureInfoMapper.insertOrUpdateFileCrawlerImageLog(fileCrawlerImageLog); log.warn("jsoupFulibaPicDetail update status:mainUrl={},status={}", fileCrawlerImageLog.getMainUrl(), fileCrawlerImageLog.getStatus()); } } stopWatch.stop(); log.warn("jsoupFulibaPicDetail 结束:totalSize={},successCount={},failCount={},time={}", fileCrawlerImageLogList.size(), successCount, failCount, stopWatch.getTotalTimeMillis()); } @Override @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class) public String jsoupFulibaPicDetailSub(String mainUrl, Map headerMap, LocalDate publishTime, String logId) { String newName; String imageUrl; String imageSize; Document fulibaDetailDocument; String ftpImageCrawlerBasePath = InitRunner.dicCodeMap.get("ftp_image_crawler_basepath").getCodeValue(); String ftpThumbnailCrawlerBasePath = InitRunner.dicCodeMap.get("ftp_thumbnail_crawler_basepath").getCodeValue(); List delPathList = new ArrayList<>(); String srcUrl = ""; List fileCrawlerImageList = new ArrayList<>(); try { fulibaDetailDocument = JsoupUtil.requestDocument(mainUrl, JsoupUtil.HTTP_GET, Proxy.NO_PROXY, null, headerMap, null); log.warn("jsoupFulibaPicDetailSub start:mainUrl={},publishTime={},logId={}", mainUrl, publishTime, logId); Elements imgEles = fulibaDetailDocument.select("div.content > article.article-content").select("img"); int i = 0; String parentPath = "1" + File.separator + publishTime.format(DateUtils.dateFormatter5); Connection.Response response; int timeoutCount = 0; for (Element imgEle : imgEles) { srcUrl = imgEle.attr("src"); String altTitle = imgEle.attr("alt"); newName = FtpUtil.genImageName(); String prefx = srcUrl.substring(srcUrl.lastIndexOf(".")); newName = newName + prefx; try { response = Jsoup.connect(srcUrl).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute(); } catch (SocketTimeoutException ioex) { timeoutCount++; continue; } catch (HttpStatusException ioex) { try { response = Jsoup.connect(srcUrl).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute(); } catch (Exception e) { timeoutCount++; continue; } } byte[] imageBytes = response.bodyAsBytes(); if (imageBytes.length == 0) { // 过滤掉失效的图片链接 continue; } FileCrawlerImage fileCrawlerImage = new FileCrawlerImage(); fileCrawlerImage.setOldName(altTitle); fileCrawlerImage.setNewName(newName); imageSize = BigDecimal.valueOf(imageBytes.length).divide(new BigDecimal("1024")).setScale(0, RoundingMode.UP).toPlainString().concat("KB"); fileCrawlerImage.setSize(imageSize); imageUrl = parentPath + File.separator + newName; fileCrawlerImage.setPath(imageUrl); fileCrawlerImage.setRemark(""); fileCrawlerImage.setCategoryId(1L); fileCrawlerImage.setOrginUrl(srcUrl); fileCrawlerImage.setLogId(logId); if (true) { InputStream imageStream2 = new ByteArrayInputStream(imageBytes); ByteArrayOutputStream thumbnailOutputStream = new ByteArrayOutputStream(); try { Thumbnails.of(imageStream2).size(300, 200).toOutputStream(thumbnailOutputStream); } catch (UnsupportedFormatException unsupportedFormatException) { imageStream2.close(); thumbnailOutputStream.close(); continue; } InputStream imageStream1 = new ByteArrayInputStream(imageBytes); FtpUtil.uploadFile(ftpImageCrawlerBasePath, parentPath, newName, imageStream1); delPathList.add(ftpImageCrawlerBasePath + imageUrl); ByteArrayInputStream thumbnailInputStream = new ByteArrayInputStream(thumbnailOutputStream.toByteArray()); FtpUtil.uploadFile(ftpThumbnailCrawlerBasePath, parentPath, newName, thumbnailInputStream); delPathList.add(ftpThumbnailCrawlerBasePath + imageUrl); imageStream2.close(); thumbnailOutputStream.close(); } fileCrawlerImage.setSort(++i); fileCrawlerImageList.add(fileCrawlerImage); } if (!imgEles.isEmpty() && timeoutCount == imgEles.size()) { log.warn("jsoupFulibaPicDetailSub timeoutCount is equals imgEles size,mainUrl={},publishTime={},timeoutCount={}", mainUrl, publishTime, timeoutCount); throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "timeoutCount equal imgEles size"); } if (!imgEles.isEmpty() && timeoutCount > 4) { log.warn("jsoupFulibaPicDetailSub timeoutCount is Too many,mainUrl={},publishTime={},timeoutCount={}", mainUrl, publishTime, timeoutCount); throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "timeoutCount is Too many"); } int count = pictureInfoMapper.insertIgnoreFileImageList(fileCrawlerImageList); } catch (Exception e) { // 异常,删除已经上传的文件 if (!delPathList.isEmpty()) { delPathList.forEach(FtpUtil::delFile); } log.error("jsoupFulibaPicDetailSub exception,mainUrl={},publishTime={},srcUrl={}", mainUrl, publishTime, srcUrl, e); throw new BusinessException(30000, e.getMessage()); } return ""; } public static void main(String[] args) throws IOException { String srcUrl = "https://image.baidu.com/search/down?thumburl=https://baidu.com&url=https://tva1.sinaimg.cn/mw690/007Y7SRMly1gmays3w173j30ol16fh8x.jpg"; Connection.Response response = Jsoup.connect(srcUrl).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute(); byte[] imageBytes = response.bodyAsBytes(); System.out.println(imageBytes.length); } }