PictureInfoServiceImpl.java 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. package top.lvzhiqiang.service.impl;
  2. import com.alibaba.fastjson.JSONObject;
  3. import com.github.pagehelper.PageHelper;
  4. import com.github.pagehelper.PageInfo;
  5. import lombok.extern.slf4j.Slf4j;
  6. import net.coobird.thumbnailator.Thumbnails;
  7. import net.coobird.thumbnailator.tasks.UnsupportedFormatException;
  8. import org.jsoup.Connection;
  9. import org.jsoup.HttpStatusException;
  10. import org.jsoup.Jsoup;
  11. import org.jsoup.nodes.Document;
  12. import org.jsoup.nodes.Element;
  13. import org.jsoup.select.Elements;
  14. import org.springframework.beans.factory.annotation.Value;
  15. import org.springframework.scheduling.annotation.Async;
  16. import org.springframework.stereotype.Service;
  17. import org.springframework.transaction.annotation.Propagation;
  18. import org.springframework.transaction.annotation.Transactional;
  19. import org.springframework.util.ObjectUtils;
  20. import org.springframework.util.StopWatch;
  21. import org.springframework.web.multipart.MultipartFile;
  22. import top.lvzhiqiang.config.InitRunner;
  23. import top.lvzhiqiang.dto.R;
  24. import top.lvzhiqiang.entity.FileCrawlerImage;
  25. import top.lvzhiqiang.entity.FileCrawlerImageLog;
  26. import top.lvzhiqiang.entity.FileImage;
  27. import top.lvzhiqiang.enumeration.ResultCodeEnum;
  28. import top.lvzhiqiang.exception.BusinessException;
  29. import top.lvzhiqiang.mapper.PictureInfoMapper;
  30. import top.lvzhiqiang.service.PictureInfoService;
  31. import top.lvzhiqiang.util.*;
  32. import javax.annotation.Resource;
  33. import javax.imageio.ImageIO;
  34. import java.awt.image.BufferedImage;
  35. import java.io.*;
  36. import java.math.BigDecimal;
  37. import java.math.RoundingMode;
  38. import java.net.Proxy;
  39. import java.net.SocketTimeoutException;
  40. import java.net.URLDecoder;
  41. import java.time.LocalDate;
  42. import java.time.LocalDateTime;
  43. import java.util.ArrayList;
  44. import java.util.HashMap;
  45. import java.util.List;
  46. import java.util.Map;
/**
 * Picture information service implementation (ServiceImpl).
 *
 * @author lvzhiqiang
 * 2024/8/26 11:02
 */
  53. @Service
  54. @Slf4j
  55. public class PictureInfoServiceImpl extends BaseServiceImpl<Object> implements PictureInfoService {
  56. @Resource
  57. private PictureInfoMapper pictureInfoMapper;
  58. @Value("${spring.profiles.active}")
  59. private String env;
  60. @Override
  61. public Object getPictureInfoPage(Map<String, Object> params) {
  62. Object bigType = params.get("bigType");
  63. // 转换成like
  64. paramsToLike(params, "keyword");
  65. // 分页
  66. paramsToPagination(params);
  67. if ("上传".equals(bigType)) {
  68. // 排序
  69. paramsToSort(params);
  70. List<FileImage> pictureInfoList = pictureInfoMapper.getUploadImageInfoList(params);
  71. String bpicsUrl = InitRunner.dicCodeMap.get("bpics_url").getCodeValue();
  72. String ftpBasePath = InitRunner.dicCodeMap.get("ftp_basepath").getCodeValue();
  73. String ftpThumbnailBasePath = InitRunner.dicCodeMap.get("ftp_thumbnail_basepath").getCodeValue();
  74. pictureInfoList.stream().forEach(e -> {
  75. String path = e.getPath();
  76. e.setPath(bpicsUrl + ftpBasePath + path);
  77. e.setThumbnailPath(bpicsUrl + ftpThumbnailBasePath + path);
  78. });
  79. return new PageInfo<>(pictureInfoList);
  80. } else if ("爬虫".equals(bigType)) {
  81. // 排序
  82. if (params.containsKey(ORDER_FIELD) && params.containsKey(ORDER)
  83. && !ObjectUtils.isEmpty(params.get(ORDER_FIELD)) && !ObjectUtils.isEmpty(params.get(ORDER))) {
  84. PageHelper.orderBy(params.get(ORDER_FIELD) + " " + params.get(ORDER) + ",fi.sort asc");
  85. }
  86. List<FileCrawlerImage> crawlerImageList = pictureInfoMapper.getCrawlerImageInfoList(params);
  87. String bpicsUrl = InitRunner.dicCodeMap.get("bpics_url").getCodeValue();
  88. String ftpImageCrawlerBasePath = InitRunner.dicCodeMap.get("ftp_image_crawler_basepath").getCodeValue();
  89. String ftpThumbnailCrawlerBasePath = InitRunner.dicCodeMap.get("ftp_thumbnail_crawler_basepath").getCodeValue();
  90. crawlerImageList.stream().forEach(e -> {
  91. String path = e.getPath();
  92. e.setPath(bpicsUrl + ftpImageCrawlerBasePath + path);
  93. e.setThumbnailPath(bpicsUrl + ftpThumbnailCrawlerBasePath + path);
  94. e.setRemark(StringUtils.isEmpty(e.getRemark()) ? e.getOldName() : e.getRemark());
  95. });
  96. return new PageInfo<>(crawlerImageList);
  97. } else {
  98. throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "不支持的类型!");
  99. }
  100. }
  101. @Override
  102. public R insertOrUpdateImg(MultipartFile file, String remark, String createDate, Long categoryId, String id) {
  103. String imageUrl = "";
  104. String imageSize = "";
  105. String ftpBasePath = InitRunner.dicCodeMap.get("ftp_basepath").getCodeValue();
  106. String ftpThumbnailBasePath = InitRunner.dicCodeMap.get("ftp_thumbnail_basepath").getCodeValue();
  107. String ftpBaseUrl = InitRunner.dicCodeMap.get("ftp_baseurl").getCodeValue();
  108. List<String> delPathList = new ArrayList<>();
  109. if (StringUtils.isEmpty(id)) {
  110. String parentPath;
  111. LocalDateTime createTime = LocalDateTime.now();
  112. if (StringUtils.isNotEmpty(createDate)) {
  113. createTime = LocalDateTime.parse(createDate, DateUtils.dateTimeFormatter);
  114. parentPath = createTime.format(DateUtils.dateFormatter6);
  115. } else {
  116. parentPath = LocalDate.now().format(DateUtils.dateFormatter6);
  117. }
  118. try {
  119. // 1、给上传的图片生成新的文件名
  120. // 1.1获取原始文件名
  121. String oldName = file.getOriginalFilename();
  122. // 1.2使用FtpUtil工具类生成新的文件名,新文件名 = newName + 文件后缀
  123. String newName = FtpUtil.genImageName();
  124. newName = newName + oldName.substring(oldName.lastIndexOf("."));
  125. // 2、把图片上传到图片服务器
  126. // 2.1获取上传的io流
  127. InputStream input = file.getInputStream();
  128. // 2.2调用FtpUtil工具类进行上传
  129. boolean result = FtpUtil.uploadFile(ftpBasePath, parentPath, newName, input);
  130. delPathList.add(ftpBasePath + parentPath + "/" + newName);
  131. // 2.3缩略图
  132. BufferedImage originalImage = ImageIO.read(file.getInputStream());
  133. ByteArrayOutputStream thumbnailOutputStream = new ByteArrayOutputStream();
  134. Thumbnails.of(originalImage).size(300, 200).outputFormat(oldName.substring(oldName.lastIndexOf(".") + 1)).toOutputStream(thumbnailOutputStream);
  135. ByteArrayInputStream thumbnailInputStream = new ByteArrayInputStream(thumbnailOutputStream.toByteArray());
  136. FtpUtil.uploadFile(ftpThumbnailBasePath, parentPath, newName, thumbnailInputStream);
  137. delPathList.add(ftpThumbnailBasePath + parentPath + "/" + newName);
  138. thumbnailOutputStream.close();
  139. thumbnailInputStream.close();
  140. if (result) {
  141. //返回给前端图片访问路径
  142. imageUrl = parentPath + "/" + newName;
  143. imageSize = BigDecimal.valueOf(file.getSize()).divide(new BigDecimal("1024")).setScale(0, RoundingMode.UP).toPlainString().concat("KB");
  144. FileImage fileImage = new FileImage();
  145. fileImage.setOldName(oldName);
  146. fileImage.setNewName(newName);
  147. fileImage.setSize(imageSize);
  148. fileImage.setPath(imageUrl);
  149. fileImage.setRemark(remark);
  150. fileImage.setCategoryId(categoryId);
  151. fileImage.setCreateTime(createTime);
  152. pictureInfoMapper.insertFileImage(fileImage);
  153. }
  154. } catch (Exception e) {
  155. // 异常,删除已经上传的文件
  156. if (!delPathList.isEmpty()) {
  157. delPathList.forEach(FtpUtil::delFile);
  158. }
  159. log.error("insertOrUpdateImg Exception,", e);
  160. throw new BusinessException(30000, e.getMessage());
  161. }
  162. JSONObject result = new JSONObject();
  163. result.put("imageUrl", ftpBaseUrl + ftpBasePath + imageUrl);
  164. result.put("imageSize", imageSize);
  165. return R.ok().data(result);
  166. } else {
  167. FileImage fileImage = pictureInfoMapper.findFileImageById(Long.valueOf(id));
  168. if (fileImage == null) {
  169. throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "ID 不存在!");
  170. }
  171. fileImage.setCategoryId(categoryId);
  172. if (StringUtils.isNotEmpty(remark)) {
  173. fileImage.setRemark(remark);
  174. }
  175. String parentPath;
  176. if (StringUtils.isNotEmpty(createDate)) {
  177. LocalDateTime createTime = LocalDateTime.parse(createDate, DateUtils.dateTimeFormatter);
  178. parentPath = createTime.format(DateUtils.dateFormatter6);
  179. fileImage.setCreateTime(createTime);
  180. } else {
  181. parentPath = fileImage.getCreateTime().format(DateUtils.dateFormatter6);
  182. }
  183. if (file != null && file.getSize() > 0) {
  184. try {
  185. // 1、给上传的图片生成新的文件名
  186. // 1.1获取原始文件名
  187. String oldName = file.getOriginalFilename();
  188. String newName = FtpUtil.genImageName();
  189. // 1.2使用FtpUtil工具类生成新的文件名,新文件名 = newName + 文件后缀
  190. newName = newName + oldName.substring(oldName.lastIndexOf("."));
  191. // 2、把图片上传到图片服务器
  192. // 2.1获取上传的io流
  193. InputStream input = file.getInputStream();
  194. // 2.2调用FtpUtil工具类进行上传
  195. boolean result1 = FtpUtil.uploadFile(ftpBasePath, parentPath, newName, input);
  196. delPathList.add(ftpBasePath + parentPath + "/" + newName);
  197. // 2.3缩略图
  198. BufferedImage originalImage = ImageIO.read(file.getInputStream());
  199. ByteArrayOutputStream thumbnailOutputStream = new ByteArrayOutputStream();
  200. Thumbnails.of(originalImage).size(300, 200).outputFormat(oldName.substring(oldName.lastIndexOf(".") + 1)).toOutputStream(thumbnailOutputStream);
  201. ByteArrayInputStream thumbnailInputStream = new ByteArrayInputStream(thumbnailOutputStream.toByteArray());
  202. boolean result2 = FtpUtil.uploadFile(ftpThumbnailBasePath, parentPath, newName, thumbnailInputStream);
  203. delPathList.add(ftpThumbnailBasePath + parentPath + "/" + newName);
  204. thumbnailOutputStream.close();
  205. thumbnailInputStream.close();
  206. if (result2 && result2) {
  207. String oriPath = fileImage.getPath();
  208. imageUrl = parentPath + "/" + newName;
  209. imageSize = BigDecimal.valueOf(file.getSize()).divide(new BigDecimal("1024")).setScale(0, RoundingMode.UP).toPlainString().concat("KB");
  210. fileImage.setOldName(oldName);
  211. fileImage.setNewName(newName);
  212. fileImage.setSize(imageSize);
  213. fileImage.setPath(imageUrl);
  214. pictureInfoMapper.updateFileImage(fileImage);
  215. FtpUtil.delFile(ftpBasePath + oriPath);
  216. FtpUtil.delFile(ftpThumbnailBasePath + oriPath);
  217. } else {
  218. delPathList.forEach(FtpUtil::delFile);
  219. throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "上传新文件失败!");
  220. }
  221. } catch (Exception e) {
  222. // 异常,删除已经上传的文件
  223. if (!delPathList.isEmpty()) {
  224. delPathList.forEach(FtpUtil::delFile);
  225. }
  226. log.error("insertOrUpdateImg Exception,", e);
  227. throw new BusinessException(30000, e.getMessage());
  228. }
  229. } else {
  230. pictureInfoMapper.updateFileImage(fileImage);
  231. }
  232. return R.ok().data("success");
  233. }
  234. }
  235. @Override
  236. public R deleteImgs(Long imageId) {
  237. FileImage fileImage = pictureInfoMapper.findFileImageById(imageId);
  238. if (fileImage == null) {
  239. throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "ID 不存在!");
  240. }
  241. try {
  242. String ftpBasePath = InitRunner.dicCodeMap.get("ftp_basepath").getCodeValue();
  243. String ftpThumbnailBasePath = InitRunner.dicCodeMap.get("ftp_thumbnail_basepath").getCodeValue();
  244. boolean flag = FtpUtil.delFile(ftpBasePath + fileImage.getPath());
  245. FtpUtil.delFile(ftpThumbnailBasePath + fileImage.getPath());
  246. if (flag) {
  247. pictureInfoMapper.deleteFileImageById(imageId);
  248. return R.ok();
  249. } else {
  250. return R.error().message("删除失败");
  251. }
  252. } catch (Exception e) {
  253. e.printStackTrace();
  254. return R.error().message(e.getMessage());
  255. }
  256. }
  257. @Override
  258. @Async
  259. public void jsoupFulibaPic(String startPageUrl, Boolean ignoreTimeCompare) throws Exception {
  260. log.warn("jsoupFulibaPic 开始:startPageUrl={},ignoreTimeCompare={}", startPageUrl, ignoreTimeCompare);
  261. StopWatch stopWatch = new StopWatch();
  262. stopWatch.start();
  263. FileCrawlerImageLog latestFileCrawlerImageLog = pictureInfoMapper.findLatestCrawlerImage(1);
  264. LocalDate latestDate;
  265. if (latestFileCrawlerImageLog == null) {
  266. latestDate = LocalDate.of(1970, 1, 1);
  267. } else {
  268. latestDate = latestFileCrawlerImageLog.getPublishTime();
  269. }
  270. String crawlerFulibaUrl = InitRunner.dicCodeMap.get("crawler_fuliba_url").getCodeValue();
  271. if (StringUtils.isNotEmpty(startPageUrl)) {
  272. crawlerFulibaUrl = startPageUrl;
  273. }
  274. if (ignoreTimeCompare == null) {
  275. ignoreTimeCompare = false;
  276. }
  277. Map<String, String> headerMap = new HashMap<>();
  278. headerMap.put("referer", crawlerFulibaUrl);
  279. Document fulibaDocument = null;
  280. Elements sourceSelects = null;
  281. int findCount = 0;
  282. outer:
  283. while (true) {
  284. for (int i = 0; i < 10; i++) {
  285. try {
  286. fulibaDocument = JsoupUtil.requestDocument(crawlerFulibaUrl, JsoupUtil.HTTP_GET, Proxy.NO_PROXY, null, headerMap, null);
  287. sourceSelects = fulibaDocument.select(".content").select("article.excerpt");
  288. if (!sourceSelects.isEmpty()) {
  289. log.warn("jsoupFulibaPic page success:i={},url={}", i, crawlerFulibaUrl);
  290. break;
  291. } else {
  292. log.warn("jsoupFulibaPic page fail:i={},url={}", i, crawlerFulibaUrl);
  293. }
  294. } catch (Exception | Error e) {
  295. log.warn("jsoupFulibaPic page fail:i={},url={}", i, crawlerFulibaUrl, e);
  296. } finally {
  297. Thread.sleep(5000L);
  298. }
  299. }
  300. if (sourceSelects == null || sourceSelects.isEmpty()) {
  301. log.warn("jsoupFulibaPic page empty break:url={}", crawlerFulibaUrl);
  302. break;
  303. }
  304. sourceSelects = fulibaDocument.select(".content").select("article.excerpt");
  305. for (Element sourceSelect : sourceSelects) {
  306. String mainUrl = sourceSelect.select("header").select("a").attr("abs:href");
  307. mainUrl = URLDecoder.decode(mainUrl, "UTF-8");
  308. String mainTitle = sourceSelect.select("header").select("a").attr("title");
  309. mainTitle = mainTitle.replace("-福利吧", "");
  310. String publishTimeStr = sourceSelect.select("div.meta").select("time").text();
  311. LocalDate publishTime = LocalDate.parse(publishTimeStr, DateUtils.dateFormatter);
  312. if (!ignoreTimeCompare && (publishTime.isBefore(latestDate) || publishTime.isEqual(latestDate))) {
  313. log.warn("jsoupFulibaPic page publishTime isbefore latestDate break:mainUrl={},mainTitle={},publishTime={},latestDate={}", mainUrl, mainTitle, publishTimeStr, latestDate.format(DateUtils.dateFormatter));
  314. break outer;
  315. }
  316. FileCrawlerImageLog crawlerImageLog = new FileCrawlerImageLog();
  317. crawlerImageLog.setId(UUIDUtils.getUUID());
  318. crawlerImageLog.setMainUrl(mainUrl);
  319. crawlerImageLog.setMainTitle(mainTitle);
  320. crawlerImageLog.setCategoryId(1L);
  321. crawlerImageLog.setStatus(3);
  322. crawlerImageLog.setPublishTime(publishTime);
  323. int count = pictureInfoMapper.insertIgnoreFileCrawlerImageLog(crawlerImageLog);
  324. findCount += count;
  325. log.warn("jsoupFulibaPic item success:publishTime={},mainTitle={}", publishTime, mainTitle);
  326. }
  327. // 继续下一页
  328. Elements nextSelects = fulibaDocument.select("div.pagination > ul").select("li.next-page").select("a");
  329. if (!nextSelects.isEmpty()) {
  330. crawlerFulibaUrl = nextSelects.get(0).attr("abs:href");
  331. if (StringUtils.isEmpty(crawlerFulibaUrl)) {
  332. break;
  333. }
  334. } else {
  335. break;
  336. }
  337. }
  338. stopWatch.stop();
  339. log.warn("jsoupFulibaPic 结束:findCount={},time={}", findCount, stopWatch.getTotalTimeMillis());
  340. }
  341. @Override
  342. @Async
  343. public void jsoupFulibaPicDetail(Integer status, String mainUrl, String id) {
  344. log.warn("jsoupFulibaPicDetail 开始:status={},mainUrl={},id={}", status, mainUrl, id);
  345. StopWatch stopWatch = new StopWatch();
  346. stopWatch.start();
  347. Map<String, Object> params = new HashMap<>();
  348. params.put("categoryId", 1);
  349. if (StringUtils.isNotEmpty(id)) {
  350. params.put("id", id);
  351. } else if (StringUtils.isNotEmpty(mainUrl)) {
  352. params.put("mainUrl", mainUrl);
  353. } else if (status != null) {
  354. params.put("status", status);
  355. } else {
  356. throw new BusinessException(30000, "参数错误!");
  357. }
  358. List<FileCrawlerImageLog> fileCrawlerImageLogList = pictureInfoMapper.findJsoupFulibaPicDetailListByParams(params);
  359. if (fileCrawlerImageLogList.isEmpty()) {
  360. log.warn("jsoupFulibaPicDetail 结束:fileCrawlerImageLogList is empty");
  361. return;
  362. }
  363. String crawlerFulibaUrl = InitRunner.dicCodeMap.get("crawler_fuliba_url").getCodeValue();
  364. Map<String, String> headerMap = new HashMap<>();
  365. headerMap.put("referer", crawlerFulibaUrl);
  366. int successCount = 0;
  367. int failCount = 0;
  368. for (FileCrawlerImageLog fileCrawlerImageLog : fileCrawlerImageLogList) {
  369. try {
  370. Thread.sleep(5000L);
  371. SpringUtils.getBean(PictureInfoServiceImpl.class).jsoupFulibaPicDetailSub(fileCrawlerImageLog.getMainUrl(), headerMap, fileCrawlerImageLog.getPublishTime(), fileCrawlerImageLog.getId());
  372. if (2 == fileCrawlerImageLog.getStatus()) {
  373. fileCrawlerImageLog.setFailureCause("");
  374. }
  375. fileCrawlerImageLog.setStatus(1);
  376. successCount++;
  377. } catch (Exception e) {
  378. fileCrawlerImageLog.setFailureCause(e.getMessage().length() > 200 ? e.getMessage().substring(0, 200) : e.getMessage());
  379. if (e.getMessage().contains("timeoutCount equal imgEles size")) {
  380. fileCrawlerImageLog.setStatus(4);
  381. } else {
  382. fileCrawlerImageLog.setStatus(2);
  383. }
  384. failCount++;
  385. } finally {
  386. pictureInfoMapper.insertOrUpdateFileCrawlerImageLog(fileCrawlerImageLog);
  387. log.warn("jsoupFulibaPicDetail update status:mainUrl={},status={}", fileCrawlerImageLog.getMainUrl(), fileCrawlerImageLog.getStatus());
  388. }
  389. }
  390. stopWatch.stop();
  391. log.warn("jsoupFulibaPicDetail 结束:totalSize={},successCount={},failCount={},time={}", fileCrawlerImageLogList.size(), successCount, failCount, stopWatch.getTotalTimeMillis());
  392. }
  393. @Override
  394. @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
  395. public String jsoupFulibaPicDetailSub(String mainUrl, Map<String, String> headerMap, LocalDate publishTime, String logId) {
  396. String newName;
  397. String imageUrl;
  398. String imageSize;
  399. Document fulibaDetailDocument;
  400. String ftpImageCrawlerBasePath = InitRunner.dicCodeMap.get("ftp_image_crawler_basepath").getCodeValue();
  401. String ftpThumbnailCrawlerBasePath = InitRunner.dicCodeMap.get("ftp_thumbnail_crawler_basepath").getCodeValue();
  402. List<String> delPathList = new ArrayList<>();
  403. String srcUrl = "";
  404. List<FileCrawlerImage> fileCrawlerImageList = new ArrayList<>();
  405. try {
  406. fulibaDetailDocument = JsoupUtil.requestDocument(mainUrl, JsoupUtil.HTTP_GET, Proxy.NO_PROXY, null, headerMap, null);
  407. log.warn("jsoupFulibaPicDetailSub start:mainUrl={},publishTime={},logId={}", mainUrl, publishTime, logId);
  408. Elements imgEles = fulibaDetailDocument.select("div.content > article.article-content").select("img");
  409. int i = 0;
  410. String parentPath = "1" + File.separator + publishTime.format(DateUtils.dateFormatter5);
  411. Connection.Response response;
  412. int timeoutCount = 0;
  413. for (Element imgEle : imgEles) {
  414. srcUrl = imgEle.attr("src");
  415. String altTitle = imgEle.attr("alt");
  416. newName = FtpUtil.genImageName();
  417. String prefx = srcUrl.substring(srcUrl.lastIndexOf("."));
  418. newName = newName + prefx;
  419. try {
  420. response = Jsoup.connect(srcUrl).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
  421. } catch (SocketTimeoutException ioex) {
  422. timeoutCount++;
  423. continue;
  424. } catch (HttpStatusException ioex) {
  425. try {
  426. response = Jsoup.connect(srcUrl).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
  427. } catch (Exception e) {
  428. timeoutCount++;
  429. continue;
  430. }
  431. }
  432. byte[] imageBytes = response.bodyAsBytes();
  433. if (imageBytes.length == 0) {
  434. // 过滤掉失效的图片链接
  435. continue;
  436. }
  437. FileCrawlerImage fileCrawlerImage = new FileCrawlerImage();
  438. fileCrawlerImage.setOldName(altTitle);
  439. fileCrawlerImage.setNewName(newName);
  440. imageSize = BigDecimal.valueOf(imageBytes.length).divide(new BigDecimal("1024")).setScale(0, RoundingMode.UP).toPlainString().concat("KB");
  441. fileCrawlerImage.setSize(imageSize);
  442. imageUrl = parentPath + File.separator + newName;
  443. fileCrawlerImage.setPath(imageUrl);
  444. fileCrawlerImage.setRemark("");
  445. fileCrawlerImage.setCategoryId(1L);
  446. fileCrawlerImage.setOrginUrl(srcUrl);
  447. fileCrawlerImage.setLogId(logId);
  448. if (true) {
  449. InputStream imageStream2 = new ByteArrayInputStream(imageBytes);
  450. ByteArrayOutputStream thumbnailOutputStream = new ByteArrayOutputStream();
  451. try {
  452. Thumbnails.of(imageStream2).size(300, 200).toOutputStream(thumbnailOutputStream);
  453. } catch (UnsupportedFormatException unsupportedFormatException) {
  454. imageStream2.close();
  455. thumbnailOutputStream.close();
  456. continue;
  457. }
  458. InputStream imageStream1 = new ByteArrayInputStream(imageBytes);
  459. FtpUtil.uploadFile(ftpImageCrawlerBasePath, parentPath, newName, imageStream1);
  460. delPathList.add(ftpImageCrawlerBasePath + imageUrl);
  461. ByteArrayInputStream thumbnailInputStream = new ByteArrayInputStream(thumbnailOutputStream.toByteArray());
  462. FtpUtil.uploadFile(ftpThumbnailCrawlerBasePath, parentPath, newName, thumbnailInputStream);
  463. delPathList.add(ftpThumbnailCrawlerBasePath + imageUrl);
  464. imageStream2.close();
  465. thumbnailOutputStream.close();
  466. }
  467. fileCrawlerImage.setSort(++i);
  468. fileCrawlerImageList.add(fileCrawlerImage);
  469. }
  470. if (!imgEles.isEmpty() && timeoutCount == imgEles.size()) {
  471. log.warn("jsoupFulibaPicDetailSub timeoutCount is equals imgEles size,mainUrl={},publishTime={},timeoutCount={}", mainUrl, publishTime, timeoutCount);
  472. throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "timeoutCount equal imgEles size");
  473. }
  474. if (!imgEles.isEmpty() && timeoutCount > 4) {
  475. log.warn("jsoupFulibaPicDetailSub timeoutCount is Too many,mainUrl={},publishTime={},timeoutCount={}", mainUrl, publishTime, timeoutCount);
  476. throw new BusinessException(ResultCodeEnum.UNKNOWN_ERROR.getCode(), "timeoutCount is Too many");
  477. }
  478. int count = pictureInfoMapper.insertIgnoreFileImageList(fileCrawlerImageList);
  479. } catch (Exception e) {
  480. // 异常,删除已经上传的文件
  481. if (!delPathList.isEmpty()) {
  482. delPathList.forEach(FtpUtil::delFile);
  483. }
  484. log.error("jsoupFulibaPicDetailSub exception,mainUrl={},publishTime={},srcUrl={}", mainUrl, publishTime, srcUrl, e);
  485. throw new BusinessException(30000, e.getMessage());
  486. }
  487. return "";
  488. }
  489. public static void main(String[] args) throws IOException {
  490. String srcUrl = "https://image.baidu.com/search/down?thumburl=https://baidu.com&url=https://tva1.sinaimg.cn/mw690/007Y7SRMly1gmays3w173j30ol16fh8x.jpg";
  491. Connection.Response response = Jsoup.connect(srcUrl).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
  492. byte[] imageBytes = response.bodyAsBytes();
  493. System.out.println(imageBytes.length);
  494. }
  495. }