|
|
@@ -1,6 +1,11 @@
|
|
|
package top.lvzhiqiang.service.impl;
|
|
|
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.jsoup.Connection;
|
|
|
+import org.jsoup.Jsoup;
|
|
|
+import org.jsoup.nodes.Document;
|
|
|
+import org.jsoup.nodes.Element;
|
|
|
+import org.jsoup.select.Elements;
|
|
|
import org.springframework.scheduling.annotation.Async;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
import org.springframework.transaction.annotation.Propagation;
|
|
|
@@ -17,6 +22,7 @@ import javax.annotation.Resource;
|
|
|
import java.io.*;
|
|
|
import java.time.Instant;
|
|
|
import java.time.LocalDate;
|
|
|
+import java.time.LocalDateTime;
|
|
|
import java.time.ZoneOffset;
|
|
|
import java.util.ArrayList;
|
|
|
import java.util.List;
|
|
|
@@ -46,6 +52,8 @@ public class BgServiceImpl implements BgService {
|
|
|
private VideoInfoMapper videoInfoMapper;
|
|
|
@Resource
|
|
|
private IcodePoolMapper icodePoolMapper;
|
|
|
+ @Resource
|
|
|
+ private VideoInfoPoolMapper videoInfoPoolMapper;
|
|
|
|
|
|
/**
|
|
|
* 初始化骑兵数据
|
|
|
@@ -97,6 +105,189 @@ public class BgServiceImpl implements BgService {
|
|
|
log.warn("uploadFile4IdentificationCode:success={}", num);
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * Jsoup IcodePool
|
|
|
+ *
|
|
|
+ * @param status
|
|
|
+ */
|
|
|
+ @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
|
|
|
+ @Async
|
|
|
+ @Override
|
|
|
+ public void jsoupIcodePool(Integer status) {
|
|
|
+ // 获取待抓取码列表
|
|
|
+ List<String> icodePoolList = icodePoolMapper.findIcodeByStatus(status);
|
|
|
+ // 获取主表所有识别码
|
|
|
+ List<String> allIcode = videoInfoMapper.findAllIcode();
|
|
|
+
|
|
|
+ // 获取javbus防屏蔽地址
|
|
|
+ DicCode dicCode = WebAppConfig.dicCodeList.stream().filter(x -> 2 == x.getType() && "javbus".equals(x.getCodeKey())).findFirst().get();
|
|
|
+ if (dicCode == null) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ String javbusUrl = dicCode.getCodeValue();
|
|
|
+ // 校验地址
|
|
|
+ try {
|
|
|
+ Jsoup.connect(javbusUrl.concat(javbusUrl));
|
|
|
+ log.info("jsoupIcodePool:javbus防屏蔽地址有效!javbusUrl={}", javbusUrl);
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("jsoupIcodePool:javbus防屏蔽地址失效!javbusUrl={}", javbusUrl, e);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 获取码池图片保存路径
|
|
|
+ String machiPath = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && "machi_path".equals(x.getCodeKey())).findFirst().get().getCodeValue();
|
|
|
+
|
|
|
+ // 遍历
|
|
|
+ Document document;
|
|
|
+ VideoInfoPool videoInfoPool;
|
|
|
+ for (int i = 0; i < icodePoolList.size(); i++) {
|
|
|
+ String identificationCode = icodePoolList.get(i);
|
|
|
+
|
|
|
+ IcodePool icodePool = new IcodePool();
|
|
|
+ icodePool.setIdentificationCode(identificationCode);
|
|
|
+ if (allIcode.contains(identificationCode)) {
|
|
|
+ icodePool.setStatus(4);
|
|
|
+ icodePool.setRetryCount(0);
|
|
|
+ icodePoolMapper.updateStatus(icodePool);
|
|
|
+ log.warn("jsoupIcodePool exists:i={},identificationCode={}", i, identificationCode);
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ try {
|
|
|
+ document = Jsoup.connect(javbusUrl.concat(identificationCode)).timeout(50000).get();
|
|
|
+
|
|
|
+ videoInfoPool = new VideoInfoPool();
|
|
|
+ parseDocument(document, identificationCode, machiPath, videoInfoPool);
|
|
|
+ if (videoInfoPool != null) {
|
|
|
+ icodePool.setStatus(2);
|
|
|
+ icodePool.setRetryCount(0);
|
|
|
+ icodePoolMapper.updateStatus(icodePool);
|
|
|
+ videoInfoPoolMapper.insert(videoInfoPool);
|
|
|
+ }
|
|
|
+
|
|
|
+ log.info("jsoupIcodePool success:i={},identificationCode={}", i, identificationCode);
|
|
|
+ } catch (Exception e) {
|
|
|
+ icodePool.setStatus(3);
|
|
|
+ icodePool.setRetryCount(0);
|
|
|
+ icodePool.setFailureCause(e.getMessage().length() <= 200 ? e.getMessage() : e.getMessage().substring(0, 200));
|
|
|
+ icodePoolMapper.updateStatus(icodePool);
|
|
|
+ log.error("jsoupIcodePool error:i={},identificationCode={}", i, identificationCode, e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private void parseDocument(Document document, String identificationCode, String machiPath, VideoInfoPool videoInfoPool) throws Exception {
|
|
|
+ Elements container = document.select("div.container");
|
|
|
+ if (container.size() == 0) {
|
|
|
+ throw new Exception("番号无效!");
|
|
|
+ }
|
|
|
+
|
|
|
+ // 名称
|
|
|
+ String h3 = container.select("h3").first().text();
|
|
|
+ String[] nameArr = h3.split("\\s+");
|
|
|
+ if (nameArr.length > 1) {
|
|
|
+ videoInfoPool.setName(h3.substring(nameArr[0].length()).trim());
|
|
|
+ } else {
|
|
|
+ videoInfoPool.setName(nameArr[0]);
|
|
|
+ }
|
|
|
+
|
|
|
+ Elements pEles = container.select("div.info > p");
|
|
|
+ // 识别码
|
|
|
+ Element pEle = pEles.get(0);
|
|
|
+ String iCode = pEle.select("span[style]").first().text();
|
|
|
+ if (!identificationCode.equalsIgnoreCase(iCode)) {
|
|
|
+ throw new Exception("番号与站点不一致");
|
|
|
+ }
|
|
|
+ videoInfoPool.setIdentificationCode(iCode);
|
|
|
+ // 发行日期
|
|
|
+ pEle = pEles.get(1);
|
|
|
+ String issueDate = pEle.text().split(":")[1].replace("\"", "").trim();
|
|
|
+ videoInfoPool.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
|
|
|
+ // 长度
|
|
|
+ pEle = pEles.get(2);
|
|
|
+ String length = pEle.text().split(":")[1].replace("\"", "").trim();
|
|
|
+ videoInfoPool.setLength(length);
|
|
|
+ // 导演
|
|
|
+ Elements directorEles = container.select("div.info").select("p:contains(導演)");
|
|
|
+ if (directorEles.size() > 0) {
|
|
|
+ pEle = directorEles.first().select("a[href]").first();
|
|
|
+ videoInfoPool.setDirector(pEle.text());
|
|
|
+ }
|
|
|
+ // 制作商
|
|
|
+ Elements markerEles = container.select("div.info").select("p:contains(製作商)");
|
|
|
+ if (markerEles.size() > 0) {
|
|
|
+ pEle = markerEles.first().select("a[href]").first();
|
|
|
+ videoInfoPool.setMaker(pEle.text());
|
|
|
+ }
|
|
|
+ // 发行商
|
|
|
+ Elements issuerEles = container.select("div.info").select("p:contains(發行商)");
|
|
|
+ if (issuerEles.size() > 0) {
|
|
|
+ pEle = issuerEles.first().select("a[href]").first();
|
|
|
+ videoInfoPool.setIssuer(pEle.text());
|
|
|
+ }
|
|
|
+ // 类别
|
|
|
+ Elements genresEles = container.select("div.info").select("p:contains(類別)");
|
|
|
+ if (genresEles.size() > 0) {
|
|
|
+ StringBuffer sb = new StringBuffer();
|
|
|
+ Elements ahrefEles = genresEles.first().nextElementSibling().select("a[href]");
|
|
|
+ for (Element ahrefEle : ahrefEles) {
|
|
|
+ sb.append(ahrefEle.text()).append(",");
|
|
|
+ }
|
|
|
+ if (sb.length() > 0) {
|
|
|
+ sb = sb.deleteCharAt(sb.length() - 1);
|
|
|
+ }
|
|
|
+ videoInfoPool.setGenres(sb.toString());
|
|
|
+ }
|
|
|
+ // 演员
|
|
|
+ Elements castEles = container.select("div.info").select("p.star-show:contains(演員)");
|
|
|
+ if (castEles.size() > 0) {
|
|
|
+ Elements castElesTemp = container.select("div.info:contains(暫無出演者資訊)");
|
|
|
+ if (castElesTemp.size() == 0) {
|
|
|
+ StringBuffer sb = new StringBuffer();
|
|
|
+ Elements ahrefEles = castEles.first().nextElementSibling().nextElementSibling().select("a[href]");
|
|
|
+ for (Element ahrefEle : ahrefEles) {
|
|
|
+ sb.append(ahrefEle.text()).append(",");
|
|
|
+ }
|
|
|
+ if (sb.length() > 0) {
|
|
|
+ sb = sb.deleteCharAt(sb.length() - 1);
|
|
|
+ }
|
|
|
+ videoInfoPool.setCast(sb.toString());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 图片URL
|
|
|
+ String href = container.select("a.bigImage").first().attr("abs:href");
|
|
|
+
|
|
|
+ Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
|
|
|
+ String fileName = issueDate.concat(" ").concat(h3).concat(".jpg");
|
|
|
+ saveFile(response.bodyStream(), machiPath.concat(fileName));
|
|
|
+ videoInfoPool.setImgUrl(fileName);
|
|
|
+
|
|
|
+ videoInfoPool.setCreateTime(LocalDateTime.now());
|
|
|
+ videoInfoPool.setType(1);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 保存文件到本地
|
|
|
+ *
|
|
|
+ * @param bufferedInputStream
|
|
|
+ * @param savePath
|
|
|
+ */
|
|
|
+ private void saveFile(BufferedInputStream bufferedInputStream, String savePath) throws IOException {
|
|
|
+ //一次最多读取1k
|
|
|
+ byte[] buffer = new byte[1024];
|
|
|
+ //实际读取的长度
|
|
|
+ int readLenghth;
|
|
|
+ //创建的一个写出的缓冲流
|
|
|
+ BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(new FileOutputStream(new File(savePath)));
|
|
|
+ //文件逐步写入本地
|
|
|
+ while ((readLenghth = bufferedInputStream.read(buffer, 0, 1024)) != -1) {//先读出来,保存在buffer数组中
|
|
|
+ bufferedOutputStream.write(buffer, 0, readLenghth);//再从buffer中取出来保存到本地
|
|
|
+ }
|
|
|
+ //关闭缓冲流
|
|
|
+ bufferedOutputStream.close();
|
|
|
+ bufferedInputStream.close();
|
|
|
+ }
|
|
|
+
|
|
|
// 递归获取某目录下的所有子目录以及子文件
|
|
|
private void getAllFilePaths(String filePath, JavAllInfo javAllInfo) {
|
|
|
File[] files = new File(filePath).listFiles();
|