|
@@ -261,6 +261,7 @@ public class BgServiceImpl implements BgService {
|
|
|
* @author lvzhiqiang
|
|
* @author lvzhiqiang
|
|
|
* 2022/5/4 19:27
|
|
* 2022/5/4 19:27
|
|
|
*/
|
|
*/
|
|
|
|
|
+ @Async
|
|
|
@Override
|
|
@Override
|
|
|
public void jsoupVideoInfoUncensored(Integer status) {
|
|
public void jsoupVideoInfoUncensored(Integer status) {
|
|
|
log.info("jsoupVideoInfoUncensored 开始:status={}", status);
|
|
log.info("jsoupVideoInfoUncensored 开始:status={}", status);
|
|
@@ -329,6 +330,77 @@ public class BgServiceImpl implements BgService {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
/**
|
|
|
|
|
+ * Jsoup VideoInfo
|
|
|
|
|
+ *
|
|
|
|
|
+ * @author lvzhiqiang
|
|
|
|
|
+ * 2022/5/5 18:21
|
|
|
|
|
+ */
|
|
|
|
|
+ @Async
|
|
|
|
|
+ @Override
|
|
|
|
|
+ public void jsoupVideoInfo(Integer status) {
|
|
|
|
|
+ log.info("jsoupVideoInfo 开始:status={}", status);
|
|
|
|
|
+
|
|
|
|
|
+ // 获取待抓取码列表
|
|
|
|
|
+ List<String> icodeList = videoInfoMapper.findIcodeByStatus(status);
|
|
|
|
|
+ if (icodeList.size() == 0) {
|
|
|
|
|
+ log.warn("status={}的icodeList为空", status);
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ // 获取javbus防屏蔽地址
|
|
|
|
|
+ List<String> javbusUrlList = videoSitePoolMapper.findUrlByTypeAndDeleteFlag(1, 1);
|
|
|
|
|
+ if (javbusUrlList.size() == 0) {
|
|
|
|
|
+ log.warn("javbusUrlList为空");
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 遍历
|
|
|
|
|
+ Document document;
|
|
|
|
|
+ VideoInfo videoInfo;
|
|
|
|
|
+ String javbusUrl;
|
|
|
|
|
+ int j = 0;
|
|
|
|
|
+ for (int i = 0; i < icodeList.size(); i++) {
|
|
|
|
|
+ long start = System.currentTimeMillis();
|
|
|
|
|
+ String identificationCode = icodeList.get(i);
|
|
|
|
|
+
|
|
|
|
|
+ int retryCount = 0;
|
|
|
|
|
+ while (retryCount <= 3) {
|
|
|
|
|
+ javbusUrl = javbusUrlList.get((int) (0 + Math.random() * (javbusUrlList.size() - 0)));
|
|
|
|
|
+ try {
|
|
|
|
|
+ document = Jsoup.connect(javbusUrl.concat("/").concat(identificationCode))
|
|
|
|
|
+ .timeout(50000)
|
|
|
|
|
+ //.proxy()
|
|
|
|
|
+ //.data()
|
|
|
|
|
+ .ignoreContentType(true)
|
|
|
|
|
+ .userAgent(getUserAgent())
|
|
|
|
|
+ .header("referer", "https://www.javbus.com/".concat(identificationCode))
|
|
|
|
|
+ .get();
|
|
|
|
|
+
|
|
|
|
|
+ videoInfo = new VideoInfo();
|
|
|
|
|
+ videoInfo.setIdentificationCode(identificationCode);
|
|
|
|
|
+ parseDocument4QiBing(document, identificationCode, videoInfo);
|
|
|
|
|
+ videoInfo.setStatus(2);
|
|
|
|
|
+ videoInfoMapper.updateJsoupInfoByCode(videoInfo);
|
|
|
|
|
+
|
|
|
|
|
+ j++;
|
|
|
|
|
+
|
|
|
|
|
+ log.info("jsoupVideoInfo success:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode);
|
|
|
|
|
+ break;
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ ++retryCount;
|
|
|
|
|
+
|
|
|
|
|
+ if (retryCount < 4) {
|
|
|
|
|
+ log.error("jsoupVideoInfo error重试:i={},retryCount={},time={},identificationCode={}", i, retryCount, System.currentTimeMillis() - start, identificationCode, e);
|
|
|
|
|
+ } else if (retryCount == 4) {
|
|
|
|
|
+ videoInfoMapper.updateStatus(identificationCode, 3);
|
|
|
|
|
+ log.error("jsoupVideoInfo error:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode, e);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ log.info("jsoupVideoInfo 统计:total={},success={}", icodeList.size(), j);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
* 初始化骑兵数据
|
|
* 初始化骑兵数据
|
|
|
*/
|
|
*/
|
|
|
@Override
|
|
@Override
|
|
@@ -699,6 +771,47 @@ public class BgServiceImpl implements BgService {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ private void parseDocument4QiBing(Document document, String identificationCode, VideoInfo videoInfo) throws Exception {
|
|
|
|
|
+ Elements container = document.select("div.container");
|
|
|
|
|
+ if (container.size() == 0) {
|
|
|
|
|
+ throw new Exception("番号无效!");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ Elements pEles = container.select("div.info > p");
|
|
|
|
|
+ // 识别码
|
|
|
|
|
+ Element pEle = pEles.get(0);
|
|
|
|
|
+ String iCode = pEle.select("span[style]").first().text();
|
|
|
|
|
+ if (!identificationCode.equalsIgnoreCase(iCode)) {
|
|
|
|
|
+ throw new Exception("番号与站点不一致");
|
|
|
|
|
+ }
|
|
|
|
|
+ // 发行日期
|
|
|
|
|
+ pEle = pEles.get(1);
|
|
|
|
|
+ String issueDate = pEle.text().split(":")[1].replace("\"", "").trim();
|
|
|
|
|
+ videoInfo.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
|
|
|
|
|
+ // 长度
|
|
|
|
|
+ pEle = pEles.get(2);
|
|
|
|
|
+ String length = pEle.text().split(":")[1].replace("\"", "").trim();
|
|
|
|
|
+ videoInfo.setLength(length);
|
|
|
|
|
+ // 导演
|
|
|
|
|
+ Elements directorEles = container.select("div.info").select("p:contains(導演)");
|
|
|
|
|
+ if (directorEles.size() > 0) {
|
|
|
|
|
+ pEle = directorEles.first().select("a[href]").first();
|
|
|
|
|
+ videoInfo.setDirector(pEle.text());
|
|
|
|
|
+ }
|
|
|
|
|
+ // 制作商
|
|
|
|
|
+ Elements markerEles = container.select("div.info").select("p:contains(製作商)");
|
|
|
|
|
+ if (markerEles.size() > 0) {
|
|
|
|
|
+ pEle = markerEles.first().select("a[href]").first();
|
|
|
|
|
+ videoInfo.setMaker(pEle.text());
|
|
|
|
|
+ }
|
|
|
|
|
+ // 发行商
|
|
|
|
|
+ Elements issuerEles = container.select("div.info").select("p:contains(發行商)");
|
|
|
|
|
+ if (issuerEles.size() > 0) {
|
|
|
|
|
+ pEle = issuerEles.first().select("a[href]").first();
|
|
|
|
|
+ videoInfo.setIssuer(pEle.text());
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
/**
|
|
/**
|
|
|
* 保存文件到本地
|
|
* 保存文件到本地
|
|
|
*
|
|
*
|