|
|
@@ -515,6 +515,7 @@ public class BgServiceImpl implements BgService {
|
|
|
}
|
|
|
log.warn("jsoupVideoInfo 统计:total={},success={}", icodeList.size(), j);
|
|
|
}
|
|
|
+
|
|
|
@Async
|
|
|
@Override
|
|
|
public void jsoupVideoInfo4javdb(Integer status, String url, String identificationCodeP) {
|
|
|
@@ -910,15 +911,15 @@ public class BgServiceImpl implements BgService {
|
|
|
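* @param identificationCode identification code to add; it is trimmed and upper-cased before use
+ * @param url url passed to the pool insert together with the code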
|
|
|
*/
|
|
|
@Override
|
|
|
- public void single4IdentificationCode(String identificationCode) {
|
|
|
- List<String> icodePoolList = icodePoolMapper.findIcode();
|
|
|
-
|
|
|
+ public void single4IdentificationCode(String identificationCode, String url) { // Adds one identification code, together with its url, to the icode pool; rejects it when the lookup below finds a match.
|
|
|
identificationCode = identificationCode.trim().toUpperCase(); // Normalize first so lookup and insert use the same form. NOTE(review): throws NullPointerException when identificationCode is null - confirm callers validate.
|
|
|
- if (icodePoolList.contains(identificationCode)) {
|
|
|
+
|
|
|
+ List<String> icodePoolList = icodePoolMapper.findIcodeByCode(identificationCode); // Query by the normalized code (replaces the previous findIcode() + contains check).
|
|
|
+ if (icodePoolList.size() > 0) { // Any returned row is treated as "already exists".
|
|
|
throw new BusinessException(ResultCodeEnum.PARAM_DATA_EXIST_ERROR);
|
|
|
}
|
|
|
|
|
|
- icodePoolMapper.insert(identificationCode);
|
|
|
+ icodePoolMapper.insert(identificationCode, url); // NOTE(review): lookup and insert are separate calls and no transaction or lock is visible here - confirm duplicates are also prevented at the storage level.
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
@@ -928,13 +929,12 @@ public class BgServiceImpl implements BgService {
|
|
|
*/
|
|
|
@Async
|
|
|
@Override
|
|
|
- public void jsoupIcodePool(Integer status, Integer isDel, Integer ignoreRetryCount) {
|
|
|
- log.warn("jsoupIcodePool 开始:status={},isDel={},ignoreRetryCount={}", status, isDel, ignoreRetryCount);
|
|
|
+ public String jsoupIcodePool(String website, Integer status, Integer isDel, Integer ignoreRetryCount) {
|
|
|
+ log.warn("jsoupIcodePool 开始:website={},status={},isDel={},ignoreRetryCount={}", website, status, isDel, ignoreRetryCount);
|
|
|
if (isDel == 1) {
|
|
|
videoInfoPoolMapper.deleteAll();
|
|
|
}
|
|
|
|
|
|
-
|
|
|
// 获取待抓取码列表
|
|
|
List<String> icodePoolList;
|
|
|
if (1 == ignoreRetryCount) {
|
|
|
@@ -945,13 +945,13 @@ public class BgServiceImpl implements BgService {
|
|
|
|
|
|
if (icodePoolList.size() == 0) {
|
|
|
log.warn("status={}的icodePoolList为空", status);
|
|
|
- return;
|
|
|
+ return "size:0";
|
|
|
}
|
|
|
// 获取javbus防屏蔽地址
|
|
|
List<String> javbusUrlList = videoSitePoolMapper.findUrlByTypeAndDeleteFlag(1, 1);
|
|
|
if (javbusUrlList.size() == 0) {
|
|
|
log.warn("javbusUrlList为空");
|
|
|
- return;
|
|
|
+ return "javbusUrlList为空";
|
|
|
}
|
|
|
// 获取主表所有识别码
|
|
|
List<String> allIcode = videoInfoMapper.findAllIcode();
|
|
|
@@ -966,15 +966,15 @@ public class BgServiceImpl implements BgService {
|
|
|
int j = 0;
|
|
|
for (int i = 0; i < icodePoolList.size(); i++) {
|
|
|
long start = System.currentTimeMillis();
|
|
|
- String identificationCode = icodePoolList.get(i);
|
|
|
+ String[] identificationCodeArr = icodePoolList.get(i).split("\\|");
|
|
|
|
|
|
IcodePool icodePool = new IcodePool();
|
|
|
- icodePool.setIdentificationCode(identificationCode);
|
|
|
- if (allIcode.contains(identificationCode)) {
|
|
|
+ icodePool.setIdentificationCode(identificationCodeArr[0]);
|
|
|
+ if (allIcode.contains(identificationCodeArr[0])) {
|
|
|
icodePool.setStatus(4);
|
|
|
icodePool.setRetryCount(0);
|
|
|
icodePoolMapper.updateStatus(icodePool);
|
|
|
- log.warn("jsoupIcodePool exists:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode);
|
|
|
+ log.warn("jsoupIcodePool exists:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCodeArr[0]);
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
@@ -982,17 +982,27 @@ public class BgServiceImpl implements BgService {
|
|
|
while (retryCount <= 3) {
|
|
|
javbusUrl = javbusUrlList.get((int) (0 + Math.random() * (javbusUrlList.size())));
|
|
|
try {
|
|
|
- document = Jsoup.connect(javbusUrl.concat("/").concat(identificationCode))
|
|
|
- .timeout(50000)
|
|
|
- //.proxy()
|
|
|
- //.data()
|
|
|
- .ignoreContentType(true)
|
|
|
- .userAgent(getUserAgent())
|
|
|
- .header("referer", javbusUrl.concat("/").concat(identificationCode))
|
|
|
- .get();
|
|
|
-
|
|
|
videoInfoPool = new VideoInfoPool();
|
|
|
- long picTime = parseDocument(document, identificationCode, machiPath, videoInfoPool);
|
|
|
+ long picTime = 0;
|
|
|
+
|
|
|
+ if ("javbus".equalsIgnoreCase(website)) {
|
|
|
+ document = Jsoup.connect(javbusUrl.concat("/").concat(identificationCodeArr[0]))
|
|
|
+ .timeout(50000)
|
|
|
+ //.proxy()
|
|
|
+ //.data()
|
|
|
+ .ignoreContentType(true)
|
|
|
+ .userAgent(getUserAgent())
|
|
|
+ .header("referer", javbusUrl.concat("/").concat(identificationCodeArr[0]))
|
|
|
+ .get();
|
|
|
+ picTime = parseDocument(document, identificationCodeArr[0], machiPath, videoInfoPool);
|
|
|
+ } else if ("javdb".equalsIgnoreCase(website) && StringUtils.isNotEmpty(identificationCodeArr[1])) {
|
|
|
+ Map<String, String> headerMap = new HashMap<>();
|
|
|
+ headerMap.put("referer", identificationCodeArr[1]);
|
|
|
+ beforeProxy();
|
|
|
+ document = JsoupUtil.requestDocument(identificationCodeArr[1], JsoupUtil.HTTP_GET, proxy, null, headerMap, null);
|
|
|
+ picTime = parseDocument4Javdb(document, identificationCodeArr[0], machiPath, videoInfoPool);
|
|
|
+ }
|
|
|
+
|
|
|
if (videoInfoPool != null) {
|
|
|
icodePool.setStatus(2);
|
|
|
icodePool.setRetryCount(retryCount);
|
|
|
@@ -1002,24 +1012,25 @@ public class BgServiceImpl implements BgService {
|
|
|
j++;
|
|
|
}
|
|
|
|
|
|
- log.warn("jsoupIcodePool success:i={},picTime={},time={},identificationCode={}", i, picTime, System.currentTimeMillis() - start, identificationCode);
|
|
|
+ log.warn("jsoupIcodePool success:i={},picTime={},time={},identificationCode={}", i, picTime, System.currentTimeMillis() - start, identificationCodeArr[0]);
|
|
|
break;
|
|
|
} catch (Exception e) {
|
|
|
++retryCount;
|
|
|
|
|
|
if (retryCount < 4) {
|
|
|
- log.error("jsoupIcodePool error重试:i={},retryCount={},time={},identificationCode={},javbusUrl={}", i, retryCount, System.currentTimeMillis() - start, identificationCode, javbusUrl, e);
|
|
|
+ log.error("jsoupIcodePool error重试:i={},retryCount={},time={},identificationCode={},javbusUrl={}", i, retryCount, System.currentTimeMillis() - start, identificationCodeArr[0], javbusUrl, e);
|
|
|
} else if (retryCount == 4) {
|
|
|
icodePool.setStatus(3);
|
|
|
icodePool.setRetryCount(retryCount - 1);
|
|
|
icodePool.setFailureCause(e.getMessage().length() <= 200 ? e.getMessage() : e.getMessage().substring(0, 200));
|
|
|
icodePoolMapper.updateStatus(icodePool);
|
|
|
- log.error("jsoupIcodePool error:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode, e);
|
|
|
+ log.error("jsoupIcodePool error:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCodeArr[0], e);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
log.warn("jsoupIcodePool 统计:total={},success={}", icodePoolList.size(), j);
|
|
|
+ return "total=".concat(String.valueOf(icodePoolList.size())).concat(",success=").concat(String.valueOf(j));
|
|
|
}
|
|
|
|
|
|
private String getUserAgent() {
|
|
|
@@ -1150,6 +1161,99 @@ public class BgServiceImpl implements BgService {
|
|
|
return end - start;
|
|
|
}
|
|
|
|
|
|
+ /**
+  * Parses a javdb detail page into {@code videoInfoPool} and downloads the cover image.
+  *
+  * <p>The identification code, issue date and length are treated as required: a page without
+  * them is rejected with a descriptive {@link BusinessException} instead of an unexplained
+  * NullPointerException, so callers that record {@code e.getMessage()} always get a message.
+  * Director, maker, issuer, genres and cast are optional and are left unset when absent.
+  *
+  * @param javdbCodeDocument  parsed javdb detail page
+  * @param identificationCode code expected on the page; compared case-insensitively
+  * @param machiPath          base path that the relative cover path is appended to when saving
+  * @param videoInfoPool      target object, populated in place
+  * @return milliseconds spent downloading and saving the cover image
+  * @throws BusinessException when the page lacks the expected container, info panel,
+  *                           a required field or the cover image
+  * @throws Exception         when the code on the page differs from {@code identificationCode},
+  *                           or when the date cannot be parsed or the download/save fails
+  */
+ private long parseDocument4Javdb(Document javdbCodeDocument, String identificationCode, String machiPath, VideoInfoPool videoInfoPool) throws Exception {
+     Elements container = javdbCodeDocument.select("section.section > div.container");
+     if (container.isEmpty()) {
+         throw new BusinessException(30000, "番号无效!");
+     }
+
+     Elements videoDetail = container.select("div.video-detail");
+     // Name
+     videoInfoPool.setName(videoDetail.select("h2.title").select("strong.current-title").text().trim());
+
+     // first() returns null on an empty selection, which lets us fail with a clear message
+     // instead of the IndexOutOfBoundsException that get(0) would raise.
+     Element panel = videoDetail.select("nav.movie-panel-info").first();
+     if (panel == null) {
+         throw new BusinessException(30000, "javdb页面缺少信息面板(nav.movie-panel-info)");
+     }
+
+     // Identification code
+     String iCode = requiredPanelValue4Javdb(panel, "番號");
+     if (!identificationCode.equalsIgnoreCase(iCode)) {
+         throw new Exception("番号与站点不一致");
+     }
+     videoInfoPool.setIdentificationCode(iCode);
+     // Issue date
+     String issueDate = requiredPanelValue4Javdb(panel, "日期");
+     videoInfoPool.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
+     // Length
+     videoInfoPool.setLength(requiredPanelValue4Javdb(panel, "時長"));
+
+     // Director
+     String director = firstLinkText4Javdb(panel, "導演");
+     if (director != null) {
+         videoInfoPool.setDirector(director);
+     }
+     // Maker
+     String maker = firstLinkText4Javdb(panel, "片商");
+     if (maker != null) {
+         videoInfoPool.setMaker(maker);
+     }
+     // Issuer
+     String issuer = firstLinkText4Javdb(panel, "發行");
+     if (issuer != null) {
+         videoInfoPool.setIssuer(issuer);
+     }
+     // Genres
+     String genres = joinedLinkTexts4Javdb(panel, "類別");
+     if (genres != null) {
+         videoInfoPool.setGenres(genres);
+     }
+     // Cast
+     String cast = joinedLinkTexts4Javdb(panel, "演員");
+     if (cast != null) {
+         videoInfoPool.setCast(cast);
+     }
+
+     // Cover image URL
+     Element cover = videoDetail.select("div.column-video-cover").select("a > img").first();
+     if (cover == null || cover.attr("src").isEmpty()) {
+         throw new BusinessException(30000, "javdb页面缺少封面图片");
+     }
+     String href = cover.attr("src");
+
+     long start = System.currentTimeMillis();
+     Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
+
+     // NOTE(review): the original literal read replace("?", "?"), a no-op that looks like a
+     // full-width question mark lost to an encoding round-trip. U+FF1F is presumed here so the
+     // name stays usable as a file name - confirm against the sibling parseDocument methods.
+     String fileName = issueDate.concat(" ").concat(videoInfoPool.getIdentificationCode()).concat(" ").concat(videoInfoPool.getName()).replace("?", "\uFF1F");
+     byte[] fileNameBytes = fileName.getBytes(StandardCharsets.UTF_8);
+     if (fileNameBytes.length > 251) {
+         // Keep the name within 251 bytes so that ".jpg" still fits in 255. Cutting in the
+         // middle of a multi-byte character decodes to U+FFFD, which is stripped afterwards.
+         byte[] truncated = new byte[251];
+         System.arraycopy(fileNameBytes, 0, truncated, 0, 251);
+         fileName = new String(truncated, StandardCharsets.UTF_8).replace("\uFFFD", "");
+     }
+     fileName = fileName.concat(".jpg");
+
+     String machiImgUrl = "码池/".concat(fileName);
+
+     saveFile(response.bodyStream(), machiPath.concat(machiImgUrl));
+     long end = System.currentTimeMillis();
+
+     videoInfoPool.setImgUrl(machiImgUrl);
+
+     videoInfoPool.setCreateTime(LocalDateTime.now());
+     videoInfoPool.setType(1);
+
+     return end - start;
+ }
+
+ /** Returns the first {@code span.value} under the panel row containing {@code label}, or null when absent. */
+ private static Element panelValue4Javdb(Element panel, String label) {
+     return panel.select("div:contains(" + label + ")").select("span.value").first();
+ }
+
+ /** Returns the whitespace-stripped text of a required panel row, or throws when the row is missing. */
+ private static String requiredPanelValue4Javdb(Element panel, String label) {
+     Element value = panelValue4Javdb(panel, label);
+     if (value == null) {
+         throw new BusinessException(30000, "javdb页面缺少字段:" + label);
+     }
+     return compactText4Javdb(value.text());
+ }
+
+ /** Returns the whitespace-stripped text of the first link in an optional panel row, or null when row or link is absent. */
+ private static String firstLinkText4Javdb(Element panel, String label) {
+     Element value = panelValue4Javdb(panel, label);
+     if (value == null) {
+         return null;
+     }
+     Element link = value.select("a[href]").first();
+     return link == null ? null : compactText4Javdb(link.text());
+ }
+
+ /** Returns all link texts of an optional panel row joined with ",", "" when it has no links, or null when the row is absent. */
+ private static String joinedLinkTexts4Javdb(Element panel, String label) {
+     Element value = panelValue4Javdb(panel, label);
+     if (value == null) {
+         return null;
+     }
+     StringBuilder joined = new StringBuilder();
+     for (Element link : value.select("a[href]")) {
+         joined.append(compactText4Javdb(link.text())).append(",");
+     }
+     if (joined.length() > 0) {
+         joined.deleteCharAt(joined.length() - 1);
+     }
+     return joined.toString();
+ }
+
+ /** Removes every whitespace character, including the non-breaking space (U+00A0) that {@code \s} does not match. */
+ private static String compactText4Javdb(String text) {
+     return text.replaceAll("[\\s\\u00A0]+", "");
+ }
|
|
|
+
|
|
|
private void parseDocument4Uncensored(Document document, String identificationCode, String machiPath, VideoInfoUncensored videoInfoUncensored) throws Exception {
|
|
|
Elements container = document.select("div.container");
|
|
|
if (container.size() == 0) {
|
|
|
@@ -1358,6 +1462,7 @@ public class BgServiceImpl implements BgService {
|
|
|
|
|
|
return picTime.toString();
|
|
|
}
|
|
|
+
|
|
|
private String parseDocument4QiBing4Javdb(Document javdbCodeDocument, String identificationCode, String qibingPath, VideoInfo videoInfo) throws Exception {
|
|
|
Elements container = javdbCodeDocument.select("section.section > div.container");
|
|
|
if (container.size() == 0) {
|