|
@@ -87,6 +87,16 @@ public class BgServiceImpl implements BgService {
|
|
|
|
|
|
|
|
private Proxy proxy = null;
|
|
private Proxy proxy = null;
|
|
|
|
|
|
|
|
|
|
+ public void beforeProxy() {
|
|
|
|
|
+ if (null == proxy) {
|
|
|
|
|
+ if ("dev".equals(env)) {
|
|
|
|
|
+ proxy = new Proxy(Proxy.Type.SOCKS, new InetSocketAddress("127.0.0.1", 1080));
|
|
|
|
|
+ } else {
|
|
|
|
|
+ proxy = Proxy.NO_PROXY;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
/**
|
|
/**
|
|
|
* findDicCode
|
|
* findDicCode
|
|
|
*
|
|
*
|
|
@@ -421,7 +431,7 @@ public class BgServiceImpl implements BgService {
|
|
|
*/
|
|
*/
|
|
|
@Async
|
|
@Async
|
|
|
@Override
|
|
@Override
|
|
|
- public void jsoupVideoInfo(Integer status, String website, String url, String identificationCodeP) {
|
|
|
|
|
|
|
+ public void jsoupVideoInfo(Integer status, String url, String identificationCodeP) {
|
|
|
log.warn("jsoupVideoInfo 开始:status={}", status);
|
|
log.warn("jsoupVideoInfo 开始:status={}", status);
|
|
|
|
|
|
|
|
// 获取待抓取码列表
|
|
// 获取待抓取码列表
|
|
@@ -448,13 +458,7 @@ public class BgServiceImpl implements BgService {
|
|
|
javbusUrlList.add(url);
|
|
javbusUrlList.add(url);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- if (null == proxy) {
|
|
|
|
|
- if ("dev".equals(env)) {
|
|
|
|
|
- proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 1080));
|
|
|
|
|
- } else {
|
|
|
|
|
- proxy = Proxy.NO_PROXY;
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ beforeProxy();
|
|
|
|
|
|
|
|
// 获取图片保存路径
|
|
// 获取图片保存路径
|
|
|
String qibingPath = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && env.equals(x.getEnv()) && "apics_path".equals(x.getCodeKey())).findFirst().get().getCodeValue();
|
|
String qibingPath = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && env.equals(x.getEnv()) && "apics_path".equals(x.getCodeKey())).findFirst().get().getCodeValue();
|
|
@@ -511,6 +515,72 @@ public class BgServiceImpl implements BgService {
|
|
|
}
|
|
}
|
|
|
log.warn("jsoupVideoInfo 统计:total={},success={}", icodeList.size(), j);
|
|
log.warn("jsoupVideoInfo 统计:total={},success={}", icodeList.size(), j);
|
|
|
}
|
|
}
|
|
|
|
|
+ @Async
|
|
|
|
|
+ @Override
|
|
|
|
|
+ public void jsoupVideoInfo4javdb(Integer status, String url, String identificationCodeP) {
|
|
|
|
|
+ log.warn("jsoupVideoInfo4javdb 开始:status={}", status);
|
|
|
|
|
+
|
|
|
|
|
+ // 获取待抓取码列表
|
|
|
|
|
+ List<String> icodeList = new ArrayList<>();
|
|
|
|
|
+ if (StringUtils.isEmpty(identificationCodeP)) {
|
|
|
|
|
+ icodeList = videoInfoMapper.findIcodeByStatus(status);
|
|
|
|
|
+ if (icodeList.size() == 0) {
|
|
|
|
|
+ log.warn("status={}的icodeList为空", status);
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ } else {
|
|
|
|
|
+ icodeList.add(identificationCodeP);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ beforeProxy();
|
|
|
|
|
+
|
|
|
|
|
+ // 获取图片保存路径
|
|
|
|
|
+ String qibingPath = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && env.equals(x.getEnv()) && "apics_path".equals(x.getCodeKey())).findFirst().get().getCodeValue();
|
|
|
|
|
+
|
|
|
|
|
+ // 遍历
|
|
|
|
|
+ Document document;
|
|
|
|
|
+ VideoInfo videoInfo;
|
|
|
|
|
+ int j = 0;
|
|
|
|
|
+ for (int i = 0; i < icodeList.size(); i++) {
|
|
|
|
|
+ long start = System.currentTimeMillis();
|
|
|
|
|
+ String identificationCode = icodeList.get(i);
|
|
|
|
|
+
|
|
|
|
|
+ int retryCount = 0;
|
|
|
|
|
+ while (retryCount <= 3) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ Map<String, String> headerMap = new HashMap<>();
|
|
|
|
|
+ headerMap.put("referer", url);
|
|
|
|
|
+ document = JsoupUtil.requestDocument(url, JsoupUtil.HTTP_GET, proxy, null, headerMap, null);
|
|
|
|
|
+
|
|
|
|
|
+ videoInfo = new VideoInfo();
|
|
|
|
|
+ videoInfo.setIdentificationCode(identificationCode);
|
|
|
|
|
+ String picTime = parseDocument4QiBing4Javdb(document, identificationCode, qibingPath, videoInfo);
|
|
|
|
|
+ videoInfo.setStatus(2);
|
|
|
|
|
+
|
|
|
|
|
+ // 获取男优
|
|
|
|
|
+ getMaleCast(videoInfo);
|
|
|
|
|
+
|
|
|
|
|
+ videoInfoMapper.updateJsoupInfoByCode(videoInfo);
|
|
|
|
|
+ videoInfoOtherMapper.insertOrUpdate(identificationCode, 2);
|
|
|
|
|
+
|
|
|
|
|
+ j++;
|
|
|
|
|
+
|
|
|
|
|
+ log.warn("jsoupVideoInfo4javdb success:i={},picTime={},time={},identificationCode={}", i, picTime, System.currentTimeMillis() - start, identificationCode);
|
|
|
|
|
+ break;
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ ++retryCount;
|
|
|
|
|
+
|
|
|
|
|
+ if (retryCount < 4) {
|
|
|
|
|
+ log.error("jsoupVideoInfo4javdb error重试:i={},retryCount={},time={},identificationCode={},javbusUrl={}", i, retryCount, System.currentTimeMillis() - start, identificationCode, url, e);
|
|
|
|
|
+ } else if (retryCount == 4) {
|
|
|
|
|
+ videoInfoMapper.updateStatus(identificationCode, 3);
|
|
|
|
|
+ log.error("jsoupVideoInfo4javdb error:i={},time={},identificationCode={},javbusUrl={}", i, System.currentTimeMillis() - start, identificationCode, url, e);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ log.warn("jsoupVideoInfo4javdb 统计:total={},success={}", icodeList.size(), j);
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
@Override
|
|
@Override
|
|
|
public void getMaleCast(VideoInfo videoInfo) {
|
|
public void getMaleCast(VideoInfo videoInfo) {
|
|
@@ -1288,6 +1358,136 @@ public class BgServiceImpl implements BgService {
|
|
|
|
|
|
|
|
return picTime.toString();
|
|
return picTime.toString();
|
|
|
}
|
|
}
|
|
|
|
|
+ private String parseDocument4QiBing4Javdb(Document javdbCodeDocument, String identificationCode, String qibingPath, VideoInfo videoInfo) throws Exception {
|
|
|
|
|
+ Elements container = javdbCodeDocument.select("section.section > div.container");
|
|
|
|
|
+ if (container.size() == 0) {
|
|
|
|
|
+ throw new BusinessException(30000, "番号无效!");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ Elements videoDetail = container.select("div.video-detail");
|
|
|
|
|
+ // 名称
|
|
|
|
|
+ videoInfo.setName(videoDetail.select("h2.title").select("strong.current-title").text().trim());
|
|
|
|
|
+
|
|
|
|
|
+ Elements moviePanelInfos = videoDetail.select("nav.movie-panel-info");
|
|
|
|
|
+ Element pEle = moviePanelInfos.get(0);
|
|
|
|
|
+ // 识别码
|
|
|
|
|
+ String iCode = pEle.select("div:contains(番號)").select("span.value").first().text().replace(" ", "").replaceAll("\\s+", "");
|
|
|
|
|
+ if (!identificationCode.equalsIgnoreCase(iCode)) {
|
|
|
|
|
+ throw new Exception("番号与站点不一致");
|
|
|
|
|
+ }
|
|
|
|
|
+ // 发行日期
|
|
|
|
|
+ String issueDate = pEle.select("div:contains(日期)").select("span.value").first().text().replace(" ", "").replaceAll("\\s+", "");
|
|
|
|
|
+ videoInfo.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
|
|
|
|
|
+ // 长度
|
|
|
|
|
+ String length = pEle.select("div:contains(時長)").select("span.value").first().text().replace(" ", "").replaceAll("\\s+", "");
|
|
|
|
|
+ videoInfo.setLength(length);
|
|
|
|
|
+ // 导演
|
|
|
|
|
+ Elements directorEles = pEle.select("div:contains(導演)").select("span.value");
|
|
|
|
|
+ if (directorEles.size() > 0) {
|
|
|
|
|
+ videoInfo.setDirector(directorEles.first().select("a[href]").first().text().replace(" ", "").replaceAll("\\s+", ""));
|
|
|
|
|
+ }
|
|
|
|
|
+ // 制作商
|
|
|
|
|
+ Elements markerEles = pEle.select("div:contains(片商)").select("span.value");
|
|
|
|
|
+ if (markerEles.size() > 0) {
|
|
|
|
|
+ videoInfo.setMaker(markerEles.first().select("a[href]").first().text().replace(" ", "").replaceAll("\\s+", ""));
|
|
|
|
|
+ }
|
|
|
|
|
+ // 发行商
|
|
|
|
|
+ Elements issuerEles = pEle.select("div:contains(發行)").select("span.value");
|
|
|
|
|
+ if (issuerEles.size() > 0) {
|
|
|
|
|
+ videoInfo.setIssuer(issuerEles.first().select("a[href]").first().text().replace(" ", "").replaceAll("\\s+", ""));
|
|
|
|
|
+ }
|
|
|
|
|
+ // 类别
|
|
|
|
|
+ Elements genresEles = pEle.select("div:contains(類別)").select("span.value");
|
|
|
|
|
+ if (genresEles.size() > 0) {
|
|
|
|
|
+ StringBuffer sb = new StringBuffer();
|
|
|
|
|
+ Elements ahrefEles = genresEles.first().select("a[href]");
|
|
|
|
|
+ for (Element ahrefEle : ahrefEles) {
|
|
|
|
|
+ sb.append(ahrefEle.text().replace(" ", "").replaceAll("\\s+", "")).append(",");
|
|
|
|
|
+ }
|
|
|
|
|
+ if (sb.length() > 0) {
|
|
|
|
|
+ sb = sb.deleteCharAt(sb.length() - 1);
|
|
|
|
|
+ }
|
|
|
|
|
+ videoInfo.setGenres(sb.toString());
|
|
|
|
|
+ }
|
|
|
|
|
+ // 演员
|
|
|
|
|
+ Elements castEles = pEle.select("div:contains(演員)").select("span.value");
|
|
|
|
|
+ if (castEles.size() > 0) {
|
|
|
|
|
+ StringBuffer sb = new StringBuffer();
|
|
|
|
|
+ Elements ahrefEles = castEles.first().select("a[href]");
|
|
|
|
|
+ for (Element ahrefEle : ahrefEles) {
|
|
|
|
|
+ sb.append(ahrefEle.text().replace(" ", "").replaceAll("\\s+", "")).append(",");
|
|
|
|
|
+ }
|
|
|
|
|
+ if (sb.length() > 0) {
|
|
|
|
|
+ sb = sb.deleteCharAt(sb.length() - 1);
|
|
|
|
|
+ }
|
|
|
|
|
+ videoInfo.setCast(sb.toString());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 图片 骑兵步兵
|
|
|
|
|
+ String qibings = "骑兵步兵/".concat(iCode);
|
|
|
|
|
+ StringBuffer picTime = new StringBuffer("{cover:");
|
|
|
|
|
+
|
|
|
|
|
+ // 图片URL bigImage
|
|
|
|
|
+ String qibingCover = qibings.concat("/cover/");
|
|
|
|
|
+
|
|
|
|
|
+ String fileName = issueDate.concat(" ").concat(videoInfo.getIdentificationCode()).concat(" ").concat(videoInfo.getName()).replace("?", "?");
|
|
|
|
|
+ byte[] imgUrlBytes = fileName.getBytes(StandardCharsets.UTF_8);
|
|
|
|
|
+ if (imgUrlBytes.length > 251) {
|
|
|
|
|
+ byte[] imgUrlDestBytes = new byte[251];
|
|
|
|
|
+ System.arraycopy(imgUrlBytes, 0, imgUrlDestBytes, 0, 251);
|
|
|
|
|
+ fileName = new String(imgUrlDestBytes, StandardCharsets.UTF_8).replace("�", "");
|
|
|
|
|
+ }
|
|
|
|
|
+ fileName = fileName.concat(".jpg");
|
|
|
|
|
+
|
|
|
|
|
+ if (!new File(qibingPath.concat(qibingCover), fileName).exists()) {
|
|
|
|
|
+ Elements videoMetaPanel = videoDetail.select("div.column-video-cover");
|
|
|
|
|
+ String href = videoMetaPanel.select("a > img").first().attr("src");
|
|
|
|
|
+
|
|
|
|
|
+ long start = System.currentTimeMillis();
|
|
|
|
|
+ Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
|
|
|
|
|
+
|
|
|
|
|
+ saveFile2(response.bodyStream(), qibingPath.concat(qibingCover), fileName);
|
|
|
|
|
+ picTime.append(System.currentTimeMillis() - start).append(",");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ videoInfo.setImgUrl(qibingCover.concat(fileName));
|
|
|
|
|
+
|
|
|
|
|
+ // 图片URL img_gf
|
|
|
|
|
+ String qibingImgGF = qibings.concat("/img_gf/");
|
|
|
|
|
+
|
|
|
|
|
+ File qibingImgGFFile = new File(qibingPath.concat(qibingImgGF));
|
|
|
|
|
+ if (!qibingImgGFFile.exists() || qibingImgGFFile.listFiles().length == 0) {
|
|
|
|
|
+ Elements sampleBoxEles = videoDetail.select("div.preview-images").select("a.tile-item");
|
|
|
|
|
+ long start2 = System.currentTimeMillis();
|
|
|
|
|
+ if (sampleBoxEles.size() > 0) {
|
|
|
|
|
+ Connection.Response responseImg;
|
|
|
|
|
+
|
|
|
|
|
+ for (Element sampleBoxEle : sampleBoxEles) {
|
|
|
|
|
+ String sampleBoxHref = sampleBoxEle.attr("href");
|
|
|
|
|
+ if (!sampleBoxHref.contains("http")) {
|
|
|
|
|
+ sampleBoxHref = sampleBoxEle.attr("abs:href");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ try {
|
|
|
|
|
+ responseImg = Jsoup.connect(sampleBoxHref).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
|
|
|
|
|
+ } catch (HttpStatusException e) {
|
|
|
|
|
+ sampleBoxHref = sampleBoxEle.select("img").attr("src");
|
|
|
|
|
+ if (!sampleBoxHref.contains("http")) {
|
|
|
|
|
+ sampleBoxHref = sampleBoxEle.select("img").attr("abs:src");
|
|
|
|
|
+ }
|
|
|
|
|
+ responseImg = Jsoup.connect(sampleBoxHref).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
|
|
|
|
|
+ }
|
|
|
|
|
+ String sampleBoxFileName = sampleBoxHref.substring(sampleBoxHref.lastIndexOf("/") + 1);
|
|
|
|
|
+ saveFile2(responseImg.bodyStream(), qibingPath.concat(qibingImgGF), sampleBoxFileName);
|
|
|
|
|
+ }
|
|
|
|
|
+ } else {
|
|
|
|
|
+ log.error("jsoupVideoInfo img_gf null,identificationCode={}", identificationCode);
|
|
|
|
|
+ }
|
|
|
|
|
+ picTime.append("img_gf:").append(System.currentTimeMillis() - start2).append("}");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return picTime.toString();
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
/**
|
|
/**
|
|
|
* 保存文件到本地2
|
|
* 保存文件到本地2
|