|
|
@@ -583,6 +583,256 @@ public class BgServiceImpl implements BgService {
|
|
|
log.warn("jsoupVideoInfo4javdb 统计:total={},success={}", icodeList.size(), j);
|
|
|
}
|
|
|
|
|
|
+ @Async
|
|
|
+ @Override
|
|
|
+ public void jsoupVideoInfoInfantry4javdb(String type, Integer status, String url, String identificationCodeP) {
|
|
|
+ log.warn("jsoupVideoInfoInfantry4javdb 开始:status={}", status);
|
|
|
+
|
|
|
+ // 获取待抓取码列表
|
|
|
+ List<VideoInfoInfantry> videoInfoInfantryList;
|
|
|
+ if (StringUtils.isEmpty(identificationCodeP)) {
|
|
|
+ videoInfoInfantryList = videoInfoInfantryMapper.findInfoByTypeAndStatus(type, status);
|
|
|
+ if (videoInfoInfantryList.size() == 0) {
|
|
|
+ log.warn("status={}的videoInfoInfantryList为空", status);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ videoInfoInfantryList = videoInfoInfantryMapper.findByCodeAndType(identificationCodeP, type, status, null);
|
|
|
+ videoInfoInfantryList.forEach(e -> {
|
|
|
+ e.setJavdbUrl(url);
|
|
|
+ });
|
|
|
+ }
|
|
|
+
|
|
|
+ List<DicCode> dicCodeList = dicCodeMapper.findAll();
|
|
|
+ Map<String, String> javdbConstantMap = dicCodeList.stream()
|
|
|
+ .filter(x -> x.getType() != null && 2 == x.getType() && x.getEnv().contains(env))
|
|
|
+ .collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
|
|
|
+
|
|
|
+
|
|
|
+ beforeProxy();
|
|
|
+
|
|
|
+ // 获取图片保存路径
|
|
|
+ String infantryPath = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && env.equals(x.getEnv()) && "apics_path".equals(x.getCodeKey())).findFirst().get().getCodeValue();
|
|
|
+
|
|
|
+ // 遍历
|
|
|
+ Document document;
|
|
|
+ Document javdbSearchDocument;
|
|
|
+ int j = 0;
|
|
|
+ for (int i = 0; i < videoInfoInfantryList.size(); i++) {
|
|
|
+ long start = System.currentTimeMillis();
|
|
|
+ VideoInfoInfantry videoInfoInfantry = videoInfoInfantryList.get(i);
|
|
|
+
|
|
|
+ int retryCount = 0;
|
|
|
+ while (retryCount <= 3) {
|
|
|
+ try {
|
|
|
+ Thread.sleep(3000);
|
|
|
+
|
|
|
+ String picTime;
|
|
|
+ Elements itembSelects;
|
|
|
+ if (StringUtils.isNotEmpty(videoInfoInfantry.getJavdbUrl())) {
|
|
|
+ Map<String, String> headerMap = new HashMap<>();
|
|
|
+ headerMap.put("referer", url);
|
|
|
+ document = JsoupUtil.requestDocument(url, JsoupUtil.HTTP_GET, proxy, null, headerMap, null);
|
|
|
+ } else {
|
|
|
+ String searchUrl = javdbConstantMap.get("javdb").concat("search?q=").concat(videoInfoInfantry.getIdentificationCode()).concat("&f=all");
|
|
|
+ Map<String, String> header3Map = new HashMap<>();
|
|
|
+ header3Map.put("referer", searchUrl);
|
|
|
+
|
|
|
+ javdbSearchDocument = JsoupUtil.requestDocument(searchUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
|
|
|
+
|
|
|
+ itembSelects = javdbSearchDocument.select("div.movie-list").select("div.item");
|
|
|
+ if (itembSelects.size() == 0) {
|
|
|
+ String newName = videoInfoInfantry.getName();
|
|
|
+ searchUrl = javdbConstantMap.get("javdb").concat("search?q=").concat(newName).concat("&f=all");
|
|
|
+ javdbSearchDocument = JsoupUtil.requestDocument(searchUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
|
|
|
+ itembSelects = javdbSearchDocument.select("div.movie-list").select("div.item");
|
|
|
+ }
|
|
|
+
|
|
|
+ if (itembSelects.size() == 0) {
|
|
|
+ throw new BusinessException(30000, "javdb search result null");
|
|
|
+ }
|
|
|
+
|
|
|
+ String title;
|
|
|
+ String code;
|
|
|
+ String codeUrl = null;
|
|
|
+ for (Element itembSelect : itembSelects) {
|
|
|
+ title = itembSelect.select("a.box").get(0).attr("title");
|
|
|
+ code = itembSelect.select("a.box").get(0).select("div.video-title").select("strong").text();
|
|
|
+ if (code.equalsIgnoreCase(videoInfoInfantry.getIdentificationCode())) {
|
|
|
+ codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ String newName = videoInfoInfantry.getName().replace("●", "さ");
|
|
|
+ if (title.contains(newName)) {
|
|
|
+ codeUrl = itembSelect.select("a.box").get(0).attr("abs:href");
|
|
|
+ videoInfoInfantry.setName(newName);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (StringUtils.isEmpty(codeUrl)) {
|
|
|
+ throw new BusinessException(30000, "javdb search result mismatch");
|
|
|
+ }
|
|
|
+
|
|
|
+ document = JsoupUtil.requestDocument(codeUrl, JsoupUtil.HTTP_GET, proxy, null, header3Map, null);
|
|
|
+ videoInfoInfantry.setJavdbUrl(codeUrl);
|
|
|
+ }
|
|
|
+
|
|
|
+ picTime = parseDocument4Infantry4Javdb(document, infantryPath, videoInfoInfantry);
|
|
|
+ videoInfoInfantry.setStatus(2);
|
|
|
+ videoInfoInfantryMapper.updateJsoupInfoById(videoInfoInfantry);
|
|
|
+ videoInfoOtherMapper.insertOrUpdate(videoInfoInfantry.getIdentificationCode(), 2);
|
|
|
+
|
|
|
+ j++;
|
|
|
+
|
|
|
+ log.warn("jsoupVideoInfoInfantry4javdb success:i={},picTime={},time={},identificationCode={}", i, picTime, System.currentTimeMillis() - start, videoInfoInfantry.getIdentificationCode());
|
|
|
+ break;
|
|
|
+ } catch (Exception e) {
|
|
|
+ ++retryCount;
|
|
|
+
|
|
|
+ if (retryCount < 4) {
|
|
|
+ log.error("jsoupVideoInfoInfantry4javdb error重试:i={},retryCount={},time={},identificationCode={},javbusUrl={}", i, retryCount, System.currentTimeMillis() - start, videoInfoInfantry.getIdentificationCode(), url, e);
|
|
|
+ } else if (retryCount == 4) {
|
|
|
+ videoInfoInfantryMapper.updateStatus(videoInfoInfantry.getIdentificationCode(), 3);
|
|
|
+ log.error("jsoupVideoInfoInfantry4javdb error:i={},time={},identificationCode={},javbusUrl={}", i, System.currentTimeMillis() - start, videoInfoInfantry.getIdentificationCode(), url, e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ log.warn("jsoupVideoInfoInfantry4javdb 统计:total={},success={}", videoInfoInfantryList.size(), j);
|
|
|
+ }
|
|
|
+
|
|
|
+ private String parseDocument4Infantry4Javdb(Document document, String infantryPath, VideoInfoInfantry videoInfoInfantry) throws Exception {
|
|
|
+ Elements container = document.select("section.section > div.container");
|
|
|
+ if (container.size() == 0) {
|
|
|
+ throw new BusinessException(30000, "番号无效!");
|
|
|
+ }
|
|
|
+
|
|
|
+ Elements videoDetail = container.select("div.video-detail");
|
|
|
+ // 名称
|
|
|
+ videoInfoInfantry.setName(videoDetail.select("h2.title").select("strong.current-title").text().trim());
|
|
|
+
|
|
|
+ Elements moviePanelInfos = videoDetail.select("nav.movie-panel-info");
|
|
|
+ Element pEle = moviePanelInfos.get(0);
|
|
|
+ // 识别码
|
|
|
+ String iCode = pEle.select("div:contains(番號)").select("span.value").first().text().replace(" ", "").replaceAll("\\s+", "");
|
|
|
+ if (!videoInfoInfantry.getIdentificationCode().equalsIgnoreCase(iCode)) {
|
|
|
+ throw new Exception("番号与站点不一致");
|
|
|
+ }
|
|
|
+ // 发行日期
|
|
|
+ String issueDate = pEle.select("div:contains(日期)").select("span.value").first().text().replace(" ", "").replaceAll("\\s+", "");
|
|
|
+ videoInfoInfantry.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
|
|
|
+ // 长度
|
|
|
+ String length = pEle.select("div:contains(時長)").select("span.value").first().text().replace(" ", "").replaceAll("\\s+", "");
|
|
|
+ videoInfoInfantry.setLength(length);
|
|
|
+ // 导演
|
|
|
+ Elements directorEles = pEle.select("div:contains(導演)").select("span.value");
|
|
|
+ if (directorEles.size() > 0) {
|
|
|
+ videoInfoInfantry.setDirector(directorEles.first().select("a[href]").first().text().replace(" ", "").replaceAll("\\s+", ""));
|
|
|
+ }
|
|
|
+ // 制作商
|
|
|
+ Elements markerEles = pEle.select("div:contains(片商)").select("span.value");
|
|
|
+ if (markerEles.size() > 0) {
|
|
|
+ videoInfoInfantry.setMaker(markerEles.first().select("a[href]").first().text().replace(" ", "").replaceAll("\\s+", ""));
|
|
|
+ }
|
|
|
+ // 发行商
|
|
|
+ Elements issuerEles = pEle.select("div:contains(發行)").select("span.value");
|
|
|
+ if (issuerEles.size() > 0) {
|
|
|
+ videoInfoInfantry.setIssuer(issuerEles.first().select("a[href]").first().text().replace(" ", "").replaceAll("\\s+", ""));
|
|
|
+ }
|
|
|
+ // 类别
|
|
|
+ Elements genresEles = pEle.select("div:contains(類別)").select("span.value");
|
|
|
+ if (genresEles.size() > 0) {
|
|
|
+ StringBuffer sb = new StringBuffer();
|
|
|
+ Elements ahrefEles = genresEles.first().select("a[href]");
|
|
|
+ for (Element ahrefEle : ahrefEles) {
|
|
|
+ sb.append(ahrefEle.text().replace(" ", "").replaceAll("\\s+", "")).append(",");
|
|
|
+ }
|
|
|
+ if (sb.length() > 0) {
|
|
|
+ sb = sb.deleteCharAt(sb.length() - 1);
|
|
|
+ }
|
|
|
+ videoInfoInfantry.setGenres(sb.toString());
|
|
|
+ }
|
|
|
+ // 演员
|
|
|
+ Elements castEles = pEle.select("div:contains(演員)").select("span.value");
|
|
|
+ if (castEles.size() > 0) {
|
|
|
+ StringBuffer sb = new StringBuffer();
|
|
|
+ Elements ahrefEles = castEles.first().select("a[href]");
|
|
|
+ for (Element ahrefEle : ahrefEles) {
|
|
|
+ sb.append(ahrefEle.text().replace(" ", "").replaceAll("\\s+", "")).append(",");
|
|
|
+ }
|
|
|
+ if (sb.length() > 0) {
|
|
|
+ sb = sb.deleteCharAt(sb.length() - 1);
|
|
|
+ }
|
|
|
+ videoInfoInfantry.setCast(sb.toString());
|
|
|
+ }
|
|
|
+
|
|
|
+ // 图片 步兵
|
|
|
+ String bubings = "步兵/".concat(videoInfoInfantry.getType()).concat("/").concat(iCode);
|
|
|
+ StringBuffer picTime = new StringBuffer("{cover:");
|
|
|
+
|
|
|
+ // 图片URL bigImage
|
|
|
+ String bubingCover = bubings.concat("/cover/");
|
|
|
+
|
|
|
+ String fileName = issueDate.concat(" ").concat(videoInfoInfantry.getIdentificationCode()).concat(" ").concat(videoInfoInfantry.getName()).replace("?", "?");
|
|
|
+ byte[] imgUrlBytes = fileName.getBytes(StandardCharsets.UTF_8);
|
|
|
+ if (imgUrlBytes.length > 251) {
|
|
|
+ byte[] imgUrlDestBytes = new byte[251];
|
|
|
+ System.arraycopy(imgUrlBytes, 0, imgUrlDestBytes, 0, 251);
|
|
|
+ fileName = new String(imgUrlDestBytes, StandardCharsets.UTF_8).replace("�", "");
|
|
|
+ }
|
|
|
+ fileName = fileName.concat(".jpg");
|
|
|
+
|
|
|
+ if (!new File(infantryPath.concat(bubingCover), fileName).exists()) {
|
|
|
+ Elements videoMetaPanel = videoDetail.select("div.column-video-cover");
|
|
|
+ String href = videoMetaPanel.select("a > img").first().attr("src");
|
|
|
+
|
|
|
+ long start = System.currentTimeMillis();
|
|
|
+ Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
|
|
|
+
|
|
|
+ saveFile2(response.bodyStream(), infantryPath.concat(bubingCover), fileName);
|
|
|
+ picTime.append(System.currentTimeMillis() - start).append(",");
|
|
|
+ }
|
|
|
+
|
|
|
+ videoInfoInfantry.setImgUrl(iCode.concat("/cover/").concat(fileName));
|
|
|
+
|
|
|
+ // 图片URL img_gf
|
|
|
+ String bubingImgGF = bubings.concat("/img_gf/");
|
|
|
+
|
|
|
+ File bubingImgGFFile = new File(infantryPath.concat(bubingImgGF));
|
|
|
+ if (!bubingImgGFFile.exists() || bubingImgGFFile.listFiles().length == 0) {
|
|
|
+ Elements sampleBoxEles = videoDetail.select("div.preview-images").select("a.tile-item");
|
|
|
+ long start2 = System.currentTimeMillis();
|
|
|
+ if (sampleBoxEles.size() > 0) {
|
|
|
+ Connection.Response responseImg;
|
|
|
+
|
|
|
+ for (Element sampleBoxEle : sampleBoxEles) {
|
|
|
+ String sampleBoxHref = sampleBoxEle.attr("href");
|
|
|
+ if (!sampleBoxHref.contains("http")) {
|
|
|
+ sampleBoxHref = sampleBoxEle.attr("abs:href");
|
|
|
+ }
|
|
|
+
|
|
|
+ try {
|
|
|
+ responseImg = Jsoup.connect(sampleBoxHref).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
|
|
|
+ } catch (HttpStatusException e) {
|
|
|
+ sampleBoxHref = sampleBoxEle.select("img").attr("src");
|
|
|
+ if (!sampleBoxHref.contains("http")) {
|
|
|
+ sampleBoxHref = sampleBoxEle.select("img").attr("abs:src");
|
|
|
+ }
|
|
|
+ responseImg = Jsoup.connect(sampleBoxHref).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
|
|
|
+ }
|
|
|
+ String sampleBoxFileName = sampleBoxHref.substring(sampleBoxHref.lastIndexOf("/") + 1);
|
|
|
+ saveFile2(responseImg.bodyStream(), infantryPath.concat(bubingImgGF), sampleBoxFileName);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ log.error("jsoupVideoInfoInfantry img_gf null,identificationCode={}", videoInfoInfantry.getIdentificationCode());
|
|
|
+ }
|
|
|
+ picTime.append("img_gf:").append(System.currentTimeMillis() - start2).append("}");
|
|
|
+ }
|
|
|
+
|
|
|
+ return picTime.toString();
|
|
|
+ }
|
|
|
+
|
|
|
@Override
|
|
|
public void getMaleCast(VideoInfo videoInfo) {
|
|
|
Document document;
|