| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984 |
- package top.lvzhiqiang.service.impl;
- import lombok.extern.slf4j.Slf4j;
- import org.jsoup.Connection;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Document;
- import org.jsoup.nodes.Element;
- import org.jsoup.select.Elements;
- import org.springframework.scheduling.annotation.Async;
- import org.springframework.stereotype.Service;
- import org.springframework.transaction.annotation.Propagation;
- import org.springframework.transaction.annotation.Transactional;
- import top.lvzhiqiang.config.WebAppConfig;
- import top.lvzhiqiang.dto.JavAllInfo;
- import top.lvzhiqiang.dto.JavAllInfo4Uncensored;
- import top.lvzhiqiang.entity.*;
- import top.lvzhiqiang.enumeration.ResultCodeEnum;
- import top.lvzhiqiang.exception.BusinessException;
- import top.lvzhiqiang.mapper.*;
- import top.lvzhiqiang.service.BgService;
- import top.lvzhiqiang.util.DateUtils;
- import top.lvzhiqiang.util.StringUtils;
- import javax.annotation.Resource;
- import java.io.*;
- import java.lang.reflect.Field;
- import java.math.BigDecimal;
- import java.nio.charset.StandardCharsets;
- import java.time.Instant;
- import java.time.LocalDate;
- import java.time.LocalDateTime;
- import java.time.ZoneOffset;
- import java.util.*;
- import java.util.stream.Collectors;
- /**
- * Bg ServiceImpl
- *
- * @author lvzhiqiang
- * 2022/4/16 16:10
- */
- @Service
- @Slf4j
- public class BgServiceImpl implements BgService {
- @Resource
- private VideoGenresMapper videoGenresMapper;
- @Resource
- private VideoCastMapper videoCastMapper;
- @Resource
- private VideoInfoCastMapper videoInfoCastMapper;
- @Resource
- private VideoInfoGenresMapper videoInfoGenresMapper;
- @Resource
- private VideoInfoMapper videoInfoMapper;
- @Resource
- private IcodePoolMapper icodePoolMapper;
- @Resource
- private VideoInfoPoolMapper videoInfoPoolMapper;
- @Resource
- private VideoSitePoolMapper videoSitePoolMapper;
- @Resource
- private DicCodeMapper dicCodeMapper;
- @Resource
- private VideoInfoOtherMapper videoInfoOtherMapper;
- @Resource
- private VideoInfoUncensoredMapper videoInfoUncensoredMapper;
- /**
- * findDicCode
- *
- * @author lvzhiqiang
- * 2022/5/3 17:37
- */
- @Override
- public String findDicCode(String codeDesc, String order) throws IllegalAccessException {
- List<DicCode> dicCodeList = dicCodeMapper.findByCodeDesc(codeDesc, order);
- StringBuffer sb = new StringBuffer("total:".concat(String.valueOf(dicCodeList.size())).concat("<br/>"));
- for (DicCode dicCode : dicCodeList) {
- sb.append("<table border=\"1\" cellspacing=\"0\"><tr><th>key</th><th>value</th></tr>");
- Field[] fields = dicCode.getClass().getDeclaredFields();
- for (Field field : fields) {
- field.setAccessible(true);
- sb.append("<tr>");
- sb.append("<td>").append(field.getName()).append("</td>");
- sb.append("<td>").append(field.get(dicCode)).append("</td>");
- sb.append("</tr>");
- }
- sb.append("</table>");
- }
- return sb.toString();
- }
- /**
- * findIcodePool
- *
- * @author lvzhiqiang
- * 2022/5/3 17:37
- */
- @Override
- public String findIcodePool(String identificationCode, Integer status, String order) throws IllegalAccessException {
- List<IcodePool> icodePoolList = icodePoolMapper.findByCodeAndStatus(identificationCode, status, order);
- StringBuffer sb = new StringBuffer("total:".concat(String.valueOf(icodePoolList.size())).concat("<br/>"));
- for (IcodePool icodePool : icodePoolList) {
- sb.append("<table border=\"1\" cellspacing=\"0\"><tr><th>key</th><th>value</th></tr>");
- Field[] fields = icodePool.getClass().getDeclaredFields();
- for (Field field : fields) {
- field.setAccessible(true);
- sb.append("<tr>");
- sb.append("<td>").append(field.getName()).append("</td>");
- sb.append("<td>").append(field.get(icodePool)).append("</td>");
- sb.append("</tr>");
- }
- sb.append("</table>");
- }
- return sb.toString();
- }
- /**
- * findVideoSitePool
- *
- * @author lvzhiqiang
- * 2022/5/3 17:37
- */
- @Override
- public String findVideoSitePool(String url, String order) throws IllegalAccessException {
- List<VideoSitePool> videoSitePoolList = videoSitePoolMapper.findByUrl(url, order);
- StringBuffer sb = new StringBuffer("total:".concat(String.valueOf(videoSitePoolList.size())).concat("<br/>"));
- for (VideoSitePool videoSitePool : videoSitePoolList) {
- sb.append("<table border=\"1\" cellspacing=\"0\"><tr><th>key</th><th>value</th></tr>");
- Field[] fields = videoSitePool.getClass().getDeclaredFields();
- for (Field field : fields) {
- field.setAccessible(true);
- sb.append("<tr>");
- sb.append("<td>").append(field.getName()).append("</td>");
- sb.append("<td>").append(field.get(videoSitePool)).append("</td>");
- sb.append("</tr>");
- }
- sb.append("</table>");
- }
- return sb.toString();
- }
- /**
- * findVideoInfoPool
- *
- * @author lvzhiqiang
- * 2022/5/3 17:37
- */
- @Override
- public String findVideoInfoPool(String identificationCode, Integer type, String order, String crudT) throws IllegalAccessException {
- if ("2".equals(crudT)) {
- //更新
- if (StringUtils.isEmpty(identificationCode) || null == type) {
- return "identificationCode和type不能为空";
- }
- videoInfoPoolMapper.updateTypeByCode(identificationCode, type);
- return "success";
- }
- if ("3".equals(crudT)) {
- //删除
- if (StringUtils.isEmpty(identificationCode)) {
- return "dentificationCode不能为空";
- }
- videoInfoPoolMapper.delByCode(identificationCode);
- return "success";
- }
- List<VideoInfoPool> videoInfoPoolList = videoInfoPoolMapper.findByCodeAndType(identificationCode, type, order);
- StringBuffer sb = new StringBuffer("total:".concat(String.valueOf(videoInfoPoolList.size())).concat("<br/>"));
- for (VideoInfoPool videoInfoPool : videoInfoPoolList) {
- sb.append("<table border=\"1\" cellspacing=\"0\"><tr><th>key</th><th>value</th></tr>");
- Field[] fields = videoInfoPool.getClass().getDeclaredFields();
- for (Field field : fields) {
- field.setAccessible(true);
- sb.append("<tr>");
- sb.append("<td>").append(field.getName()).append("</td>");
- sb.append("<td>").append(field.get(videoInfoPool)).append("</td>");
- sb.append("</tr>");
- }
- sb.append("</table>");
- }
- return sb.toString();
- }
- /**
- * findVideoInfo
- *
- * @author lvzhiqiang
- * 2022/5/3 17:37
- */
- @Override
- public String findVideoInfo(String identificationCode, Integer type, String order, String crudT) throws IllegalAccessException {
- if ("3".equals(crudT)) {
- //删除
- if (StringUtils.isEmpty(identificationCode)) {
- return "dentificationCode不能为空";
- }
- videoInfoMapper.delByCode(identificationCode);
- return "success";
- }
- List<VideoInfo> videoInfoList = videoInfoMapper.findByCodeAndType(identificationCode, type, order);
- StringBuffer sb = new StringBuffer("total:".concat(String.valueOf(videoInfoList.size())).concat("<br/>"));
- for (VideoInfo videoInfo : videoInfoList) {
- sb.append("<table border=\"1\" cellspacing=\"0\"><tr><th>key</th><th>value</th></tr>");
- Field[] fields = videoInfo.getClass().getDeclaredFields();
- for (Field field : fields) {
- field.setAccessible(true);
- sb.append("<tr>");
- sb.append("<td>").append(field.getName()).append("</td>");
- sb.append("<td>").append(field.get(videoInfo)).append("</td>");
- sb.append("</tr>");
- }
- sb.append("</table>");
- }
- return sb.toString();
- }
- /**
- * updateScoreOrComment
- *
- * @author lvzhiqiang
- * 2022/5/4 9:54
- */
- @Override
- public String insertOrUpdateScoreOrComment(String identificationCode, String score, String comment) {
- BigDecimal scoreBD = null;
- if (StringUtils.isNotEmpty(score)) {
- scoreBD = new BigDecimal(score);
- }
- int num = videoInfoOtherMapper.findByCode(identificationCode);
- if (num == 0) {
- videoInfoOtherMapper.insertScoreOrComment(identificationCode, scoreBD, comment);
- return "insert success";
- }
- videoInfoOtherMapper.updateScoreOrComment(identificationCode, scoreBD, comment);
- return "update success";
- }
- /**
- * Jsoup VideoInfoUncensored
- *
- * @author lvzhiqiang
- * 2022/5/4 19:27
- */
- @Override
- public void jsoupVideoInfoUncensored(Integer status) {
- log.info("jsoupVideoInfoUncensored 开始:status={}", status);
- // 获取待抓取码列表
- List<String> icodeList = videoInfoUncensoredMapper.findIcodeByStatus(status);
- if (icodeList.size() == 0) {
- log.warn("status={}的icodeList为空", status);
- return;
- }
- // 获取javbus防屏蔽地址
- List<String> javbusUrlList = videoSitePoolMapper.findUrlByTypeAndDeleteFlag(1, 1);
- if (javbusUrlList.size() == 0) {
- log.warn("javbusUrlList为空");
- return;
- }
- // 获取码池图片保存路径
- String liuchuPath = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && "liuchu_path".equals(x.getCodeKey())).findFirst().get().getCodeValue();
- // 遍历
- Document document;
- VideoInfoUncensored videoInfoUncensored;
- String javbusUrl;
- int j = 0;
- for (int i = 0; i < icodeList.size(); i++) {
- long start = System.currentTimeMillis();
- String identificationCode = icodeList.get(i);
- int retryCount = 0;
- while (retryCount <= 3) {
- javbusUrl = javbusUrlList.get((int) (0 + Math.random() * (javbusUrlList.size() - 0)));
- try {
- document = Jsoup.connect(javbusUrl.concat("/").concat(identificationCode))
- .timeout(50000)
- //.proxy()
- //.data()
- .ignoreContentType(true)
- .userAgent(getUserAgent())
- .header("referer", "https://www.javbus.com/".concat(identificationCode))
- .get();
- videoInfoUncensored = new VideoInfoUncensored();
- videoInfoUncensored.setIdentificationCode(identificationCode);
- parseDocument4Uncensored(document, identificationCode, liuchuPath, videoInfoUncensored);
- videoInfoUncensored.setStatus(2);
- videoInfoUncensoredMapper.updateJsoupInfoByCode(videoInfoUncensored);
- j++;
- log.info("jsoupVideoInfoUncensored success:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode);
- break;
- } catch (Exception e) {
- ++retryCount;
- if (retryCount < 4) {
- log.error("jsoupIcodePool error重试:i={},retryCount={},time={},identificationCode={}", i, retryCount, System.currentTimeMillis() - start, identificationCode, e);
- } else if (retryCount == 4) {
- videoInfoUncensoredMapper.updateStatus(identificationCode, 3);
- log.error("jsoupIcodePool error:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode, e);
- }
- }
- }
- }
- log.info("jsoupVideoInfoUncensored 统计:total={},success={}", icodeList.size(), j);
- }
- /**
- * 初始化骑兵数据
- */
- @Override
- @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
- @Async
- public void initVideoInfoData(Integer type, Integer isDel) {
- long startTime = System.currentTimeMillis();
- DicCode dicCode = null;
- if (type == 1) {
- dicCode = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && "qibing_path".equals(x.getCodeKey())).findFirst().get();
- } else if (type == 3) {
- dicCode = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && "liuchu_path".equals(x.getCodeKey())).findFirst().get();
- }
- if (dicCode == null) {
- return;
- }
- String picPath = dicCode.getCodeValue();
- if (type == 1) {
- JavAllInfo javAllInfo = new JavAllInfo();
- getAllFilePaths(picPath, javAllInfo);
- saveInfo(javAllInfo);
- } else if (type == 3) {
- JavAllInfo4Uncensored javAllInfo4Uncensored = new JavAllInfo4Uncensored();
- getAllFilePaths4Uncensored(picPath, javAllInfo4Uncensored);
- saveInfo4Uncensored(javAllInfo4Uncensored, isDel);
- }
- long endTime = System.currentTimeMillis();
- System.err.println((endTime - startTime) / 1000);
- }
- /**
- * 上传识别码文件
- *
- * @param is
- */
- @Override
- @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
- @Async
- public void uploadFile4IdentificationCode(InputStream is, Integer isDel) {
- if (isDel == 1) {
- icodePoolMapper.deleteAll();
- }
- List<String> uploadIcodeList = readFromIcodeStream(is);
- List<String> icodePoolList = icodePoolMapper.findIcode();
- // 过滤库中已存在的
- Integer beforeUploadSize = uploadIcodeList.size();
- uploadIcodeList.removeAll(icodePoolList);
- log.warn("uploadFile4IdentificationCode:beforeUpload={},icodePool={},afterUpload={}", beforeUploadSize, icodePoolList.size(), uploadIcodeList.size());
- // 插入
- int num = 0;
- if (uploadIcodeList.size() > 0) {
- num = icodePoolMapper.insertList(uploadIcodeList);
- }
- log.warn("uploadFile4IdentificationCode:success={}", num);
- }
- /**
- * 单个识别码
- *
- * @param identificationCode
- */
- @Override
- public void single4IdentificationCode(String identificationCode) {
- List<String> icodePoolList = icodePoolMapper.findIcode();
- identificationCode = identificationCode.trim().toUpperCase();
- if (icodePoolList.contains(identificationCode)) {
- throw new BusinessException(ResultCodeEnum.PARAM_DATA_EXIST_ERROR);
- }
- icodePoolMapper.insert(identificationCode);
- }
- /**
- * Jsoup IcodePool
- *
- * @param status
- */
- @Async
- @Override
- public void jsoupIcodePool(Integer status, Integer isDel) {
- log.info("jsoupIcodePool 开始:status={},isDel={}", status, isDel);
- if (isDel == 1) {
- videoInfoPoolMapper.deleteAll();
- }
- // 获取待抓取码列表
- List<String> icodePoolList = icodePoolMapper.findIcodeByStatus(status);
- if (icodePoolList.size() == 0) {
- log.warn("status={}的icodePoolList为空", status);
- return;
- }
- // 获取javbus防屏蔽地址
- List<String> javbusUrlList = videoSitePoolMapper.findUrlByTypeAndDeleteFlag(1, 1);
- if (javbusUrlList.size() == 0) {
- log.warn("javbusUrlList为空");
- return;
- }
- // 获取主表所有识别码
- List<String> allIcode = videoInfoMapper.findAllIcode();
- // 获取码池图片保存路径
- String machiPath = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && "machi_path".equals(x.getCodeKey())).findFirst().get().getCodeValue();
- // 遍历
- Document document;
- VideoInfoPool videoInfoPool;
- String javbusUrl;
- int j = 0;
- for (int i = 0; i < icodePoolList.size(); i++) {
- long start = System.currentTimeMillis();
- String identificationCode = icodePoolList.get(i);
- IcodePool icodePool = new IcodePool();
- icodePool.setIdentificationCode(identificationCode);
- if (allIcode.contains(identificationCode)) {
- icodePool.setStatus(4);
- icodePool.setRetryCount(0);
- icodePoolMapper.updateStatus(icodePool);
- log.warn("jsoupIcodePool exists:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode);
- continue;
- }
- int retryCount = 0;
- while (retryCount <= 3) {
- javbusUrl = javbusUrlList.get((int) (0 + Math.random() * (javbusUrlList.size() - 0)));
- try {
- document = Jsoup.connect(javbusUrl.concat("/").concat(identificationCode))
- .timeout(50000)
- //.proxy()
- //.data()
- .ignoreContentType(true)
- .userAgent(getUserAgent())
- .header("referer", "https://www.javbus.com/".concat(identificationCode))
- .get();
- videoInfoPool = new VideoInfoPool();
- long picTime = parseDocument(document, identificationCode, machiPath, videoInfoPool);
- if (videoInfoPool != null) {
- icodePool.setStatus(2);
- icodePool.setRetryCount(retryCount);
- icodePoolMapper.updateStatus(icodePool);
- videoInfoPoolMapper.insert(videoInfoPool);
- j++;
- }
- log.info("jsoupIcodePool success:i={},picTime={},time={},identificationCode={}", i, picTime, System.currentTimeMillis() - start, identificationCode);
- break;
- } catch (Exception e) {
- ++retryCount;
- if (retryCount < 4) {
- log.error("jsoupIcodePool error重试:i={},retryCount={},time={},identificationCode={}", i, retryCount, System.currentTimeMillis() - start, identificationCode, e);
- } else if (retryCount == 4) {
- icodePool.setStatus(3);
- icodePool.setRetryCount(retryCount - 1);
- icodePool.setFailureCause(e.getMessage().length() <= 200 ? e.getMessage() : e.getMessage().substring(0, 200));
- icodePoolMapper.updateStatus(icodePool);
- log.error("jsoupIcodePool error:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode, e);
- }
- }
- }
- }
- log.info("jsoupIcodePool 统计:total={},success={}", icodePoolList.size(), j);
- }
- private String getUserAgent() {
- Random r = new Random();
- String[] ua = {"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36 OPR/37.0.2178.32",
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586",
- "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
- "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
- "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
- "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0)",
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 BIDUBrowser/8.3 Safari/537.36",
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36 Core/1.47.277.400 QQBrowser/9.4.7658.400",
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 UBrowser/5.6.12150.8 Safari/537.36",
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0",
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36 TheWorld 7",
- "Mozilla/5.0 (Windows NT 6.1; W…) Gecko/20100101 Firefox/60.0",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"};
- int i = r.nextInt(15);
- return ua[i];
- }
- private long parseDocument(Document document, String identificationCode, String machiPath, VideoInfoPool videoInfoPool) throws Exception {
- Elements container = document.select("div.container");
- if (container.size() == 0) {
- throw new Exception("番号无效!");
- }
- // 名称
- String h3 = container.select("h3").first().text();
- String[] nameArr = h3.split("\\s+");
- if (nameArr.length > 1) {
- videoInfoPool.setName(h3.substring(nameArr[0].length()).trim());
- } else {
- videoInfoPool.setName(nameArr[0]);
- }
- Elements pEles = container.select("div.info > p");
- // 识别码
- Element pEle = pEles.get(0);
- String iCode = pEle.select("span[style]").first().text();
- if (!identificationCode.equalsIgnoreCase(iCode)) {
- throw new Exception("番号与站点不一致");
- }
- videoInfoPool.setIdentificationCode(iCode);
- // 发行日期
- pEle = pEles.get(1);
- String issueDate = pEle.text().split(":")[1].replace("\"", "").trim();
- videoInfoPool.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
- // 长度
- pEle = pEles.get(2);
- String length = pEle.text().split(":")[1].replace("\"", "").trim();
- videoInfoPool.setLength(length);
- // 导演
- Elements directorEles = container.select("div.info").select("p:contains(導演)");
- if (directorEles.size() > 0) {
- pEle = directorEles.first().select("a[href]").first();
- videoInfoPool.setDirector(pEle.text());
- }
- // 制作商
- Elements markerEles = container.select("div.info").select("p:contains(製作商)");
- if (markerEles.size() > 0) {
- pEle = markerEles.first().select("a[href]").first();
- videoInfoPool.setMaker(pEle.text());
- }
- // 发行商
- Elements issuerEles = container.select("div.info").select("p:contains(發行商)");
- if (issuerEles.size() > 0) {
- pEle = issuerEles.first().select("a[href]").first();
- videoInfoPool.setIssuer(pEle.text());
- }
- // 类别
- Elements genresEles = container.select("div.info").select("p:contains(類別)");
- if (genresEles.size() > 0) {
- StringBuffer sb = new StringBuffer();
- Elements ahrefEles = genresEles.first().nextElementSibling().select("a[href]");
- for (Element ahrefEle : ahrefEles) {
- sb.append(ahrefEle.text()).append(",");
- }
- if (sb.length() > 0) {
- sb = sb.deleteCharAt(sb.length() - 1);
- }
- videoInfoPool.setGenres(sb.toString());
- }
- // 演员
- Elements castEles = container.select("div.info").select("p.star-show:contains(演員)");
- if (castEles.size() > 0) {
- Elements castElesTemp = container.select("div.info:contains(暫無出演者資訊)");
- if (castElesTemp.size() == 0) {
- StringBuffer sb = new StringBuffer();
- Elements ahrefEles = castEles.first().nextElementSibling().nextElementSibling().select("a[href]");
- for (Element ahrefEle : ahrefEles) {
- sb.append(ahrefEle.text()).append(",");
- }
- if (sb.length() > 0) {
- sb = sb.deleteCharAt(sb.length() - 1);
- }
- videoInfoPool.setCast(sb.toString());
- }
- }
- // 图片URL
- String href = container.select("a.bigImage").first().attr("abs:href");
- long start = System.currentTimeMillis();
- Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
- String fileName = issueDate.concat(" ").concat(h3);
- byte[] imgUrlBytes = fileName.getBytes(StandardCharsets.UTF_8);
- if (imgUrlBytes.length > 251) {
- byte[] imgUrlDestBytes = new byte[251];
- System.arraycopy(imgUrlBytes, 0, imgUrlDestBytes, 0, 251);
- fileName = new String(imgUrlDestBytes, StandardCharsets.UTF_8).replace("�", "");
- }
- fileName = fileName.concat(".jpg");
- saveFile(response.bodyStream(), machiPath.concat(fileName));
- long end = System.currentTimeMillis();
- videoInfoPool.setImgUrl(fileName);
- videoInfoPool.setCreateTime(LocalDateTime.now());
- videoInfoPool.setType(1);
- return end - start;
- }
- private void parseDocument4Uncensored(Document document, String identificationCode, String machiPath, VideoInfoUncensored videoInfoUncensored) throws Exception {
- Elements container = document.select("div.container");
- if (container.size() == 0) {
- throw new Exception("番号无效!");
- }
- Elements pEles = container.select("div.info > p");
- // 识别码
- Element pEle = pEles.get(0);
- String iCode = pEle.select("span[style]").first().text();
- if (!identificationCode.equalsIgnoreCase(iCode)) {
- throw new Exception("番号与站点不一致");
- }
- // 长度
- pEle = pEles.get(2);
- String length = pEle.text().split(":")[1].replace("\"", "").trim();
- videoInfoUncensored.setLength(length);
- // 导演
- Elements directorEles = container.select("div.info").select("p:contains(導演)");
- if (directorEles.size() > 0) {
- pEle = directorEles.first().select("a[href]").first();
- videoInfoUncensored.setDirector(pEle.text());
- }
- // 制作商
- Elements markerEles = container.select("div.info").select("p:contains(製作商)");
- if (markerEles.size() > 0) {
- pEle = markerEles.first().select("a[href]").first();
- videoInfoUncensored.setMaker(pEle.text());
- }
- // 发行商
- Elements issuerEles = container.select("div.info").select("p:contains(發行商)");
- if (issuerEles.size() > 0) {
- pEle = issuerEles.first().select("a[href]").first();
- videoInfoUncensored.setIssuer(pEle.text());
- }
- // 类别
- Elements genresEles = container.select("div.info").select("p:contains(類別)");
- if (genresEles.size() > 0) {
- StringBuffer sb = new StringBuffer();
- Elements ahrefEles = genresEles.first().nextElementSibling().select("a[href]");
- for (Element ahrefEle : ahrefEles) {
- sb.append(ahrefEle.text()).append(",");
- }
- if (sb.length() > 0) {
- sb = sb.deleteCharAt(sb.length() - 1);
- }
- videoInfoUncensored.setGenres(sb.toString());
- }
- // 演员
- Elements castEles = container.select("div.info").select("p.star-show:contains(演員)");
- if (castEles.size() > 0) {
- Elements castElesTemp = container.select("div.info:contains(暫無出演者資訊)");
- if (castElesTemp.size() == 0) {
- StringBuffer sb = new StringBuffer();
- Elements ahrefEles = castEles.first().nextElementSibling().nextElementSibling().select("a[href]");
- for (Element ahrefEle : ahrefEles) {
- sb.append(ahrefEle.text()).append(",");
- }
- if (sb.length() > 0) {
- sb = sb.deleteCharAt(sb.length() - 1);
- }
- videoInfoUncensored.setCast(sb.toString());
- }
- }
- }
- /**
- * 保存文件到本地
- *
- * @param bufferedInputStream
- * @param savePath
- */
- private void saveFile(BufferedInputStream bufferedInputStream, String savePath) throws IOException {
- //一次最多读取1k
- byte[] buffer = new byte[1024];
- //实际读取的长度
- int readLenghth;
- //创建的一个写出的缓冲流
- BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(new FileOutputStream(new File(savePath)));
- //文件逐步写入本地
- while ((readLenghth = bufferedInputStream.read(buffer, 0, 1024)) != -1) {//先读出来,保存在buffer数组中
- bufferedOutputStream.write(buffer, 0, readLenghth);//再从buffer中取出来保存到本地
- }
- //关闭缓冲流
- bufferedOutputStream.close();
- bufferedInputStream.close();
- }
- // 递归获取某目录下的所有子目录以及子文件
- private void getAllFilePaths(String filePath, JavAllInfo javAllInfo) {
- File[] files = new File(filePath).listFiles();
- if (files == null) {
- return;
- }
- int type = 0;
- if (filePath.contains("骑兵")) {
- type = 1;
- } else if (filePath.contains("步兵")) {
- type = 2;
- }
- for (File file : files) {
- if (file.isDirectory()) {
- // 文件夹
- getAllFilePaths(file.getAbsolutePath(), javAllInfo);
- } else {
- String fileName = file.getName();
- if (fileName.endsWith(".jpg") || (fileName.endsWith(".lnk") && fileName.contains(".jpg"))) {
- String parentName = file.getParentFile().getName();
- // 识别码
- String name = fileName.substring(10).replace(".jpg", "").trim();
- String[] nameArr = name.split("\\s+");
- try {
- boolean isMain = false;
- if (fileName.endsWith(".jpg")) {
- isMain = true;
- // 获取正片信息
- VideoInfo videoInfo = new VideoInfo();
- // 发行日期
- String issueDate = fileName.substring(0, 10);
- videoInfo.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
- videoInfo.setIdentificationCode(nameArr[0]);
- // 名称
- if (nameArr.length > 1) {
- videoInfo.setName(name.substring(nameArr[0].length()).trim());
- } else {
- videoInfo.setName(nameArr[0]);
- }
- // 类型
- videoInfo.setType(type);
- // 图片URL
- videoInfo.setImgUrl(parentName.concat("/").concat(fileName));
- // 创建时间 TODO
- // 修改时间
- videoInfo.setCreateTime(Instant.ofEpochMilli(file.lastModified()).atZone(ZoneOffset.ofHours(8)).toLocalDateTime());
- // 主体是谁
- videoInfo.setMainWho(parentName);
- javAllInfo.getVideoInfoList().add(videoInfo);
- }
- if (parentName.contains("类别")) {
- // 获取类别
- String videoGenres = parentName.replace("(类别)", "");
- javAllInfo.getVideoGenresSet().add(videoGenres);
- VideoInfoGenres videoInfoGenres = new VideoInfoGenres();
- videoInfoGenres.setIdentificationCode(nameArr[0]);
- videoInfoGenres.setName(videoGenres);
- videoInfoGenres.setType(isMain ? 1 : 2);
- javAllInfo.getVideoInfoGenresSet().add(videoInfoGenres);
- } else if (parentName.contains("优)")) {
- // 获取演员
- String videoCast = "";
- if (parentName.contains("(男")) {
- videoCast = parentName.replace("(男优)", "");
- javAllInfo.getVideoCastMap().put(videoCast, "1");
- } else if (parentName.contains("(女")) {
- videoCast = parentName.replace("(女优)", "");
- javAllInfo.getVideoCastMap().put(videoCast, "2");
- }
- VideoInfoCast videoInfoCast = new VideoInfoCast();
- videoInfoCast.setIdentificationCode(nameArr[0]);
- videoInfoCast.setName(videoCast);
- videoInfoCast.setType(isMain ? 1 : 2);
- javAllInfo.getVideoInfoCastSet().add(videoInfoCast);
- }
- } catch (Exception e) {
- System.err.println("error:" + file.getAbsolutePath());
- System.err.println("error reason:" + e.getMessage());
- }
- } else if (!fileName.endsWith(".jpg") && !fileName.endsWith(".lnk")) {
- String[] nameArr = fileName.substring(0, fileName.lastIndexOf(".")).split("\\s+");
- String parentName = file.getParentFile().getName();
- javAllInfo.getVideoUrlMap().put(nameArr[1], parentName.concat("/").concat(fileName));
- }
- }
- }
- }
- private void getAllFilePaths4Uncensored(String filePath, JavAllInfo4Uncensored javAllInfo4Uncensored) {
- File[] files = new File(filePath).listFiles();
- if (files == null) {
- return;
- }
- for (File file : files) {
- if (file.isDirectory()) {
- // 文件夹
- getAllFilePaths4Uncensored(file.getAbsolutePath(), javAllInfo4Uncensored);
- } else {
- String fileName = file.getName();
- if (fileName.endsWith(".jpg")) {
- String parentName = file.getParentFile().getName();
- // 识别码
- String name = fileName.substring(10).replace(".jpg", "").trim();
- String[] nameArr = name.split("\\s+");
- try {
- // 获取正片信息
- VideoInfoUncensored videoInfoUncensored = new VideoInfoUncensored();
- // 发行日期
- String issueDate = fileName.substring(0, 10);
- videoInfoUncensored.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
- videoInfoUncensored.setIdentificationCode(nameArr[0]);
- // 名称
- if (nameArr.length > 1) {
- videoInfoUncensored.setName(name.substring(nameArr[0].length()).trim());
- } else {
- videoInfoUncensored.setName(nameArr[0]);
- }
- // 类型
- videoInfoUncensored.setStatus(1);
- // 图片URL
- videoInfoUncensored.setImgUrl(parentName.concat("/").concat(fileName));
- // 创建时间 TODO
- // 修改时间
- videoInfoUncensored.setCreateTime(Instant.ofEpochMilli(file.lastModified()).atZone(ZoneOffset.ofHours(8)).toLocalDateTime());
- // 主体是谁
- videoInfoUncensored.setMainWho(parentName);
- javAllInfo4Uncensored.getVideoInfoUncensoredList().add(videoInfoUncensored);
- System.out.println("success:" + file.getAbsolutePath());
- } catch (Exception e) {
- System.err.println("error:" + file.getAbsolutePath());
- System.err.println("error reason:" + e.getMessage());
- }
- } else if (!fileName.endsWith(".jpg") && !fileName.endsWith(".srt")) {
- String[] nameArr = fileName.substring(0, fileName.lastIndexOf(".")).split("\\s+");
- String parentName = file.getParentFile().getName();
- javAllInfo4Uncensored.getVideoUrlMap().put(nameArr[1], parentName.concat("/").concat(fileName));
- }
- }
- }
- }
- // 保存所有文件
- @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
- public void saveInfo(JavAllInfo javAllInfo) {
- // 删除所有
- videoGenresMapper.deleteAll();
- videoInfoMapper.deleteAll();
- videoCastMapper.deleteAll();
- videoGenresMapper.deleteAll();
- videoInfoCastMapper.deleteAll();
- // 保存分类
- Set<String> videoGenresSet = javAllInfo.getVideoGenresSet();
- //List<VideoGenres> videoGenresList = new ArrayList<>();
- for (String s : videoGenresSet) {
- VideoGenres videoGenres = new VideoGenres();
- videoGenres.setName(s);
- videoGenresMapper.insert(videoGenres);
- System.out.println(videoGenres);
- //videoGenresList.add(videoGenres);
- }
- //Map<String, VideoGenres> stringVideoGenresMap = videoGenresList.stream().collect(Collectors.toMap(VideoGenres::getName, Function.identity(), (k1, k2) -> k2));
- // 保存演员
- Map<String, String> videoCastMap = javAllInfo.getVideoCastMap();
- //List<VideoCast> videoCastList = new ArrayList<>();
- for (Map.Entry<String, String> entry : videoCastMap.entrySet()) {
- VideoCast videoCast = new VideoCast();
- videoCast.setName(entry.getKey());
- videoCast.setType(Integer.parseInt(entry.getValue()));
- videoCastMapper.insert(videoCast);
- System.out.println(videoCast);
- //videoCastList.add(videoCast);
- }
- // Map<String, VideoCast> stringVideoCastMap = videoCastList.stream().collect(Collectors.toMap(VideoCast::getName, Function.identity(), (k1, k2) -> k2));
- // 保存影片信息
- List<VideoInfo> videoInfoList = javAllInfo.getVideoInfoList();
- Map<String, String> videoUrlMap = javAllInfo.getVideoUrlMap();
- videoInfoList.parallelStream().forEach(e -> {
- e.setVideoUrl(videoUrlMap.get(e.getIdentificationCode()));
- });
- //for (VideoInfo videoInfo : videoInfoList) {
- // try {
- // videoInfoMapper.insert(videoInfo);
- // System.out.println("success:" + videoInfo);
- // } catch (Exception e) {
- // e.printStackTrace();
- // System.out.println("error:" + videoInfo);
- // }
- //}
- int videoInfoCount = videoInfoMapper.insertList(videoInfoList);
- System.out.println("videoInfoCount:" + videoInfoCount);
- // 保存影片类别关联信息
- Set<VideoInfoGenres> videoInfoGenresSet = javAllInfo.getVideoInfoGenresSet();
- videoInfoGenresMapper.insertList(videoInfoGenresSet);
- // 保存影片类别关联信息
- Set<VideoInfoCast> videoInfoCastSet = javAllInfo.getVideoInfoCastSet();
- videoInfoCastMapper.insertList(videoInfoCastSet);
- }
- @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
- public void saveInfo4Uncensored(JavAllInfo4Uncensored javAllInfo4Uncensored, Integer isDel) {
- // 删除所有
- if (isDel == 1) {
- videoInfoUncensoredMapper.deleteAll();
- }
- // 保存影片信息
- List<VideoInfoUncensored> videoInfoUncensoredList = javAllInfo4Uncensored.getVideoInfoUncensoredList();
- Map<String, String> videoUrlMap = javAllInfo4Uncensored.getVideoUrlMap();
- videoInfoUncensoredList.parallelStream().forEach(e -> {
- e.setVideoUrl(videoUrlMap.get(e.getIdentificationCode()));
- });
- int videoInfoCount = videoInfoUncensoredMapper.insertList(videoInfoUncensoredList);
- System.out.println("videoInfoCount:" + videoInfoCount);
- }
- private List<String> readFromIcodeStream(InputStream inputStream) {
- List<String> list = new ArrayList<>();
- BufferedReader br = null;
- try {
- br = new BufferedReader(new InputStreamReader(inputStream));
- String line;
- while ((line = br.readLine()) != null) {
- if (StringUtils.isNotEmpty(line)) {
- list.add(line.trim().toUpperCase());
- }
- }
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- } finally {
- if (br != null) {
- try {
- br.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
- return list.stream().distinct().collect(Collectors.toList());
- }
- }
|