BgServiceImpl.java 42 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984
  1. package top.lvzhiqiang.service.impl;
  2. import lombok.extern.slf4j.Slf4j;
  3. import org.jsoup.Connection;
  4. import org.jsoup.Jsoup;
  5. import org.jsoup.nodes.Document;
  6. import org.jsoup.nodes.Element;
  7. import org.jsoup.select.Elements;
  8. import org.springframework.scheduling.annotation.Async;
  9. import org.springframework.stereotype.Service;
  10. import org.springframework.transaction.annotation.Propagation;
  11. import org.springframework.transaction.annotation.Transactional;
  12. import top.lvzhiqiang.config.WebAppConfig;
  13. import top.lvzhiqiang.dto.JavAllInfo;
  14. import top.lvzhiqiang.dto.JavAllInfo4Uncensored;
  15. import top.lvzhiqiang.entity.*;
  16. import top.lvzhiqiang.enumeration.ResultCodeEnum;
  17. import top.lvzhiqiang.exception.BusinessException;
  18. import top.lvzhiqiang.mapper.*;
  19. import top.lvzhiqiang.service.BgService;
  20. import top.lvzhiqiang.util.DateUtils;
  21. import top.lvzhiqiang.util.StringUtils;
  22. import javax.annotation.Resource;
  23. import java.io.*;
  24. import java.lang.reflect.Field;
  25. import java.math.BigDecimal;
  26. import java.nio.charset.StandardCharsets;
  27. import java.time.Instant;
  28. import java.time.LocalDate;
  29. import java.time.LocalDateTime;
  30. import java.time.ZoneOffset;
  31. import java.util.*;
  32. import java.util.stream.Collectors;
  33. /**
  34. * Bg ServiceImpl
  35. *
  36. * @author lvzhiqiang
  37. * 2022/4/16 16:10
  38. */
  39. @Service
  40. @Slf4j
  41. public class BgServiceImpl implements BgService {
  42. @Resource
  43. private VideoGenresMapper videoGenresMapper;
  44. @Resource
  45. private VideoCastMapper videoCastMapper;
  46. @Resource
  47. private VideoInfoCastMapper videoInfoCastMapper;
  48. @Resource
  49. private VideoInfoGenresMapper videoInfoGenresMapper;
  50. @Resource
  51. private VideoInfoMapper videoInfoMapper;
  52. @Resource
  53. private IcodePoolMapper icodePoolMapper;
  54. @Resource
  55. private VideoInfoPoolMapper videoInfoPoolMapper;
  56. @Resource
  57. private VideoSitePoolMapper videoSitePoolMapper;
  58. @Resource
  59. private DicCodeMapper dicCodeMapper;
  60. @Resource
  61. private VideoInfoOtherMapper videoInfoOtherMapper;
  62. @Resource
  63. private VideoInfoUncensoredMapper videoInfoUncensoredMapper;
  64. /**
  65. * findDicCode
  66. *
  67. * @author lvzhiqiang
  68. * 2022/5/3 17:37
  69. */
  70. @Override
  71. public String findDicCode(String codeDesc, String order) throws IllegalAccessException {
  72. List<DicCode> dicCodeList = dicCodeMapper.findByCodeDesc(codeDesc, order);
  73. StringBuffer sb = new StringBuffer("total:".concat(String.valueOf(dicCodeList.size())).concat("<br/>"));
  74. for (DicCode dicCode : dicCodeList) {
  75. sb.append("<table border=\"1\" cellspacing=\"0\"><tr><th>key</th><th>value</th></tr>");
  76. Field[] fields = dicCode.getClass().getDeclaredFields();
  77. for (Field field : fields) {
  78. field.setAccessible(true);
  79. sb.append("<tr>");
  80. sb.append("<td>").append(field.getName()).append("</td>");
  81. sb.append("<td>").append(field.get(dicCode)).append("</td>");
  82. sb.append("</tr>");
  83. }
  84. sb.append("</table>");
  85. }
  86. return sb.toString();
  87. }
  88. /**
  89. * findIcodePool
  90. *
  91. * @author lvzhiqiang
  92. * 2022/5/3 17:37
  93. */
  94. @Override
  95. public String findIcodePool(String identificationCode, Integer status, String order) throws IllegalAccessException {
  96. List<IcodePool> icodePoolList = icodePoolMapper.findByCodeAndStatus(identificationCode, status, order);
  97. StringBuffer sb = new StringBuffer("total:".concat(String.valueOf(icodePoolList.size())).concat("<br/>"));
  98. for (IcodePool icodePool : icodePoolList) {
  99. sb.append("<table border=\"1\" cellspacing=\"0\"><tr><th>key</th><th>value</th></tr>");
  100. Field[] fields = icodePool.getClass().getDeclaredFields();
  101. for (Field field : fields) {
  102. field.setAccessible(true);
  103. sb.append("<tr>");
  104. sb.append("<td>").append(field.getName()).append("</td>");
  105. sb.append("<td>").append(field.get(icodePool)).append("</td>");
  106. sb.append("</tr>");
  107. }
  108. sb.append("</table>");
  109. }
  110. return sb.toString();
  111. }
  112. /**
  113. * findVideoSitePool
  114. *
  115. * @author lvzhiqiang
  116. * 2022/5/3 17:37
  117. */
  118. @Override
  119. public String findVideoSitePool(String url, String order) throws IllegalAccessException {
  120. List<VideoSitePool> videoSitePoolList = videoSitePoolMapper.findByUrl(url, order);
  121. StringBuffer sb = new StringBuffer("total:".concat(String.valueOf(videoSitePoolList.size())).concat("<br/>"));
  122. for (VideoSitePool videoSitePool : videoSitePoolList) {
  123. sb.append("<table border=\"1\" cellspacing=\"0\"><tr><th>key</th><th>value</th></tr>");
  124. Field[] fields = videoSitePool.getClass().getDeclaredFields();
  125. for (Field field : fields) {
  126. field.setAccessible(true);
  127. sb.append("<tr>");
  128. sb.append("<td>").append(field.getName()).append("</td>");
  129. sb.append("<td>").append(field.get(videoSitePool)).append("</td>");
  130. sb.append("</tr>");
  131. }
  132. sb.append("</table>");
  133. }
  134. return sb.toString();
  135. }
  136. /**
  137. * findVideoInfoPool
  138. *
  139. * @author lvzhiqiang
  140. * 2022/5/3 17:37
  141. */
  142. @Override
  143. public String findVideoInfoPool(String identificationCode, Integer type, String order, String crudT) throws IllegalAccessException {
  144. if ("2".equals(crudT)) {
  145. //更新
  146. if (StringUtils.isEmpty(identificationCode) || null == type) {
  147. return "identificationCode和type不能为空";
  148. }
  149. videoInfoPoolMapper.updateTypeByCode(identificationCode, type);
  150. return "success";
  151. }
  152. if ("3".equals(crudT)) {
  153. //删除
  154. if (StringUtils.isEmpty(identificationCode)) {
  155. return "dentificationCode不能为空";
  156. }
  157. videoInfoPoolMapper.delByCode(identificationCode);
  158. return "success";
  159. }
  160. List<VideoInfoPool> videoInfoPoolList = videoInfoPoolMapper.findByCodeAndType(identificationCode, type, order);
  161. StringBuffer sb = new StringBuffer("total:".concat(String.valueOf(videoInfoPoolList.size())).concat("<br/>"));
  162. for (VideoInfoPool videoInfoPool : videoInfoPoolList) {
  163. sb.append("<table border=\"1\" cellspacing=\"0\"><tr><th>key</th><th>value</th></tr>");
  164. Field[] fields = videoInfoPool.getClass().getDeclaredFields();
  165. for (Field field : fields) {
  166. field.setAccessible(true);
  167. sb.append("<tr>");
  168. sb.append("<td>").append(field.getName()).append("</td>");
  169. sb.append("<td>").append(field.get(videoInfoPool)).append("</td>");
  170. sb.append("</tr>");
  171. }
  172. sb.append("</table>");
  173. }
  174. return sb.toString();
  175. }
  176. /**
  177. * findVideoInfo
  178. *
  179. * @author lvzhiqiang
  180. * 2022/5/3 17:37
  181. */
  182. @Override
  183. public String findVideoInfo(String identificationCode, Integer type, String order, String crudT) throws IllegalAccessException {
  184. if ("3".equals(crudT)) {
  185. //删除
  186. if (StringUtils.isEmpty(identificationCode)) {
  187. return "dentificationCode不能为空";
  188. }
  189. videoInfoMapper.delByCode(identificationCode);
  190. return "success";
  191. }
  192. List<VideoInfo> videoInfoList = videoInfoMapper.findByCodeAndType(identificationCode, type, order);
  193. StringBuffer sb = new StringBuffer("total:".concat(String.valueOf(videoInfoList.size())).concat("<br/>"));
  194. for (VideoInfo videoInfo : videoInfoList) {
  195. sb.append("<table border=\"1\" cellspacing=\"0\"><tr><th>key</th><th>value</th></tr>");
  196. Field[] fields = videoInfo.getClass().getDeclaredFields();
  197. for (Field field : fields) {
  198. field.setAccessible(true);
  199. sb.append("<tr>");
  200. sb.append("<td>").append(field.getName()).append("</td>");
  201. sb.append("<td>").append(field.get(videoInfo)).append("</td>");
  202. sb.append("</tr>");
  203. }
  204. sb.append("</table>");
  205. }
  206. return sb.toString();
  207. }
  208. /**
  209. * updateScoreOrComment
  210. *
  211. * @author lvzhiqiang
  212. * 2022/5/4 9:54
  213. */
  214. @Override
  215. public String insertOrUpdateScoreOrComment(String identificationCode, String score, String comment) {
  216. BigDecimal scoreBD = null;
  217. if (StringUtils.isNotEmpty(score)) {
  218. scoreBD = new BigDecimal(score);
  219. }
  220. int num = videoInfoOtherMapper.findByCode(identificationCode);
  221. if (num == 0) {
  222. videoInfoOtherMapper.insertScoreOrComment(identificationCode, scoreBD, comment);
  223. return "insert success";
  224. }
  225. videoInfoOtherMapper.updateScoreOrComment(identificationCode, scoreBD, comment);
  226. return "update success";
  227. }
  228. /**
  229. * Jsoup VideoInfoUncensored
  230. *
  231. * @author lvzhiqiang
  232. * 2022/5/4 19:27
  233. */
  234. @Override
  235. public void jsoupVideoInfoUncensored(Integer status) {
  236. log.info("jsoupVideoInfoUncensored 开始:status={}", status);
  237. // 获取待抓取码列表
  238. List<String> icodeList = videoInfoUncensoredMapper.findIcodeByStatus(status);
  239. if (icodeList.size() == 0) {
  240. log.warn("status={}的icodeList为空", status);
  241. return;
  242. }
  243. // 获取javbus防屏蔽地址
  244. List<String> javbusUrlList = videoSitePoolMapper.findUrlByTypeAndDeleteFlag(1, 1);
  245. if (javbusUrlList.size() == 0) {
  246. log.warn("javbusUrlList为空");
  247. return;
  248. }
  249. // 获取码池图片保存路径
  250. String liuchuPath = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && "liuchu_path".equals(x.getCodeKey())).findFirst().get().getCodeValue();
  251. // 遍历
  252. Document document;
  253. VideoInfoUncensored videoInfoUncensored;
  254. String javbusUrl;
  255. int j = 0;
  256. for (int i = 0; i < icodeList.size(); i++) {
  257. long start = System.currentTimeMillis();
  258. String identificationCode = icodeList.get(i);
  259. int retryCount = 0;
  260. while (retryCount <= 3) {
  261. javbusUrl = javbusUrlList.get((int) (0 + Math.random() * (javbusUrlList.size() - 0)));
  262. try {
  263. document = Jsoup.connect(javbusUrl.concat("/").concat(identificationCode))
  264. .timeout(50000)
  265. //.proxy()
  266. //.data()
  267. .ignoreContentType(true)
  268. .userAgent(getUserAgent())
  269. .header("referer", "https://www.javbus.com/".concat(identificationCode))
  270. .get();
  271. videoInfoUncensored = new VideoInfoUncensored();
  272. videoInfoUncensored.setIdentificationCode(identificationCode);
  273. parseDocument4Uncensored(document, identificationCode, liuchuPath, videoInfoUncensored);
  274. videoInfoUncensored.setStatus(2);
  275. videoInfoUncensoredMapper.updateJsoupInfoByCode(videoInfoUncensored);
  276. j++;
  277. log.info("jsoupVideoInfoUncensored success:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode);
  278. break;
  279. } catch (Exception e) {
  280. ++retryCount;
  281. if (retryCount < 4) {
  282. log.error("jsoupIcodePool error重试:i={},retryCount={},time={},identificationCode={}", i, retryCount, System.currentTimeMillis() - start, identificationCode, e);
  283. } else if (retryCount == 4) {
  284. videoInfoUncensoredMapper.updateStatus(identificationCode, 3);
  285. log.error("jsoupIcodePool error:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode, e);
  286. }
  287. }
  288. }
  289. }
  290. log.info("jsoupVideoInfoUncensored 统计:total={},success={}", icodeList.size(), j);
  291. }
  292. /**
  293. * 初始化骑兵数据
  294. */
  295. @Override
  296. @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
  297. @Async
  298. public void initVideoInfoData(Integer type, Integer isDel) {
  299. long startTime = System.currentTimeMillis();
  300. DicCode dicCode = null;
  301. if (type == 1) {
  302. dicCode = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && "qibing_path".equals(x.getCodeKey())).findFirst().get();
  303. } else if (type == 3) {
  304. dicCode = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && "liuchu_path".equals(x.getCodeKey())).findFirst().get();
  305. }
  306. if (dicCode == null) {
  307. return;
  308. }
  309. String picPath = dicCode.getCodeValue();
  310. if (type == 1) {
  311. JavAllInfo javAllInfo = new JavAllInfo();
  312. getAllFilePaths(picPath, javAllInfo);
  313. saveInfo(javAllInfo);
  314. } else if (type == 3) {
  315. JavAllInfo4Uncensored javAllInfo4Uncensored = new JavAllInfo4Uncensored();
  316. getAllFilePaths4Uncensored(picPath, javAllInfo4Uncensored);
  317. saveInfo4Uncensored(javAllInfo4Uncensored, isDel);
  318. }
  319. long endTime = System.currentTimeMillis();
  320. System.err.println((endTime - startTime) / 1000);
  321. }
  322. /**
  323. * 上传识别码文件
  324. *
  325. * @param is
  326. */
  327. @Override
  328. @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
  329. @Async
  330. public void uploadFile4IdentificationCode(InputStream is, Integer isDel) {
  331. if (isDel == 1) {
  332. icodePoolMapper.deleteAll();
  333. }
  334. List<String> uploadIcodeList = readFromIcodeStream(is);
  335. List<String> icodePoolList = icodePoolMapper.findIcode();
  336. // 过滤库中已存在的
  337. Integer beforeUploadSize = uploadIcodeList.size();
  338. uploadIcodeList.removeAll(icodePoolList);
  339. log.warn("uploadFile4IdentificationCode:beforeUpload={},icodePool={},afterUpload={}", beforeUploadSize, icodePoolList.size(), uploadIcodeList.size());
  340. // 插入
  341. int num = 0;
  342. if (uploadIcodeList.size() > 0) {
  343. num = icodePoolMapper.insertList(uploadIcodeList);
  344. }
  345. log.warn("uploadFile4IdentificationCode:success={}", num);
  346. }
  347. /**
  348. * 单个识别码
  349. *
  350. * @param identificationCode
  351. */
  352. @Override
  353. public void single4IdentificationCode(String identificationCode) {
  354. List<String> icodePoolList = icodePoolMapper.findIcode();
  355. identificationCode = identificationCode.trim().toUpperCase();
  356. if (icodePoolList.contains(identificationCode)) {
  357. throw new BusinessException(ResultCodeEnum.PARAM_DATA_EXIST_ERROR);
  358. }
  359. icodePoolMapper.insert(identificationCode);
  360. }
  361. /**
  362. * Jsoup IcodePool
  363. *
  364. * @param status
  365. */
  366. @Async
  367. @Override
  368. public void jsoupIcodePool(Integer status, Integer isDel) {
  369. log.info("jsoupIcodePool 开始:status={},isDel={}", status, isDel);
  370. if (isDel == 1) {
  371. videoInfoPoolMapper.deleteAll();
  372. }
  373. // 获取待抓取码列表
  374. List<String> icodePoolList = icodePoolMapper.findIcodeByStatus(status);
  375. if (icodePoolList.size() == 0) {
  376. log.warn("status={}的icodePoolList为空", status);
  377. return;
  378. }
  379. // 获取javbus防屏蔽地址
  380. List<String> javbusUrlList = videoSitePoolMapper.findUrlByTypeAndDeleteFlag(1, 1);
  381. if (javbusUrlList.size() == 0) {
  382. log.warn("javbusUrlList为空");
  383. return;
  384. }
  385. // 获取主表所有识别码
  386. List<String> allIcode = videoInfoMapper.findAllIcode();
  387. // 获取码池图片保存路径
  388. String machiPath = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && "machi_path".equals(x.getCodeKey())).findFirst().get().getCodeValue();
  389. // 遍历
  390. Document document;
  391. VideoInfoPool videoInfoPool;
  392. String javbusUrl;
  393. int j = 0;
  394. for (int i = 0; i < icodePoolList.size(); i++) {
  395. long start = System.currentTimeMillis();
  396. String identificationCode = icodePoolList.get(i);
  397. IcodePool icodePool = new IcodePool();
  398. icodePool.setIdentificationCode(identificationCode);
  399. if (allIcode.contains(identificationCode)) {
  400. icodePool.setStatus(4);
  401. icodePool.setRetryCount(0);
  402. icodePoolMapper.updateStatus(icodePool);
  403. log.warn("jsoupIcodePool exists:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode);
  404. continue;
  405. }
  406. int retryCount = 0;
  407. while (retryCount <= 3) {
  408. javbusUrl = javbusUrlList.get((int) (0 + Math.random() * (javbusUrlList.size() - 0)));
  409. try {
  410. document = Jsoup.connect(javbusUrl.concat("/").concat(identificationCode))
  411. .timeout(50000)
  412. //.proxy()
  413. //.data()
  414. .ignoreContentType(true)
  415. .userAgent(getUserAgent())
  416. .header("referer", "https://www.javbus.com/".concat(identificationCode))
  417. .get();
  418. videoInfoPool = new VideoInfoPool();
  419. long picTime = parseDocument(document, identificationCode, machiPath, videoInfoPool);
  420. if (videoInfoPool != null) {
  421. icodePool.setStatus(2);
  422. icodePool.setRetryCount(retryCount);
  423. icodePoolMapper.updateStatus(icodePool);
  424. videoInfoPoolMapper.insert(videoInfoPool);
  425. j++;
  426. }
  427. log.info("jsoupIcodePool success:i={},picTime={},time={},identificationCode={}", i, picTime, System.currentTimeMillis() - start, identificationCode);
  428. break;
  429. } catch (Exception e) {
  430. ++retryCount;
  431. if (retryCount < 4) {
  432. log.error("jsoupIcodePool error重试:i={},retryCount={},time={},identificationCode={}", i, retryCount, System.currentTimeMillis() - start, identificationCode, e);
  433. } else if (retryCount == 4) {
  434. icodePool.setStatus(3);
  435. icodePool.setRetryCount(retryCount - 1);
  436. icodePool.setFailureCause(e.getMessage().length() <= 200 ? e.getMessage() : e.getMessage().substring(0, 200));
  437. icodePoolMapper.updateStatus(icodePool);
  438. log.error("jsoupIcodePool error:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode, e);
  439. }
  440. }
  441. }
  442. }
  443. log.info("jsoupIcodePool 统计:total={},success={}", icodePoolList.size(), j);
  444. }
  445. private String getUserAgent() {
  446. Random r = new Random();
  447. String[] ua = {"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
  448. "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36 OPR/37.0.2178.32",
  449. "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
  450. "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
  451. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586",
  452. "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
  453. "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
  454. "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
  455. "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0)",
  456. "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 BIDUBrowser/8.3 Safari/537.36",
  457. "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36 Core/1.47.277.400 QQBrowser/9.4.7658.400",
  458. "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 UBrowser/5.6.12150.8 Safari/537.36",
  459. "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0",
  460. "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36 TheWorld 7",
  461. "Mozilla/5.0 (Windows NT 6.1; W…) Gecko/20100101 Firefox/60.0",
  462. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"};
  463. int i = r.nextInt(15);
  464. return ua[i];
  465. }
  466. private long parseDocument(Document document, String identificationCode, String machiPath, VideoInfoPool videoInfoPool) throws Exception {
  467. Elements container = document.select("div.container");
  468. if (container.size() == 0) {
  469. throw new Exception("番号无效!");
  470. }
  471. // 名称
  472. String h3 = container.select("h3").first().text();
  473. String[] nameArr = h3.split("\\s+");
  474. if (nameArr.length > 1) {
  475. videoInfoPool.setName(h3.substring(nameArr[0].length()).trim());
  476. } else {
  477. videoInfoPool.setName(nameArr[0]);
  478. }
  479. Elements pEles = container.select("div.info > p");
  480. // 识别码
  481. Element pEle = pEles.get(0);
  482. String iCode = pEle.select("span[style]").first().text();
  483. if (!identificationCode.equalsIgnoreCase(iCode)) {
  484. throw new Exception("番号与站点不一致");
  485. }
  486. videoInfoPool.setIdentificationCode(iCode);
  487. // 发行日期
  488. pEle = pEles.get(1);
  489. String issueDate = pEle.text().split(":")[1].replace("\"", "").trim();
  490. videoInfoPool.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
  491. // 长度
  492. pEle = pEles.get(2);
  493. String length = pEle.text().split(":")[1].replace("\"", "").trim();
  494. videoInfoPool.setLength(length);
  495. // 导演
  496. Elements directorEles = container.select("div.info").select("p:contains(導演)");
  497. if (directorEles.size() > 0) {
  498. pEle = directorEles.first().select("a[href]").first();
  499. videoInfoPool.setDirector(pEle.text());
  500. }
  501. // 制作商
  502. Elements markerEles = container.select("div.info").select("p:contains(製作商)");
  503. if (markerEles.size() > 0) {
  504. pEle = markerEles.first().select("a[href]").first();
  505. videoInfoPool.setMaker(pEle.text());
  506. }
  507. // 发行商
  508. Elements issuerEles = container.select("div.info").select("p:contains(發行商)");
  509. if (issuerEles.size() > 0) {
  510. pEle = issuerEles.first().select("a[href]").first();
  511. videoInfoPool.setIssuer(pEle.text());
  512. }
  513. // 类别
  514. Elements genresEles = container.select("div.info").select("p:contains(類別)");
  515. if (genresEles.size() > 0) {
  516. StringBuffer sb = new StringBuffer();
  517. Elements ahrefEles = genresEles.first().nextElementSibling().select("a[href]");
  518. for (Element ahrefEle : ahrefEles) {
  519. sb.append(ahrefEle.text()).append(",");
  520. }
  521. if (sb.length() > 0) {
  522. sb = sb.deleteCharAt(sb.length() - 1);
  523. }
  524. videoInfoPool.setGenres(sb.toString());
  525. }
  526. // 演员
  527. Elements castEles = container.select("div.info").select("p.star-show:contains(演員)");
  528. if (castEles.size() > 0) {
  529. Elements castElesTemp = container.select("div.info:contains(暫無出演者資訊)");
  530. if (castElesTemp.size() == 0) {
  531. StringBuffer sb = new StringBuffer();
  532. Elements ahrefEles = castEles.first().nextElementSibling().nextElementSibling().select("a[href]");
  533. for (Element ahrefEle : ahrefEles) {
  534. sb.append(ahrefEle.text()).append(",");
  535. }
  536. if (sb.length() > 0) {
  537. sb = sb.deleteCharAt(sb.length() - 1);
  538. }
  539. videoInfoPool.setCast(sb.toString());
  540. }
  541. }
  542. // 图片URL
  543. String href = container.select("a.bigImage").first().attr("abs:href");
  544. long start = System.currentTimeMillis();
  545. Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
  546. String fileName = issueDate.concat(" ").concat(h3);
  547. byte[] imgUrlBytes = fileName.getBytes(StandardCharsets.UTF_8);
  548. if (imgUrlBytes.length > 251) {
  549. byte[] imgUrlDestBytes = new byte[251];
  550. System.arraycopy(imgUrlBytes, 0, imgUrlDestBytes, 0, 251);
  551. fileName = new String(imgUrlDestBytes, StandardCharsets.UTF_8).replace("�", "");
  552. }
  553. fileName = fileName.concat(".jpg");
  554. saveFile(response.bodyStream(), machiPath.concat(fileName));
  555. long end = System.currentTimeMillis();
  556. videoInfoPool.setImgUrl(fileName);
  557. videoInfoPool.setCreateTime(LocalDateTime.now());
  558. videoInfoPool.setType(1);
  559. return end - start;
  560. }
  561. private void parseDocument4Uncensored(Document document, String identificationCode, String machiPath, VideoInfoUncensored videoInfoUncensored) throws Exception {
  562. Elements container = document.select("div.container");
  563. if (container.size() == 0) {
  564. throw new Exception("番号无效!");
  565. }
  566. Elements pEles = container.select("div.info > p");
  567. // 识别码
  568. Element pEle = pEles.get(0);
  569. String iCode = pEle.select("span[style]").first().text();
  570. if (!identificationCode.equalsIgnoreCase(iCode)) {
  571. throw new Exception("番号与站点不一致");
  572. }
  573. // 长度
  574. pEle = pEles.get(2);
  575. String length = pEle.text().split(":")[1].replace("\"", "").trim();
  576. videoInfoUncensored.setLength(length);
  577. // 导演
  578. Elements directorEles = container.select("div.info").select("p:contains(導演)");
  579. if (directorEles.size() > 0) {
  580. pEle = directorEles.first().select("a[href]").first();
  581. videoInfoUncensored.setDirector(pEle.text());
  582. }
  583. // 制作商
  584. Elements markerEles = container.select("div.info").select("p:contains(製作商)");
  585. if (markerEles.size() > 0) {
  586. pEle = markerEles.first().select("a[href]").first();
  587. videoInfoUncensored.setMaker(pEle.text());
  588. }
  589. // 发行商
  590. Elements issuerEles = container.select("div.info").select("p:contains(發行商)");
  591. if (issuerEles.size() > 0) {
  592. pEle = issuerEles.first().select("a[href]").first();
  593. videoInfoUncensored.setIssuer(pEle.text());
  594. }
  595. // 类别
  596. Elements genresEles = container.select("div.info").select("p:contains(類別)");
  597. if (genresEles.size() > 0) {
  598. StringBuffer sb = new StringBuffer();
  599. Elements ahrefEles = genresEles.first().nextElementSibling().select("a[href]");
  600. for (Element ahrefEle : ahrefEles) {
  601. sb.append(ahrefEle.text()).append(",");
  602. }
  603. if (sb.length() > 0) {
  604. sb = sb.deleteCharAt(sb.length() - 1);
  605. }
  606. videoInfoUncensored.setGenres(sb.toString());
  607. }
  608. // 演员
  609. Elements castEles = container.select("div.info").select("p.star-show:contains(演員)");
  610. if (castEles.size() > 0) {
  611. Elements castElesTemp = container.select("div.info:contains(暫無出演者資訊)");
  612. if (castElesTemp.size() == 0) {
  613. StringBuffer sb = new StringBuffer();
  614. Elements ahrefEles = castEles.first().nextElementSibling().nextElementSibling().select("a[href]");
  615. for (Element ahrefEle : ahrefEles) {
  616. sb.append(ahrefEle.text()).append(",");
  617. }
  618. if (sb.length() > 0) {
  619. sb = sb.deleteCharAt(sb.length() - 1);
  620. }
  621. videoInfoUncensored.setCast(sb.toString());
  622. }
  623. }
  624. }
  625. /**
  626. * 保存文件到本地
  627. *
  628. * @param bufferedInputStream
  629. * @param savePath
  630. */
  631. private void saveFile(BufferedInputStream bufferedInputStream, String savePath) throws IOException {
  632. //一次最多读取1k
  633. byte[] buffer = new byte[1024];
  634. //实际读取的长度
  635. int readLenghth;
  636. //创建的一个写出的缓冲流
  637. BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(new FileOutputStream(new File(savePath)));
  638. //文件逐步写入本地
  639. while ((readLenghth = bufferedInputStream.read(buffer, 0, 1024)) != -1) {//先读出来,保存在buffer数组中
  640. bufferedOutputStream.write(buffer, 0, readLenghth);//再从buffer中取出来保存到本地
  641. }
  642. //关闭缓冲流
  643. bufferedOutputStream.close();
  644. bufferedInputStream.close();
  645. }
  646. // 递归获取某目录下的所有子目录以及子文件
  647. private void getAllFilePaths(String filePath, JavAllInfo javAllInfo) {
  648. File[] files = new File(filePath).listFiles();
  649. if (files == null) {
  650. return;
  651. }
  652. int type = 0;
  653. if (filePath.contains("骑兵")) {
  654. type = 1;
  655. } else if (filePath.contains("步兵")) {
  656. type = 2;
  657. }
  658. for (File file : files) {
  659. if (file.isDirectory()) {
  660. // 文件夹
  661. getAllFilePaths(file.getAbsolutePath(), javAllInfo);
  662. } else {
  663. String fileName = file.getName();
  664. if (fileName.endsWith(".jpg") || (fileName.endsWith(".lnk") && fileName.contains(".jpg"))) {
  665. String parentName = file.getParentFile().getName();
  666. // 识别码
  667. String name = fileName.substring(10).replace(".jpg", "").trim();
  668. String[] nameArr = name.split("\\s+");
  669. try {
  670. boolean isMain = false;
  671. if (fileName.endsWith(".jpg")) {
  672. isMain = true;
  673. // 获取正片信息
  674. VideoInfo videoInfo = new VideoInfo();
  675. // 发行日期
  676. String issueDate = fileName.substring(0, 10);
  677. videoInfo.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
  678. videoInfo.setIdentificationCode(nameArr[0]);
  679. // 名称
  680. if (nameArr.length > 1) {
  681. videoInfo.setName(name.substring(nameArr[0].length()).trim());
  682. } else {
  683. videoInfo.setName(nameArr[0]);
  684. }
  685. // 类型
  686. videoInfo.setType(type);
  687. // 图片URL
  688. videoInfo.setImgUrl(parentName.concat("/").concat(fileName));
  689. // 创建时间 TODO
  690. // 修改时间
  691. videoInfo.setCreateTime(Instant.ofEpochMilli(file.lastModified()).atZone(ZoneOffset.ofHours(8)).toLocalDateTime());
  692. // 主体是谁
  693. videoInfo.setMainWho(parentName);
  694. javAllInfo.getVideoInfoList().add(videoInfo);
  695. }
  696. if (parentName.contains("类别")) {
  697. // 获取类别
  698. String videoGenres = parentName.replace("(类别)", "");
  699. javAllInfo.getVideoGenresSet().add(videoGenres);
  700. VideoInfoGenres videoInfoGenres = new VideoInfoGenres();
  701. videoInfoGenres.setIdentificationCode(nameArr[0]);
  702. videoInfoGenres.setName(videoGenres);
  703. videoInfoGenres.setType(isMain ? 1 : 2);
  704. javAllInfo.getVideoInfoGenresSet().add(videoInfoGenres);
  705. } else if (parentName.contains("优)")) {
  706. // 获取演员
  707. String videoCast = "";
  708. if (parentName.contains("(男")) {
  709. videoCast = parentName.replace("(男优)", "");
  710. javAllInfo.getVideoCastMap().put(videoCast, "1");
  711. } else if (parentName.contains("(女")) {
  712. videoCast = parentName.replace("(女优)", "");
  713. javAllInfo.getVideoCastMap().put(videoCast, "2");
  714. }
  715. VideoInfoCast videoInfoCast = new VideoInfoCast();
  716. videoInfoCast.setIdentificationCode(nameArr[0]);
  717. videoInfoCast.setName(videoCast);
  718. videoInfoCast.setType(isMain ? 1 : 2);
  719. javAllInfo.getVideoInfoCastSet().add(videoInfoCast);
  720. }
  721. } catch (Exception e) {
  722. System.err.println("error:" + file.getAbsolutePath());
  723. System.err.println("error reason:" + e.getMessage());
  724. }
  725. } else if (!fileName.endsWith(".jpg") && !fileName.endsWith(".lnk")) {
  726. String[] nameArr = fileName.substring(0, fileName.lastIndexOf(".")).split("\\s+");
  727. String parentName = file.getParentFile().getName();
  728. javAllInfo.getVideoUrlMap().put(nameArr[1], parentName.concat("/").concat(fileName));
  729. }
  730. }
  731. }
  732. }
  733. private void getAllFilePaths4Uncensored(String filePath, JavAllInfo4Uncensored javAllInfo4Uncensored) {
  734. File[] files = new File(filePath).listFiles();
  735. if (files == null) {
  736. return;
  737. }
  738. for (File file : files) {
  739. if (file.isDirectory()) {
  740. // 文件夹
  741. getAllFilePaths4Uncensored(file.getAbsolutePath(), javAllInfo4Uncensored);
  742. } else {
  743. String fileName = file.getName();
  744. if (fileName.endsWith(".jpg")) {
  745. String parentName = file.getParentFile().getName();
  746. // 识别码
  747. String name = fileName.substring(10).replace(".jpg", "").trim();
  748. String[] nameArr = name.split("\\s+");
  749. try {
  750. // 获取正片信息
  751. VideoInfoUncensored videoInfoUncensored = new VideoInfoUncensored();
  752. // 发行日期
  753. String issueDate = fileName.substring(0, 10);
  754. videoInfoUncensored.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
  755. videoInfoUncensored.setIdentificationCode(nameArr[0]);
  756. // 名称
  757. if (nameArr.length > 1) {
  758. videoInfoUncensored.setName(name.substring(nameArr[0].length()).trim());
  759. } else {
  760. videoInfoUncensored.setName(nameArr[0]);
  761. }
  762. // 类型
  763. videoInfoUncensored.setStatus(1);
  764. // 图片URL
  765. videoInfoUncensored.setImgUrl(parentName.concat("/").concat(fileName));
  766. // 创建时间 TODO
  767. // 修改时间
  768. videoInfoUncensored.setCreateTime(Instant.ofEpochMilli(file.lastModified()).atZone(ZoneOffset.ofHours(8)).toLocalDateTime());
  769. // 主体是谁
  770. videoInfoUncensored.setMainWho(parentName);
  771. javAllInfo4Uncensored.getVideoInfoUncensoredList().add(videoInfoUncensored);
  772. System.out.println("success:" + file.getAbsolutePath());
  773. } catch (Exception e) {
  774. System.err.println("error:" + file.getAbsolutePath());
  775. System.err.println("error reason:" + e.getMessage());
  776. }
  777. } else if (!fileName.endsWith(".jpg") && !fileName.endsWith(".srt")) {
  778. String[] nameArr = fileName.substring(0, fileName.lastIndexOf(".")).split("\\s+");
  779. String parentName = file.getParentFile().getName();
  780. javAllInfo4Uncensored.getVideoUrlMap().put(nameArr[1], parentName.concat("/").concat(fileName));
  781. }
  782. }
  783. }
  784. }
  785. // 保存所有文件
  786. @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
  787. public void saveInfo(JavAllInfo javAllInfo) {
  788. // 删除所有
  789. videoGenresMapper.deleteAll();
  790. videoInfoMapper.deleteAll();
  791. videoCastMapper.deleteAll();
  792. videoGenresMapper.deleteAll();
  793. videoInfoCastMapper.deleteAll();
  794. // 保存分类
  795. Set<String> videoGenresSet = javAllInfo.getVideoGenresSet();
  796. //List<VideoGenres> videoGenresList = new ArrayList<>();
  797. for (String s : videoGenresSet) {
  798. VideoGenres videoGenres = new VideoGenres();
  799. videoGenres.setName(s);
  800. videoGenresMapper.insert(videoGenres);
  801. System.out.println(videoGenres);
  802. //videoGenresList.add(videoGenres);
  803. }
  804. //Map<String, VideoGenres> stringVideoGenresMap = videoGenresList.stream().collect(Collectors.toMap(VideoGenres::getName, Function.identity(), (k1, k2) -> k2));
  805. // 保存演员
  806. Map<String, String> videoCastMap = javAllInfo.getVideoCastMap();
  807. //List<VideoCast> videoCastList = new ArrayList<>();
  808. for (Map.Entry<String, String> entry : videoCastMap.entrySet()) {
  809. VideoCast videoCast = new VideoCast();
  810. videoCast.setName(entry.getKey());
  811. videoCast.setType(Integer.parseInt(entry.getValue()));
  812. videoCastMapper.insert(videoCast);
  813. System.out.println(videoCast);
  814. //videoCastList.add(videoCast);
  815. }
  816. // Map<String, VideoCast> stringVideoCastMap = videoCastList.stream().collect(Collectors.toMap(VideoCast::getName, Function.identity(), (k1, k2) -> k2));
  817. // 保存影片信息
  818. List<VideoInfo> videoInfoList = javAllInfo.getVideoInfoList();
  819. Map<String, String> videoUrlMap = javAllInfo.getVideoUrlMap();
  820. videoInfoList.parallelStream().forEach(e -> {
  821. e.setVideoUrl(videoUrlMap.get(e.getIdentificationCode()));
  822. });
  823. //for (VideoInfo videoInfo : videoInfoList) {
  824. // try {
  825. // videoInfoMapper.insert(videoInfo);
  826. // System.out.println("success:" + videoInfo);
  827. // } catch (Exception e) {
  828. // e.printStackTrace();
  829. // System.out.println("error:" + videoInfo);
  830. // }
  831. //}
  832. int videoInfoCount = videoInfoMapper.insertList(videoInfoList);
  833. System.out.println("videoInfoCount:" + videoInfoCount);
  834. // 保存影片类别关联信息
  835. Set<VideoInfoGenres> videoInfoGenresSet = javAllInfo.getVideoInfoGenresSet();
  836. videoInfoGenresMapper.insertList(videoInfoGenresSet);
  837. // 保存影片类别关联信息
  838. Set<VideoInfoCast> videoInfoCastSet = javAllInfo.getVideoInfoCastSet();
  839. videoInfoCastMapper.insertList(videoInfoCastSet);
  840. }
  841. @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
  842. public void saveInfo4Uncensored(JavAllInfo4Uncensored javAllInfo4Uncensored, Integer isDel) {
  843. // 删除所有
  844. if (isDel == 1) {
  845. videoInfoUncensoredMapper.deleteAll();
  846. }
  847. // 保存影片信息
  848. List<VideoInfoUncensored> videoInfoUncensoredList = javAllInfo4Uncensored.getVideoInfoUncensoredList();
  849. Map<String, String> videoUrlMap = javAllInfo4Uncensored.getVideoUrlMap();
  850. videoInfoUncensoredList.parallelStream().forEach(e -> {
  851. e.setVideoUrl(videoUrlMap.get(e.getIdentificationCode()));
  852. });
  853. int videoInfoCount = videoInfoUncensoredMapper.insertList(videoInfoUncensoredList);
  854. System.out.println("videoInfoCount:" + videoInfoCount);
  855. }
  856. private List<String> readFromIcodeStream(InputStream inputStream) {
  857. List<String> list = new ArrayList<>();
  858. BufferedReader br = null;
  859. try {
  860. br = new BufferedReader(new InputStreamReader(inputStream));
  861. String line;
  862. while ((line = br.readLine()) != null) {
  863. if (StringUtils.isNotEmpty(line)) {
  864. list.add(line.trim().toUpperCase());
  865. }
  866. }
  867. } catch (FileNotFoundException e) {
  868. e.printStackTrace();
  869. } catch (IOException e) {
  870. e.printStackTrace();
  871. } finally {
  872. if (br != null) {
  873. try {
  874. br.close();
  875. } catch (IOException e) {
  876. e.printStackTrace();
  877. }
  878. }
  879. }
  880. return list.stream().distinct().collect(Collectors.toList());
  881. }
  882. }