Test2.java 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. import org.jsoup.Connection;
  2. import org.jsoup.Jsoup;
  3. import org.jsoup.nodes.Document;
  4. import org.jsoup.nodes.Element;
  5. import org.jsoup.select.Elements;
  6. import top.lvzhiqiang.entity.VideoInfoPool;
  7. import top.lvzhiqiang.util.DateUtils;
  8. import java.io.*;
  9. import java.net.InetSocketAddress;
  10. import java.net.Proxy;
  11. import java.time.LocalDate;
  12. import java.time.LocalDateTime;
  13. public class Test2 {
  14. public static void main(String[] args) {
  15. String javbusUrl = "https://www.seejav.work/";
  16. String identificationCode = "MGMQ-053";// DTT-049 HAWA-243 HISN-011 DANDY-745 VOSS-172 PFES-024 VIDA-005 SHKD-843 CAWD-176 BLK-467
  17. Document document;
  18. VideoInfoPool videoInfoPool;
  19. try {
  20. document = Jsoup.connect(javbusUrl.concat(identificationCode)).timeout(50000).get();
  21. videoInfoPool = new VideoInfoPool();
  22. parseDocument(document, identificationCode, videoInfoPool);
  23. System.out.println(videoInfoPool);
  24. } catch (Exception e) {
  25. e.printStackTrace();
  26. }
  27. }
  28. private static void parseDocument(Document document, String identificationCode, VideoInfoPool videoInfoPool) throws Exception {
  29. Elements container = document.select("div.container");
  30. if (container.size() == 0) {
  31. throw new Exception("番号无效!");
  32. }
  33. // 名称
  34. String h3 = container.select("h3").first().text();
  35. String[] nameArr = h3.split("\\s+");
  36. if (nameArr.length > 1) {
  37. videoInfoPool.setName(h3.substring(nameArr[0].length()).trim());
  38. } else {
  39. videoInfoPool.setName(nameArr[0]);
  40. }
  41. Elements pEles = container.select("div.info > p");
  42. // 识别码
  43. Element pEle = pEles.get(0);
  44. String iCode = pEle.select("span[style]").first().text();
  45. if (!identificationCode.equalsIgnoreCase(iCode)) {
  46. throw new Exception("番号与站点不一致");
  47. }
  48. videoInfoPool.setIdentificationCode(iCode);
  49. // 发行日期
  50. pEle = pEles.get(1);
  51. String issueDate = pEle.text().split(":")[1].replace("\"", "").trim();
  52. videoInfoPool.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
  53. // 长度
  54. pEle = pEles.get(2);
  55. String length = pEle.text().split(":")[1].replace("\"", "").trim();
  56. videoInfoPool.setLength(length);
  57. // 导演
  58. Elements directorEles = container.select("div.info").select("p:contains(導演)");
  59. if (directorEles.size() > 0) {
  60. pEle = directorEles.first().select("a[href]").first();
  61. videoInfoPool.setDirector(pEle.text());
  62. }
  63. // 制作商
  64. Elements markerEles = container.select("div.info").select("p:contains(製作商)");
  65. if (markerEles.size() > 0) {
  66. pEle = markerEles.first().select("a[href]").first();
  67. videoInfoPool.setMaker(pEle.text());
  68. }
  69. // 发行商
  70. Elements issuerEles = container.select("div.info").select("p:contains(發行商)");
  71. if (issuerEles.size() > 0) {
  72. pEle = issuerEles.first().select("a[href]").first();
  73. videoInfoPool.setIssuer(pEle.text());
  74. }
  75. // 类别
  76. Elements genresEles = container.select("div.info").select("p:contains(類別)");
  77. if (genresEles.size() > 0) {
  78. StringBuffer sb = new StringBuffer();
  79. Elements ahrefEles = genresEles.first().nextElementSibling().select("a[href]");
  80. for (Element ahrefEle : ahrefEles) {
  81. sb.append(ahrefEle.text()).append(",");
  82. }
  83. if (sb.length() > 0) {
  84. sb = sb.deleteCharAt(sb.length() - 1);
  85. }
  86. videoInfoPool.setGenres(sb.toString());
  87. }
  88. // 演员
  89. Elements castEles = container.select("div.info").select("p.star-show:contains(演員)");
  90. if (castEles.size() > 0) {
  91. Elements castElesTemp = container.select("div.info:contains(暫無出演者資訊)");
  92. if (castElesTemp.size() == 0) {
  93. StringBuffer sb = new StringBuffer();
  94. Elements ahrefEles = castEles.first().nextElementSibling().nextElementSibling().select("a[href]");
  95. for (Element ahrefEle : ahrefEles) {
  96. sb.append(ahrefEle.text()).append(",");
  97. }
  98. if (sb.length() > 0) {
  99. sb = sb.deleteCharAt(sb.length() - 1);
  100. }
  101. videoInfoPool.setCast(sb.toString());
  102. }
  103. }
  104. // 图片URL
  105. String href = container.select("a.bigImage").first().attr("abs:href");
  106. Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
  107. String machiPath = "F:/1/0/2/4/视频/电影/A级(成人级)/码池/";
  108. String fileName = issueDate.concat(" ").concat(h3).concat(".jpg");
  109. //saveFile(response.bodyStream(), machiPath.concat(fileName));
  110. videoInfoPool.setImgUrl(fileName);
  111. videoInfoPool.setCreateTime(LocalDateTime.now());
  112. }
  113. /**
  114. * 保存文件到本地
  115. *
  116. * @param bufferedInputStream
  117. * @param savePath
  118. */
  119. private static void saveFile(BufferedInputStream bufferedInputStream, String savePath) throws IOException {
  120. //一次最多读取1k
  121. byte[] buffer = new byte[1024];
  122. //实际读取的长度
  123. int readLenghth;
  124. //创建的一个写出的缓冲流
  125. BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(new FileOutputStream(new File(savePath)));
  126. //文件逐步写入本地
  127. while ((readLenghth = bufferedInputStream.read(buffer, 0, 1024)) != -1) {//先读出来,保存在buffer数组中
  128. bufferedOutputStream.write(buffer, 0, readLenghth);//再从buffer中取出来保存到本地
  129. }
  130. //关闭缓冲流
  131. bufferedOutputStream.close();
  132. bufferedInputStream.close();
  133. }
  134. }