Selaa lähdekoodia

add:码池数据抓取v1

tujidelv 3 vuotta sitten
vanhempi
commit
bc83ceaf1d

+ 6 - 0
pom.xml

@@ -90,6 +90,12 @@
             <artifactId>fastjson</artifactId>
             <version>1.2.54</version>
         </dependency>
+        <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.14.3</version>
+        </dependency>
     </dependencies>
 
     <build>

+ 17 - 0
src/main/java/top/lvzhiqiang/controller/BgController.java

@@ -67,4 +67,21 @@ public class BgController {
         bgService.uploadFile4IdentificationCode(file.getInputStream());
         return R.ok();
     }
+
+    /**
+     * Jsoup IcodePool
+     *
+     * @author lvzhiqiang
+     * 2022/4/16 16:10
+     */
+    @RequestMapping("/jsoupIcodePool")
+    @ResponseBody
+    public R jsoupIcodePool(Integer status) {
+        if (null == status) {
+            status = 1;
+        }
+
+        bgService.jsoupIcodePool(status);
+        return R.ok();
+    }
 }

+ 12 - 1
src/main/java/top/lvzhiqiang/entity/IcodePool.java

@@ -26,10 +26,21 @@ public class IcodePool implements Serializable {
     private String identificationCode;
 
     /**
-     * 状态(1:待爬取,2:爬取成功,3:爬取失败)
+     * 状态(1:待爬取,2:爬取成功,3:爬取失败,4:主表已存在)
      */
     private Integer status;
 
+
+    /**
+     * Failure cause
+     */
+    private String failureCause;
+
+    /**
+     * Number of retries already made
+     */
+    private Integer retryCount;
+
     /**
      * 删除标志{1:正常,2:已删除}
      */

+ 97 - 0
src/main/java/top/lvzhiqiang/entity/VideoInfoPool.java

@@ -0,0 +1,97 @@
+package top.lvzhiqiang.entity;
+
+import com.fasterxml.jackson.annotation.JsonFormat;
+import lombok.Data;
+import top.lvzhiqiang.util.DateUtils;
+
+import java.io.Serializable;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+
+/**
+ * Video info pool entity.
+ *
+ * @author lvzhiqiang
+ * 2022/4/16 18:03
+ */
+@Data
+public class VideoInfoPool implements Serializable {
+
+    /**
+     * Primary key
+     */
+    private Long id;
+
+    /**
+     * Name
+     */
+    private String name;
+
+    /**
+     * Identification code
+     */
+    private String identificationCode;
+
+    /**
+     * Issue date (serialized with the day-level pattern)
+     */
+    @JsonFormat(pattern = DateUtils.PATTERN_TO_DAYS)
+    private LocalDate issueDate;
+
+    /**
+     * Length
+     */
+    private String length;
+
+    /**
+     * Director
+     */
+    private String director;
+
+    /**
+     * Maker
+     */
+    private String maker;
+
+    /**
+     * Issuer
+     */
+    private String issuer;
+
+    /**
+     * Genres
+     */
+    private String genres;
+
+    /**
+     * Cast
+     */
+    private String cast;
+
+    /**
+     * Image URL
+     */
+    private String imgUrl;
+
+    /**
+     * Type {1: pending review, 2: review passed, 3: review rejected}
+     */
+    private Integer type;
+
+    /**
+     * Delete flag {1: normal, 2: deleted}
+     */
+    private Integer deleteFlag;
+
+    /**
+     * Creation time (serialized with the second-level pattern)
+     */
+    @JsonFormat(pattern = DateUtils.PATTERN_TO_SECONDS)
+    private LocalDateTime createTime;
+
+    /**
+     * Last modification time (serialized with the second-level pattern)
+     */
+    @JsonFormat(pattern = DateUtils.PATTERN_TO_SECONDS)
+    private LocalDateTime modifyTime;
+}

+ 10 - 1
src/main/java/top/lvzhiqiang/mapper/IcodePoolMapper.java

@@ -3,7 +3,7 @@ package top.lvzhiqiang.mapper;
 import org.apache.ibatis.annotations.Delete;
 import org.apache.ibatis.annotations.Insert;
 import org.apache.ibatis.annotations.Select;
-import top.lvzhiqiang.entity.DicCode;
+import org.apache.ibatis.annotations.Update;
 import top.lvzhiqiang.entity.IcodePool;
 
 import java.util.List;
@@ -47,4 +47,13 @@ public interface IcodePoolMapper {
      */
     @Select("SELECT identification_code FROM icode_pool WHERE delete_flag = 1")
     List<String> findIcode();
+
+    /**
+     * Updates the crawl state of the icode_pool rows whose identification_code matches.
+     * <p>
+     * status, failure_cause and retry_count are overwritten with the values carried by the
+     * argument (a null property is written as-is), and modify_time is set to now().
+     *
+     * @param icodePool carries the identification code to match and the new status,
+     *                  failure cause and retry count
+     * @return the value MyBatis reports for the update (number of affected rows)
+     */
+    @Update("update icode_pool set status = #{status},failure_cause = #{failureCause},retry_count = #{retryCount},modify_time = now() where identification_code = #{identificationCode}")
+    int updateStatus(IcodePool icodePool);
 }

+ 6 - 0
src/main/java/top/lvzhiqiang/mapper/VideoInfoMapper.java

@@ -56,6 +56,12 @@ public interface VideoInfoMapper {
     List<VideoInfo> findAll();
 
     /**
+     * Returns the distinct identification codes stored in video_info.
+     */
+    @Select("select distinct identification_code from video_info")
+    List<String> findAllIcode();
+
+    /**
      * 根据条件查询
      */
     List<VideoInfo> getVideoInfoList(Map<String, Object> params);

+ 51 - 0
src/main/java/top/lvzhiqiang/mapper/VideoInfoPoolMapper.java

@@ -0,0 +1,51 @@
+package top.lvzhiqiang.mapper;
+
+import org.apache.ibatis.annotations.Delete;
+import org.apache.ibatis.annotations.Insert;
+import org.apache.ibatis.annotations.Options;
+import top.lvzhiqiang.entity.VideoInfoPool;
+
+import java.util.List;
+
+/**
+ * MyBatis mapper for the video_info_pool table.
+ *
+ * @author lvzhiqiang
+ * 2022/4/16 18:03
+ */
+public interface VideoInfoPoolMapper {
+
+    /**
+     * Deletes every row of video_info_pool.
+     */
+    @Delete("DELETE FROM video_info_pool where 1=1")
+    void deleteAll();
+
+    /**
+     * Inserts all given records with a single multi-row INSERT statement.
+     * <p>
+     * modify_time is set to now() by the database; every other column is taken from the
+     * corresponding property of each element.
+     * NOTE(review): an empty list renders "VALUES" without any tuple — presumably callers
+     * must pass a non-empty list; confirm.
+     *
+     * @param videoInfoPoolList records to insert
+     * @return the value MyBatis reports for the insert (number of affected rows)
+     */
+    @Insert({"<script>" +
+            "INSERT INTO video_info_pool(name, identification_code, issue_date, length, director, maker, issuer, genres, cast, img_url, type, create_time, modify_time) " +
+            "VALUES " +
+            "<foreach collection='list' item='vip' index=\"index\" separator=\",\">" +
+            "   (#{vip.name}, #{vip.identificationCode}, #{vip.issueDate}, #{vip.length}, #{vip.director}, #{vip.maker}, #{vip.issuer}, #{vip.genres}, #{vip.cast}, #{vip.imgUrl}, #{vip.type}, #{vip.createTime}, now())" +
+            " </foreach>" +
+            "</script>"})
+    int insertList(List<VideoInfoPool> videoInfoPoolList);
+
+    /**
+     * Inserts a single record; modify_time is set to now() by the database.
+     * <p>
+     * Generated keys are enabled, so the generated "id" column value is written back to the
+     * {@code id} property of the argument.
+     *
+     * @param videoInfoPool record to insert
+     * @return the value MyBatis reports for the insert (number of affected rows)
+     */
+    @Insert("INSERT INTO video_info_pool(name, identification_code, issue_date, length, director, maker, issuer, genres, cast, img_url, type, create_time, modify_time) " +
+            "VALUES (#{name}, #{identificationCode}, #{issueDate}, #{length}, #{director}, #{maker}, #{issuer}, #{genres}, #{cast}, #{imgUrl}, #{type}, #{createTime}, now())")
+    @Options(useGeneratedKeys = true, keyProperty = "id", keyColumn = "id")
+    int insert(VideoInfoPool videoInfoPool);
+
+    /**
+     * Query by condition (placeholder: no method is declared for it yet)
+     */
+}

+ 7 - 0
src/main/java/top/lvzhiqiang/service/BgService.java

@@ -22,4 +22,11 @@ public interface BgService {
      * @param is
      */
     void uploadFile4IdentificationCode(InputStream is);
+
+    /**
+     * Crawls the identification codes of the icode pool with Jsoup.
+     *
+     * @param status icode pool status of the codes to process
+     */
+    void jsoupIcodePool(Integer status);
 }

+ 191 - 0
src/main/java/top/lvzhiqiang/service/impl/BgServiceImpl.java

@@ -1,6 +1,11 @@
 package top.lvzhiqiang.service.impl;
 
 import lombok.extern.slf4j.Slf4j;
+import org.jsoup.Connection;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
 import org.springframework.scheduling.annotation.Async;
 import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Propagation;
@@ -17,6 +22,7 @@ import javax.annotation.Resource;
 import java.io.*;
 import java.time.Instant;
 import java.time.LocalDate;
+import java.time.LocalDateTime;
 import java.time.ZoneOffset;
 import java.util.ArrayList;
 import java.util.List;
@@ -46,6 +52,8 @@ public class BgServiceImpl implements BgService {
     private VideoInfoMapper videoInfoMapper;
     @Resource
     private IcodePoolMapper icodePoolMapper;
+    @Resource
+    private VideoInfoPoolMapper videoInfoPoolMapper;
 
     /**
      * 初始化骑兵数据
@@ -97,6 +105,189 @@ public class BgServiceImpl implements BgService {
         log.warn("uploadFile4IdentificationCode:success={}", num);
     }
 
+    /**
+     * Jsoup IcodePool
+     *
+     * @param status
+     */
+    @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
+    @Async
+    @Override
+    public void jsoupIcodePool(Integer status) {
+        // 获取待抓取码列表
+        List<String> icodePoolList = icodePoolMapper.findIcodeByStatus(status);
+        // 获取主表所有识别码
+        List<String> allIcode = videoInfoMapper.findAllIcode();
+
+        // 获取javbus防屏蔽地址
+        DicCode dicCode = WebAppConfig.dicCodeList.stream().filter(x -> 2 == x.getType() && "javbus".equals(x.getCodeKey())).findFirst().get();
+        if (dicCode == null) {
+            return;
+        }
+        String javbusUrl = dicCode.getCodeValue();
+        // 校验地址
+        try {
+            Jsoup.connect(javbusUrl.concat(javbusUrl));
+            log.info("jsoupIcodePool:javbus防屏蔽地址有效!javbusUrl={}", javbusUrl);
+        } catch (Exception e) {
+            log.error("jsoupIcodePool:javbus防屏蔽地址失效!javbusUrl={}", javbusUrl, e);
+            return;
+        }
+
+        // 获取码池图片保存路径
+        String machiPath = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && "machi_path".equals(x.getCodeKey())).findFirst().get().getCodeValue();
+
+        // 遍历
+        Document document;
+        VideoInfoPool videoInfoPool;
+        for (int i = 0; i < icodePoolList.size(); i++) {
+            String identificationCode = icodePoolList.get(i);
+
+            IcodePool icodePool = new IcodePool();
+            icodePool.setIdentificationCode(identificationCode);
+            if (allIcode.contains(identificationCode)) {
+                icodePool.setStatus(4);
+                icodePool.setRetryCount(0);
+                icodePoolMapper.updateStatus(icodePool);
+                log.warn("jsoupIcodePool exists:i={},identificationCode={}", i, identificationCode);
+                continue;
+            }
+
+            try {
+                document = Jsoup.connect(javbusUrl.concat(identificationCode)).timeout(50000).get();
+
+                videoInfoPool = new VideoInfoPool();
+                parseDocument(document, identificationCode, machiPath, videoInfoPool);
+                if (videoInfoPool != null) {
+                    icodePool.setStatus(2);
+                    icodePool.setRetryCount(0);
+                    icodePoolMapper.updateStatus(icodePool);
+                    videoInfoPoolMapper.insert(videoInfoPool);
+                }
+
+                log.info("jsoupIcodePool success:i={},identificationCode={}", i, identificationCode);
+            } catch (Exception e) {
+                icodePool.setStatus(3);
+                icodePool.setRetryCount(0);
+                icodePool.setFailureCause(e.getMessage().length() <= 200 ? e.getMessage() : e.getMessage().substring(0, 200));
+                icodePoolMapper.updateStatus(icodePool);
+                log.error("jsoupIcodePool error:i={},identificationCode={}", i, identificationCode, e);
+            }
+        }
+    }
+
+    /**
+     * Extracts the video fields from a fetched detail page into {@code videoInfoPool} and
+     * downloads the cover image into {@code machiPath}.
+     * <p>
+     * Name, identification code, issue date, length and the cover image are required: a page
+     * without them is rejected with a descriptive exception. Director, maker, issuer, genres
+     * and cast are optional and are left untouched when the page does not provide them.
+     *
+     * @param document           fetched detail page
+     * @param identificationCode code that was requested; must match the code shown on the page (ignoring case)
+     * @param machiPath          directory prefix the cover image is saved under
+     * @param videoInfoPool      target object that receives the parsed values
+     * @throws Exception when the page is invalid, a required element is missing, the date cannot
+     *                   be parsed, or the image download/save fails
+     */
+    private void parseDocument(Document document, String identificationCode, String machiPath, VideoInfoPool videoInfoPool) throws Exception {
+        Elements container = document.select("div.container");
+        if (container.size() == 0) {
+            throw new Exception("番号无效!");
+        }
+
+        // Name: drop the first whitespace-separated token of the heading when more text follows it
+        Element h3Ele = container.select("h3").first();
+        if (h3Ele == null) {
+            throw new Exception("title element <h3> not found");
+        }
+        String h3 = h3Ele.text();
+        String[] nameArr = h3.split("\\s+");
+        if (nameArr.length > 1) {
+            videoInfoPool.setName(h3.substring(nameArr[0].length()).trim());
+        } else {
+            videoInfoPool.setName(nameArr[0]);
+        }
+
+        // The first three info paragraphs are read by position: code, issue date, length
+        Elements pEles = container.select("div.info > p");
+        if (pEles.size() < 3) {
+            throw new Exception("info block incomplete, paragraph count=" + pEles.size());
+        }
+        // Identification code
+        Element iCodeEle = pEles.get(0).select("span[style]").first();
+        if (iCodeEle == null) {
+            throw new Exception("identification code element not found");
+        }
+        String iCode = iCodeEle.text();
+        if (!identificationCode.equalsIgnoreCase(iCode)) {
+            throw new Exception("番号与站点不一致");
+        }
+        videoInfoPool.setIdentificationCode(iCode);
+        // Issue date
+        String issueDate = valueAfterColon(pEles.get(1));
+        videoInfoPool.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
+        // Length
+        videoInfoPool.setLength(valueAfterColon(pEles.get(2)));
+
+        Elements info = container.select("div.info");
+        // Director
+        String director = firstLinkText(info.select("p:contains(導演)"));
+        if (director != null) {
+            videoInfoPool.setDirector(director);
+        }
+        // Maker
+        String maker = firstLinkText(info.select("p:contains(製作商)"));
+        if (maker != null) {
+            videoInfoPool.setMaker(maker);
+        }
+        // Issuer
+        String issuer = firstLinkText(info.select("p:contains(發行商)"));
+        if (issuer != null) {
+            videoInfoPool.setIssuer(issuer);
+        }
+        // Genres: the links live in the element following the label paragraph
+        Elements genresEles = info.select("p:contains(類別)");
+        if (genresEles.size() > 0) {
+            Element linkHolder = genresEles.first().nextElementSibling();
+            if (linkHolder != null) {
+                videoInfoPool.setGenres(joinLinkTexts(linkHolder));
+            }
+        }
+        // Cast: the links live two elements after the label paragraph; skipped when the page
+        // states that no cast information is available
+        Elements castEles = info.select("p.star-show:contains(演員)");
+        if (castEles.size() > 0 && container.select("div.info:contains(暫無出演者資訊)").size() == 0) {
+            Element linkHolder = castEles.first().nextElementSibling();
+            if (linkHolder != null) {
+                linkHolder = linkHolder.nextElementSibling();
+            }
+            if (linkHolder != null) {
+                videoInfoPool.setCast(joinLinkTexts(linkHolder));
+            }
+        }
+
+        // Cover image
+        Element bigImage = container.select("a.bigImage").first();
+        if (bigImage == null) {
+            throw new Exception("cover image link not found");
+        }
+        String href = bigImage.attr("abs:href");
+
+        Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
+        // NOTE(review): the heading text is used verbatim in the file name; characters the
+        // target file system rejects would make the save fail — confirm whether sanitizing is wanted
+        String fileName = issueDate.concat(" ").concat(h3).concat(".jpg");
+        saveFile(response.bodyStream(), machiPath.concat(fileName));
+        videoInfoPool.setImgUrl(fileName);
+
+        videoInfoPool.setCreateTime(LocalDateTime.now());
+        videoInfoPool.setType(1);
+    }
+
+    /**
+     * Returns the part of the paragraph text between the first and second ':' with double
+     * quotes removed and surrounding whitespace trimmed.
+     */
+    private static String valueAfterColon(Element paragraph) throws Exception {
+        String[] parts = paragraph.text().split(":");
+        if (parts.length < 2) {
+            throw new Exception("unexpected info line: " + paragraph.text());
+        }
+        return parts[1].replace("\"", "").trim();
+    }
+
+    /**
+     * Returns the text of the first link inside the first matched paragraph, or {@code null}
+     * when there is no paragraph or it contains no link.
+     */
+    private static String firstLinkText(Elements paragraphs) {
+        Element paragraph = paragraphs.first();
+        if (paragraph == null) {
+            return null;
+        }
+        Element link = paragraph.select("a[href]").first();
+        return link == null ? null : link.text();
+    }
+
+    /**
+     * Joins the texts of all links below the given element with ','.
+     */
+    private static String joinLinkTexts(Element linkHolder) {
+        StringBuilder sb = new StringBuilder();
+        for (Element link : linkHolder.select("a[href]")) {
+            if (sb.length() > 0) {
+                sb.append(",");
+            }
+            sb.append(link.text());
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Copies the given stream to a local file and closes both streams.
+     * <p>
+     * The streams are closed even when reading or writing fails.
+     *
+     * @param bufferedInputStream source stream; closed by this method
+     * @param savePath            path of the file to write
+     * @throws IOException when the file cannot be opened or the copy fails
+     */
+    private void saveFile(BufferedInputStream bufferedInputStream, String savePath) throws IOException {
+        // try-with-resources guarantees both streams are released on every exit path
+        try (BufferedInputStream in = bufferedInputStream;
+             BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(savePath))) {
+            // Copy in chunks of at most 1 KiB
+            byte[] buffer = new byte[1024];
+            int readLength;
+            while ((readLength = in.read(buffer, 0, buffer.length)) != -1) {
+                out.write(buffer, 0, readLength);
+            }
+        }
+    }
+
     // 递归获取某目录下的所有子目录以及子文件
     private void getAllFilePaths(String filePath, JavAllInfo javAllInfo) {
         File[] files = new File(filePath).listFiles();

+ 173 - 0
src/main/java/top/lvzhiqiang/util/JsoupUtil.java

@@ -0,0 +1,173 @@
+package top.lvzhiqiang.util;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Small helper around Jsoup for loading a document from a file and querying it, plus a
+ * manual crawling demo in {@link #main(String[])}.
+ * <p>
+ * The most recently parsed document and the most recent query result are kept in static
+ * fields without any synchronization, so this class is not safe for concurrent use.
+ */
+public class JsoupUtil {
+
+    /**
+     * Host prepended to the relative links and image sources found by the demo.
+     */
+    private static final String DEMO_HOST = "http://www.cmzz100.com";
+
+    private static Document doc = null;
+    private static Elements eles = null;
+
+
+    /**
+     * Parses the given file as UTF-8 and remembers the result for {@link #getEles(String)}.
+     * <p>
+     * On failure the stack trace is printed and the previously remembered document (possibly
+     * {@code null}) is returned unchanged.
+     *
+     * @param file file to parse
+     * @return the remembered document
+     */
+    public static Document setXmlFile(File file) {
+        try {
+            doc = Jsoup.parse(file, "UTF-8");
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return doc;
+    }
+
+
+    /**
+     * Parses the file at the given path as UTF-8 and remembers the result for
+     * {@link #getEles(String)}.
+     * <p>
+     * On failure the stack trace is printed and the previously remembered document (possibly
+     * {@code null}) is returned unchanged.
+     *
+     * @param path path of the file to parse
+     * @return the remembered document
+     */
+    public static Document setXmlFile(String path) {
+        try {
+            File file = new File(path);
+            doc = Jsoup.parse(file, "UTF-8");
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return doc;
+    }
+
+
+    /**
+     * Runs a selector query against the remembered document.
+     * <p>
+     * On failure (including when no document has been loaded) the stack trace is printed and
+     * the result of the previous query (possibly {@code null}) is returned.
+     *
+     * @param nodeQuery selector query
+     * @return the elements matched by the query
+     */
+    public static Elements getEles(String nodeQuery) {
+        try {
+            eles = doc.select(nodeQuery);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+
+        return eles;
+    }
+
+
+    /**
+     * Manual demo: collects the article links from the listing page, fetches each article,
+     * rewrites its image sources with the host prefix and prints the article's info blocks.
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        try {
+            List<String> newsLink = new ArrayList<String>();
+            Document doc = Jsoup.connect("http://www.cmzz100.com/cn/G100/toutiao.html").get();
+
+            for (Element element : doc.select("div.title>a")) {
+                newsLink.add(DEMO_HOST + element.attr("href"));
+            }
+
+            for (String str : newsLink) {
+                try {
+                    doc = Jsoup.connect(str).get();
+                    Elements infoBlocks = doc.select("div.info");
+
+                    // Prefix every image's own source exactly once: linked images of the first
+                    // info block, then paragraph images of all info blocks
+                    if (!infoBlocks.isEmpty()) {
+                        prefixSrc(infoBlocks.get(0).select("a>img"));
+                    }
+                    prefixSrc(doc.select("div.info p>img[src]"));
+
+                    System.out.print(infoBlocks);
+                    System.out.println("=============================================");
+                } catch (Exception e) {
+                    // Keep going with the next article, but do not hide the failure
+                    e.printStackTrace();
+                }
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Prepends the demo host to the {@code src} attribute of every given image that has one.
+     */
+    private static void prefixSrc(Elements images) {
+        for (Element image : images) {
+            if (image.hasAttr("src")) {
+                image.attr("src", DEMO_HOST + image.attr("src"));
+            }
+        }
+    }
+}

+ 9 - 0
src/main/resources/static/bg.html

@@ -41,6 +41,15 @@
             <input type="submit" value="提交">
         </form>
     </div>
+    <br/>
+    <div style="margin-right:20px;">
+        <span class="font">jsoupIcodePool</span>
+        <form method="post" action="bg/jsoupIcodePool" enctype="multipart/form-data">
+            <span>status</span>
+            <input type="text" name="status" placeholder="1:未爬取,3:爬取失败"/>
+            <input type="submit" value="提交">
+        </form>
+    </div>
 </div>
 </body>
 </html>

+ 145 - 14
src/test/java/Test.java

@@ -1,12 +1,14 @@
-import top.lvzhiqiang.entity.VideoInfo;
-import top.lvzhiqiang.entity.VideoInfoGenres;
+import org.jsoup.Connection;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import top.lvzhiqiang.entity.VideoInfoPool;
 import top.lvzhiqiang.util.DateUtils;
 
-import java.io.File;
-import java.time.Instant;
+import java.io.*;
 import java.time.LocalDate;
-import java.time.ZoneOffset;
-import java.util.*;
+import java.time.LocalDateTime;
 
 public class Test {
     public static void main(String[] args) {
@@ -40,13 +42,142 @@ public class Test {
         //videoInfoGenresSet.add(videoInfoGenres3);
         //System.out.println(videoInfoGenresSet);
 
-        List<String> s1 = new ArrayList<>();
-        s1.add("aa");
-        s1.add("bb");
-        List<String> s2 = new ArrayList<>();
-        s2.add("cc");
-        s2.add("bb");
-        s1.removeAll(s2);
-        System.out.println(s1);
+        //List<String> s1 = new ArrayList<>();
+        //s1.add("aa");
+        //s1.add("bb");
+        //List<String> s2 = new ArrayList<>();
+        //s2.add("cc");
+        //s2.add("bb");
+        //s1.removeAll(s2);
+        //System.out.println(s1);
+
+
+
+        String javbusUrl = "https://www.seejav.men/";
+        String identificationCode = "DANDY-745";// DTT-049  HAWA-243  HISN-011 DANDY-745  VOSS-172  PFES-024 VIDA-005  SHKD-843  CAWD-176  BLK-467
+        Document document;
+        VideoInfoPool videoInfoPool;
+        try {
+            document = Jsoup.connect(javbusUrl.concat(identificationCode)).timeout(50000).get();
+
+            videoInfoPool = new VideoInfoPool();
+            parseDocument(document, identificationCode, videoInfoPool);
+
+            System.out.println(videoInfoPool);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Extracts the video fields from a fetched detail page into {@code videoInfoPool}.
+     * <p>
+     * Name, identification code, issue date, length and the cover image link are required: a
+     * page without them is rejected with a descriptive exception. Director, maker, issuer,
+     * genres and cast are optional and are left untouched when the page does not provide them.
+     * The cover image is requested, but saving it is currently disabled.
+     *
+     * @param document           fetched detail page
+     * @param identificationCode code that was requested; must match the code shown on the page (ignoring case)
+     * @param videoInfoPool      target object that receives the parsed values
+     * @throws Exception when the page is invalid, a required element is missing, the date cannot
+     *                   be parsed, or the image request fails
+     */
+    private static void parseDocument(Document document, String identificationCode, VideoInfoPool videoInfoPool) throws Exception {
+        Elements container = document.select("div.container");
+        if (container.size() == 0) {
+            throw new Exception("番号无效!");
+        }
+
+        // Name: drop the first whitespace-separated token of the heading when more text follows it
+        Element h3Ele = container.select("h3").first();
+        if (h3Ele == null) {
+            throw new Exception("title element <h3> not found");
+        }
+        String h3 = h3Ele.text();
+        String[] nameArr = h3.split("\\s+");
+        if (nameArr.length > 1) {
+            videoInfoPool.setName(h3.substring(nameArr[0].length()).trim());
+        } else {
+            videoInfoPool.setName(nameArr[0]);
+        }
+
+        // The first three info paragraphs are read by position: code, issue date, length
+        Elements pEles = container.select("div.info > p");
+        if (pEles.size() < 3) {
+            throw new Exception("info block incomplete, paragraph count=" + pEles.size());
+        }
+        // Identification code
+        Element iCodeEle = pEles.get(0).select("span[style]").first();
+        if (iCodeEle == null) {
+            throw new Exception("identification code element not found");
+        }
+        String iCode = iCodeEle.text();
+        if (!identificationCode.equalsIgnoreCase(iCode)) {
+            throw new Exception("番号与站点不一致");
+        }
+        videoInfoPool.setIdentificationCode(iCode);
+        // Issue date
+        String issueDate = valueAfterColon(pEles.get(1));
+        videoInfoPool.setIssueDate(LocalDate.parse(issueDate, DateUtils.dateFormatter));
+        // Length
+        videoInfoPool.setLength(valueAfterColon(pEles.get(2)));
+
+        Elements info = container.select("div.info");
+        // Director
+        String director = firstLinkText(info.select("p:contains(導演)"));
+        if (director != null) {
+            videoInfoPool.setDirector(director);
+        }
+        // Maker
+        String maker = firstLinkText(info.select("p:contains(製作商)"));
+        if (maker != null) {
+            videoInfoPool.setMaker(maker);
+        }
+        // Issuer
+        String issuer = firstLinkText(info.select("p:contains(發行商)"));
+        if (issuer != null) {
+            videoInfoPool.setIssuer(issuer);
+        }
+        // Genres: the links live in the element following the label paragraph
+        Elements genresEles = info.select("p:contains(類別)");
+        if (genresEles.size() > 0) {
+            Element linkHolder = genresEles.first().nextElementSibling();
+            if (linkHolder != null) {
+                videoInfoPool.setGenres(joinLinkTexts(linkHolder));
+            }
+        }
+        // Cast: the links live two elements after the label paragraph; skipped when the page
+        // states that no cast information is available
+        Elements castEles = info.select("p.star-show:contains(演員)");
+        if (castEles.size() > 0 && container.select("div.info:contains(暫無出演者資訊)").size() == 0) {
+            Element linkHolder = castEles.first().nextElementSibling();
+            if (linkHolder != null) {
+                linkHolder = linkHolder.nextElementSibling();
+            }
+            if (linkHolder != null) {
+                videoInfoPool.setCast(joinLinkTexts(linkHolder));
+            }
+        }
+
+        // Cover image
+        Element bigImage = container.select("a.bigImage").first();
+        if (bigImage == null) {
+            throw new Exception("cover image link not found");
+        }
+        String href = bigImage.attr("abs:href");
+
+        // The image is still requested so an unreachable link fails the run; response and
+        // machiPath are only consumed by the disabled saveFile call below
+        Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
+        String machiPath = "F:/1/0/2/4/视频/电影/A级(成人级)/码池/";
+        String fileName = issueDate.concat(" ").concat(h3).concat(".jpg");
+        //saveFile(response.bodyStream(), machiPath.concat(fileName));
+        videoInfoPool.setImgUrl(fileName);
+
+        videoInfoPool.setCreateTime(LocalDateTime.now());
+    }
+
+    /**
+     * Returns the part of the paragraph text between the first and second ':' with double
+     * quotes removed and surrounding whitespace trimmed.
+     */
+    private static String valueAfterColon(Element paragraph) throws Exception {
+        String[] parts = paragraph.text().split(":");
+        if (parts.length < 2) {
+            throw new Exception("unexpected info line: " + paragraph.text());
+        }
+        return parts[1].replace("\"", "").trim();
+    }
+
+    /**
+     * Returns the text of the first link inside the first matched paragraph, or {@code null}
+     * when there is no paragraph or it contains no link.
+     */
+    private static String firstLinkText(Elements paragraphs) {
+        Element paragraph = paragraphs.first();
+        if (paragraph == null) {
+            return null;
+        }
+        Element link = paragraph.select("a[href]").first();
+        return link == null ? null : link.text();
+    }
+
+    /**
+     * Joins the texts of all links below the given element with ','.
+     */
+    private static String joinLinkTexts(Element linkHolder) {
+        StringBuilder sb = new StringBuilder();
+        for (Element link : linkHolder.select("a[href]")) {
+            if (sb.length() > 0) {
+                sb.append(",");
+            }
+            sb.append(link.text());
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Copies the given stream to a local file and closes both streams.
+     * <p>
+     * The streams are closed even when reading or writing fails.
+     *
+     * @param bufferedInputStream source stream; closed by this method
+     * @param savePath            path of the file to write
+     * @throws IOException when the file cannot be opened or the copy fails
+     */
+    private static void saveFile(BufferedInputStream bufferedInputStream, String savePath) throws IOException {
+        // try-with-resources guarantees both streams are released on every exit path
+        try (BufferedInputStream in = bufferedInputStream;
+             BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(savePath))) {
+            // Copy in chunks of at most 1 KiB
+            byte[] buffer = new byte[1024];
+            int readLength;
+            while ((readLength = in.read(buffer, 0, buffer.length)) != -1) {
+                out.write(buffer, 0, readLength);
+            }
+        }
     }
 }