Преглед изворни кода

update:javbus站点防屏蔽地址抓取v1

tujidelv пре 3 година
родитељ
комит
0749869b8b

+ 22 - 0
pom.xml

@@ -25,6 +25,28 @@
         <version>2.1.9.RELEASE</version>
     </parent>
 
+    <!-- Maven profiles, one per runtime environment. Each profile only sets the
+         `activatedProperties` property; how that property is consumed is not visible here. -->
+    <profiles>
+        <profile>
+            <!-- Development -->
+            <id>dev</id>
+            <activation>
+                <!-- Development is the default environment -->
+                <activeByDefault>true</activeByDefault>
+            </activation>
+            <properties>
+                <activatedProperties>dev</activatedProperties>
+            </properties>
+        </profile>
+        <profile>
+            <!-- Test -->
+            <id>test</id>
+            <properties>
+                <activatedProperties>test</activatedProperties>
+            </properties>
+        </profile>
+    </profiles>
+
     <dependencies>
         <dependency>
             <groupId>org.springframework.boot</groupId>

+ 2 - 0
src/main/java/top/lvzhiqiang/App.java

@@ -4,10 +4,12 @@ import org.mybatis.spring.annotation.MapperScan;
 import org.springframework.boot.SpringApplication;
 import org.springframework.boot.autoconfigure.SpringBootApplication;
 import org.springframework.scheduling.annotation.EnableAsync;
+import org.springframework.scheduling.annotation.EnableScheduling;
 
 @MapperScan(basePackages = "top.lvzhiqiang.mapper")
 @SpringBootApplication
 @EnableAsync
+@EnableScheduling
 public class App {
     public static void main(String[] args) {
         SpringApplication.run(App.class, args);

+ 114 - 0
src/main/java/top/lvzhiqiang/config/MyJobs.java

@@ -0,0 +1,114 @@
+package top.lvzhiqiang.config;
+
+import lombok.extern.slf4j.Slf4j;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.springframework.scheduling.annotation.Scheduled;
+import org.springframework.stereotype.Component;
+import org.springframework.transaction.annotation.Propagation;
+import org.springframework.transaction.annotation.Transactional;
+import top.lvzhiqiang.entity.DicCode;
+import top.lvzhiqiang.entity.VideoSitePool;
+import top.lvzhiqiang.mapper.VideoSitePoolMapper;
+
+import javax.annotation.Resource;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Scheduled jobs.
+ *
+ * @author lvzhiqiang
+ * 2022/4/28 15:49
+ */
+@Component
+@Slf4j
+public class MyJobs {
+
+    @Resource
+    private VideoSitePoolMapper videoSitePoolMapper;
+
+    private static final String SCHEDULED_ZONE = "Asia/Shanghai";
+
+    /** Timeout, in milliseconds, applied to every Jsoup request made by this job. */
+    private static final int TIMEOUT_MILLIS = 50000;
+
+    /** {@code video_site_pool.type} value used for javbus rows. */
+    private static final int SITE_TYPE_JAVBUS = 1;
+
+    /** {@code delete_flag} value for a usable address. */
+    private static final int DELETE_FLAG_NORMAL = 1;
+
+    /** {@code delete_flag} value for an address that failed the check. */
+    private static final int DELETE_FLAG_INVALID = 2;
+
+    /**
+     * Runs every day at 06:00 (Asia/Shanghai) and refreshes the javbus mirror pool.
+     *
+     * <ol>
+     *   <li>Looks up the official javbus address in {@code WebAppConfig.dicCodeList}
+     *       (type 2, key {@code "javbus"}).</li>
+     *   <li>Scrapes the mirror addresses announced on that page.</li>
+     *   <li>Probes each announced address that is not stored yet and inserts the ones that respond.</li>
+     *   <li>Probes every stored address and updates its delete flag accordingly.</li>
+     * </ol>
+     *
+     * <p>NOTE(review): the whole method runs inside one transaction while performing network
+     * calls that may each take up to {@link #TIMEOUT_MILLIS}; confirm this is acceptable for
+     * the connection pool in use.
+     */
+    @Scheduled(cron = "0 0 6 * * ?", zone = SCHEDULED_ZONE)
+    //@Scheduled(cron = "0 10 19 * * ?",zone = SCHEDULED_ZONE)
+    @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
+    public void checkVideoSite() {
+        log.info("checkVideoSite开始==============================");
+        // Official javbus address. orElse(null) instead of get(): get() throws
+        // NoSuchElementException on a missing entry, which made the null check unreachable.
+        DicCode dicCode = WebAppConfig.dicCodeList.stream()
+                .filter(x -> 2 == x.getType() && "javbus".equals(x.getCodeKey()))
+                .findFirst()
+                .orElse(null);
+        if (dicCode == null) {
+            log.warn("javbus官方站点为Null");
+            return;
+        }
+
+        // Addresses announced on the official page (empty when scraping fails).
+        Set<String> javbusNewUrlList = fetchAnnouncedUrls(dicCode.getCodeValue());
+
+        // Addresses already stored, regardless of their delete flag.
+        List<String> javbusUrlList = videoSitePoolMapper.findUrlByType(SITE_TYPE_JAVBUS);
+
+        if (javbusNewUrlList.isEmpty()) {
+            log.warn("javbusNewUrlList为空");
+        }
+        if (javbusNewUrlList.isEmpty() && javbusUrlList.isEmpty()) {
+            log.warn("javbusUrlList和javbusNewUrlList为空");
+            return;
+        }
+
+        // Check the announced addresses that are not in the pool yet.
+        Set<String> knownUrls = new HashSet<>(javbusUrlList);
+        List<String> javbusNewUrlFinalList = javbusNewUrlList.stream()
+                .filter(url -> !knownUrls.contains(url))
+                .collect(Collectors.toList());
+        List<VideoSitePool> videoSitePoolList = new ArrayList<>();
+        for (String javbusNewUrlFinal : javbusNewUrlFinalList) {
+            if (isReachable(javbusNewUrlFinal, "javbusNewUrlFinalList")) {
+                VideoSitePool videoSitePool = new VideoSitePool();
+                videoSitePool.setUrl(javbusNewUrlFinal);
+                videoSitePool.setType(SITE_TYPE_JAVBUS);
+                videoSitePoolList.add(videoSitePool);
+            }
+        }
+        // insertList builds a multi-row VALUES clause, so it must not be called with an empty list.
+        if (!videoSitePoolList.isEmpty()) {
+            videoSitePoolMapper.insertList(videoSitePoolList);
+        }
+
+        // Re-check the addresses that were already stored before this run.
+        for (String javbusUrl : javbusUrlList) {
+            int deleteFlag = isReachable(javbusUrl, "javbusUrlList") ? DELETE_FLAG_NORMAL : DELETE_FLAG_INVALID;
+            videoSitePoolMapper.updateDeleteFlag(javbusUrl, deleteFlag);
+        }
+
+        log.info("checkVideoSite结束==============================");
+    }
+
+    /**
+     * Scrapes the mirror addresses announced on the official page.
+     *
+     * @param officialUrl address of the official javbus page
+     * @return the text of every {@code <a>} that directly follows a {@code <strong>} containing
+     *         "防屏蔽地址"; empty when the page cannot be fetched or parsed
+     */
+    private Set<String> fetchAnnouncedUrls(String officialUrl) {
+        Set<String> announcedUrls = new HashSet<>();
+        try {
+            Document document = Jsoup.connect(officialUrl).timeout(TIMEOUT_MILLIS).ignoreContentType(true).get();
+
+            Elements ahrefList = document.select("strong:contains(防屏蔽地址)").next("a");
+            for (Element element : ahrefList) {
+                String text = element.text();
+                log.info("javbus防屏蔽地址:{}", text);
+                announcedUrls.add(text);
+            }
+        } catch (Exception e) {
+            // Best effort: a scraping failure must not prevent the re-check of stored addresses.
+            log.error("Jsoup抓取javbus防屏蔽地址异常", e);
+        }
+        return announcedUrls;
+    }
+
+    /**
+     * Probes an address by actually sending a request to it.
+     *
+     * <p>{@code Jsoup.connect(url)} alone only builds a {@code Connection}; the request is sent
+     * by {@code execute()}, which throws on malformed URLs, I/O failures, timeouts and
+     * HTTP error statuses.
+     *
+     * @param url    address to probe
+     * @param source label of the list the address came from, used as the log prefix
+     * @return {@code true} when the request completed without an exception
+     */
+    private boolean isReachable(String url, String source) {
+        try {
+            Jsoup.connect(url).timeout(TIMEOUT_MILLIS).ignoreContentType(true).execute();
+            log.info("{}:javbus防屏蔽地址有效!javbusUrl={}", source, url);
+            return true;
+        } catch (Exception e) {
+            log.error("{}:javbus防屏蔽地址失效!javbusUrl={}", source, url, e);
+            return false;
+        }
+    }
+}

+ 49 - 0
src/main/java/top/lvzhiqiang/entity/VideoSitePool.java

@@ -0,0 +1,49 @@
+package top.lvzhiqiang.entity;
+
+import com.fasterxml.jackson.annotation.JsonFormat;
+import lombok.Data;
+
+import java.io.Serializable;
+import java.time.LocalDateTime;
+
+/**
+ * Entity for the video site pool table.
+ *
+ * @author lvzhiqiang
+ * 2022/4/28 15:53
+ */
+@Data
+public class VideoSitePool implements Serializable {
+
+    /**
+     * Primary key.
+     */
+    private Long id;
+
+    /**
+     * Site URL.
+     */
+    private String url;
+
+    /**
+     * Site type {1: javbus}.
+     */
+    private Integer type;
+
+    /**
+     * Delete flag (1: normal, 2: invalid).
+     */
+    private Integer deleteFlag;
+
+    /**
+     * Creation time; formatted as {@code yyyy-MM-dd HH:mm:ss} by Jackson.
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    private LocalDateTime createTime;
+
+    /**
+     * Last modification time; formatted as {@code yyyy-MM-dd HH:mm:ss} by Jackson.
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    private LocalDateTime modifyTime;
+}

+ 67 - 0
src/main/java/top/lvzhiqiang/mapper/VideoSitePoolMapper.java

@@ -0,0 +1,67 @@
+package top.lvzhiqiang.mapper;
+
+import org.apache.ibatis.annotations.*;
+import top.lvzhiqiang.entity.VideoSitePool;
+
+import java.util.List;
+
+/**
+ * MyBatis mapper for the {@code video_site_pool} table.
+ *
+ * @author lvzhiqiang
+ * 2022/4/28 15:53
+ */
+public interface VideoSitePoolMapper {
+
+    /**
+     * Deletes every row of {@code video_site_pool}.
+     */
+    @Delete("DELETE FROM video_site_pool where 1=1")
+    void deleteAll();
+
+    /**
+     * Inserts several rows with a single multi-row {@code INSERT}.
+     *
+     * <p>Only {@code url} and {@code type} are taken from each element; both timestamps are set
+     * to {@code now()}. {@code delete_flag} is not set here — presumably the column default
+     * applies; verify against the schema. An empty list would render a statement with no
+     * value tuples after {@code VALUES}, so callers should skip the call in that case.
+     *
+     * @param videoSitePoolList rows to insert
+     * @return presumably the number of inserted rows — confirm against MyBatis configuration
+     */
+    @Insert({"<script>" +
+            "INSERT INTO video_site_pool(url, type, create_time, modify_time) " +
+            "VALUES " +
+            "<foreach collection='list' item='vsp' index=\"index\" separator=\",\">" +
+            "   (#{vsp.url}, #{vsp.type}, now(), now())" +
+            " </foreach>" +
+            "</script>"})
+    int insertList(List<VideoSitePool> videoSitePoolList);
+
+    /**
+     * Inserts a single row; the generated key is written back to {@code id}
+     * (see the {@code @Options} annotation).
+     *
+     * @param videoSitePool row to insert; only {@code url} and {@code type} are read
+     * @return presumably the number of inserted rows — confirm against MyBatis configuration
+     */
+    @Insert("INSERT INTO video_site_pool(url, type, create_time, modify_time) " +
+            "VALUES (#{url}, #{type}, now(), now())")
+    @Options(useGeneratedKeys = true, keyProperty = "id", keyColumn = "id")
+    int insert(VideoSitePool videoSitePool);
+
+    /**
+     * Returns the URLs of all rows of the given type, regardless of their delete flag.
+     *
+     * @param type site type to match
+     * @return matching URLs
+     */
+    @Select("SELECT url FROM video_site_pool where type = #{type}")
+    List<String> findUrlByType(@Param("type") Integer type);
+
+    /**
+     * Returns the URLs of the rows matching both the given type and delete flag.
+     *
+     * @param type       site type to match
+     * @param deleteFlag delete flag to match
+     * @return matching URLs
+     */
+    @Select("SELECT url FROM video_site_pool where type = #{type} and delete_flag = #{deleteFlag}")
+    List<String> findUrlByTypeAndDeleteFlag(@Param("type") Integer type, @Param("deleteFlag") Integer deleteFlag);
+
+    /**
+     * Sets the delete flag and refreshes {@code modify_time} on every row with the given URL
+     * (the statement does not filter by type).
+     *
+     * @param url        URL of the row(s) to update
+     * @param deleteFlag new delete flag value
+     * @return presumably the number of updated rows — confirm against MyBatis configuration
+     */
+    @Update("update video_site_pool set delete_flag = #{deleteFlag},modify_time = now() where url = #{url}")
+    int updateDeleteFlag(@Param("url") String url, @Param("deleteFlag") Integer deleteFlag);
+}

+ 17 - 19
src/main/java/top/lvzhiqiang/service/impl/BgServiceImpl.java

@@ -51,6 +51,8 @@ public class BgServiceImpl implements BgService {
     private IcodePoolMapper icodePoolMapper;
     @Resource
     private VideoInfoPoolMapper videoInfoPoolMapper;
+    @Resource
+    private VideoSitePoolMapper videoSitePoolMapper;
 
     /**
      * 初始化骑兵数据
@@ -121,29 +123,18 @@ public class BgServiceImpl implements BgService {
 
         // 获取待抓取码列表
         List<String> icodePoolList = icodePoolMapper.findIcodeByStatus(status);
-        // 获取主表所有识别码
-        List<String> allIcode = videoInfoMapper.findAllIcode();
-
-        // 获取javbus防屏蔽地址
-        DicCode dicCode = WebAppConfig.dicCodeList.stream().filter(x -> 2 == x.getType() && "javbus".equals(x.getCodeKey())).findFirst().get();
-        if (dicCode == null) {
+        if (icodePoolList.size() == 0) {
+            log.warn("icodePoolList为空");
             return;
         }
-        String[] javbusUrlArr = dicCode.getCodeValue().split(",");
-        List<String> javbusUrlList = new ArrayList<>();
-        for (String javbusUrl : javbusUrlArr) {
-            // 校验地址
-            try {
-                Jsoup.connect(javbusUrl.concat(javbusUrl));
-                log.info("jsoupIcodePool:javbus防屏蔽地址有效!javbusUrl={}", javbusUrl);
-                javbusUrlList.add(javbusUrl);
-            } catch (Exception e) {
-                log.error("jsoupIcodePool:javbus防屏蔽地址失效!javbusUrl={}", javbusUrl, e);
-            }
-        }
+        // 获取javbus防屏蔽地址
+        List<String> javbusUrlList = videoSitePoolMapper.findUrlByTypeAndDeleteFlag(1, 1);
         if (javbusUrlList.size() == 0) {
+            log.warn("javbusUrlList为空");
             return;
         }
+        // 获取主表所有识别码
+        List<String> allIcode = videoInfoMapper.findAllIcode();
 
         // 获取码池图片保存路径
         String machiPath = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && "machi_path".equals(x.getCodeKey())).findFirst().get().getCodeValue();
@@ -172,7 +163,14 @@ public class BgServiceImpl implements BgService {
             while (retryCount <= 3) {
                 javbusUrl = javbusUrlList.get((int) (0 + Math.random() * (javbusUrlList.size() - 0)));
                 try {
-                    document = Jsoup.connect(javbusUrl.concat(identificationCode)).timeout(50000).header("referer", "https://www.javbus.com/".concat(identificationCode)).userAgent(getUserAgent()).get();
+                    document = Jsoup.connect(javbusUrl.concat("/").concat(identificationCode))
+                            .timeout(50000)
+                            //.proxy()
+                            //.data()
+                            .ignoreContentType(true)
+                            .userAgent(getUserAgent())
+                            .header("referer", "https://www.javbus.com/".concat(identificationCode))
+                            .get();
 
                     videoInfoPool = new VideoInfoPool();
                     long picTime = parseDocument(document, identificationCode, machiPath, videoInfoPool);