Kaynağa Gözat

add:小鹅通绿洲大学公开课爬取v1

tujidelv 3 yıl önce
ebeveyn
işleme
16bf999a28

+ 16 - 2
src/main/java/top/lvzhiqiang/config/MyJobs.java

@@ -13,6 +13,7 @@ import top.lvzhiqiang.entity.DicCode;
 import top.lvzhiqiang.entity.VideoSitePool;
 import top.lvzhiqiang.mapper.VideoSitePoolMapper;
 import top.lvzhiqiang.service.BgService;
+import top.lvzhiqiang.service.CrawlerService;
 
 import javax.annotation.Resource;
 import java.util.ArrayList;
@@ -36,6 +37,8 @@ public class MyJobs {
 
     @Resource
     private BgService bgService;
+    @Resource
+    private CrawlerService crawlerService;
 
     private static final String SCHEDULED_ZONE = "Asia/Shanghai";
 
@@ -142,7 +145,7 @@ public class MyJobs {
     }
 
     /**
-     * 每天20Jsoup码池
+     * 每天20:00 Jsoup码池
      */
     @Scheduled(cron = "0 00 20 * * ?", zone = SCHEDULED_ZONE)
     public void jsoupIcodePool4CrawingNo() {
@@ -152,7 +155,7 @@ public class MyJobs {
     }
 
     /**
-     * 每天6.30点Jsoup码池
+     * 每天20:30 Jsoup码池
      */
     @Scheduled(cron = "0 30 20 * * ?", zone = SCHEDULED_ZONE)
     public void jsoupIcodePool4CrawingFail() {
@@ -160,4 +163,15 @@ public class MyJobs {
 
         bgService.jsoupIcodePool(3, 2, 2);
     }
+
+    /**
+     * Scheduled job: fires every day at 06:30 (cron evaluated in {@code SCHEDULED_ZONE}) and
+     * triggers the Xiaoeknow course crawl.
+     * <p>
+     * {@code isDel} is passed as 2, so the service does not clear the table first (it only does so
+     * when {@code isDel} is 1); {@code status} and {@code ignoreRetryCount} are passed as {@code null}.
+     */
+    @Scheduled(cron = "0 30 6 * * ?", zone = SCHEDULED_ZONE)
+    @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
+    public void jsoupXiaoeknowCourse() {
+        log.warn("jsoupXiaoeknowCourse开始==============================");
+
+        crawlerService.jsoupXiaoeknowCourse(null, 2, null);
+    }
 }

+ 64 - 0
src/main/java/top/lvzhiqiang/controller/CrawlerController.java

@@ -0,0 +1,64 @@
+package top.lvzhiqiang.controller;
+
+import org.springframework.stereotype.Controller;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.ResponseBody;
+import top.lvzhiqiang.dto.R;
+import top.lvzhiqiang.service.CrawlerService;
+import top.lvzhiqiang.util.StringUtils;
+
+import javax.annotation.Resource;
+
+/**
+ * HTTP entry points for the crawler back office, mounted under {@code /bg/crawler}.
+ *
+ * @author lvzhiqiang
+ * 2022/9/10 21:24
+ */
+@Controller
+@RequestMapping("/bg/crawler")
+public class CrawlerController {
+
+    @Resource
+    private CrawlerService crawlerService;
+
+    /**
+     * Queries stored Xiaoeknow courses and returns the service's rendered result as the response body.
+     *
+     * @param title      optional title keyword
+     * @param type       optional resource type
+     * @param orderField column to sort by
+     * @param order      sort direction
+     * @param crudT      operation selector; falls back to "1" when empty
+     * @return the string produced by {@link CrawlerService#findXiaoeknowCourse}
+     * @author lvzhiqiang
+     * 2022/9/11 17:01
+     */
+    @RequestMapping("/findXiaoeknowCourse")
+    @ResponseBody
+    public String findXiaoeknowCourse(String title, Integer type, String orderField, String order, String crudT) {
+        final String operation = StringUtils.isEmpty(crudT) ? "1" : crudT;
+        return crawlerService.findXiaoeknowCourse(title, type, orderField, order, operation);
+    }
+
+
+    /**
+     * Starts a crawl of the Xiaoeknow course lists.
+     *
+     * @param status           forwarded unchanged (may be {@code null})
+     * @param isDel            forwarded to the service; falls back to 2 when absent
+     * @param ignoreRetryCount forwarded unchanged (may be {@code null})
+     * @return {@code R.ok()} once the service call returns
+     * @author lvzhiqiang
+     * 2022/9/10 21:24
+     */
+    @RequestMapping("/jsoupXiaoeknowCourse")
+    @ResponseBody
+    public R jsoupXiaoeknowCourse(Integer status, Integer isDel, Integer ignoreRetryCount) {
+        // Only isDel gets a default; status and ignoreRetryCount are passed through as received.
+        final Integer deleteFirst = (isDel != null) ? isDel : Integer.valueOf(2);
+
+        crawlerService.jsoupXiaoeknowCourse(status, deleteFirst, ignoreRetryCount);
+        return R.ok();
+    }
+}

+ 76 - 0
src/main/java/top/lvzhiqiang/entity/CrawlerXiaoeknowCourse.java

@@ -0,0 +1,76 @@
+package top.lvzhiqiang.entity;
+
+import com.fasterxml.jackson.annotation.JsonFormat;
+import lombok.Data;
+
+import java.io.Serializable;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+
+/**
+ * Entity for one crawled Xiaoeknow course row (table {@code crawler_xiaoeknow_course}).
+ *
+ * @author lvzhiqiang
+ * 2022/9/10 21:24
+ */
+@Data
+public class CrawlerXiaoeknowCourse implements Serializable {
+
+    /**
+     * Primary key.
+     */
+    private Long id;
+
+    /**
+     * Xiaoeknow app id; used as the host prefix of the shop's h5 URLs.
+     */
+    private String appId;
+
+    /**
+     * Id of the column (course collection) the resource was listed under.
+     */
+    private String columnId;
+
+    /**
+     * Id of the resource itself.
+     */
+    private String resourceId;
+
+    /**
+     * Resource title.
+     */
+    private String resourceTitle;
+
+    /**
+     * Resource type {3: video, 4: live stream}.
+     */
+    private Integer resourceType;
+
+    /**
+     * View count reported by the site.
+     */
+    private Integer viewCount;
+
+    /**
+     * Start date of the resource; serialized as {@code yyyy-MM-dd}.
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd")
+    private LocalDate startAt;
+
+    /**
+     * Delete flag {1: normal, 2: deleted}.
+     */
+    private Integer deleteFlag;
+
+    /**
+     * Creation time.
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    private LocalDateTime createTime;
+
+    /**
+     * Last modification time.
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    private LocalDateTime modifyTime;
+}

+ 72 - 0
src/main/java/top/lvzhiqiang/mapper/CrawlerXiaoeknowCourseMapper.java

@@ -0,0 +1,72 @@
+package top.lvzhiqiang.mapper;
+
+import org.apache.ibatis.annotations.*;
+import top.lvzhiqiang.entity.CrawlerXiaoeknowCourse;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * MyBatis mapper for the {@code crawler_xiaoeknow_course} table (crawled Xiaoeknow courses).
+ *
+ * @author lvzhiqiang
+ * 2022/9/10 21:24
+ */
+public interface CrawlerXiaoeknowCourseMapper {
+
+    /**
+     * Deletes every row of the table.
+     */
+    @Delete("DELETE FROM crawler_xiaoeknow_course where 1=1")
+    void deleteAll();
+
+    /**
+     * Inserts all given courses with one multi-row INSERT; {@code modify_time} is set to {@code now()}.
+     *
+     * @param crawlerXiaoeknowCourseList courses to insert; must not be empty, otherwise the generated
+     *                                   statement has no VALUES rows
+     * @return number of affected rows
+     */
+    @Insert({"<script>" +
+            "INSERT INTO crawler_xiaoeknow_course(app_id, column_id, resource_id, resource_title, resource_type, view_count, start_at, create_time, modify_time) " +
+            "VALUES " +
+            "<foreach collection='list' item='cxc' index=\"index\" separator=\",\">" +
+            "   (#{cxc.appId}, #{cxc.columnId}, #{cxc.resourceId}, #{cxc.resourceTitle}, #{cxc.resourceType}, #{cxc.viewCount}, #{cxc.startAt}, #{cxc.createTime}, now())" +
+            " </foreach>" +
+            "</script>"})
+    int insertList(List<CrawlerXiaoeknowCourse> crawlerXiaoeknowCourseList);
+
+    /**
+     * Inserts a single course and writes the generated key back into {@code id}.
+     * <p>
+     * The VALUES list has exactly one placeholder per listed column (a stray {@code issueDate}
+     * placeholder, which is not a property of the entity, used to make the statement invalid).
+     *
+     * @param crawlerXiaoeknowCourse course to insert
+     * @return number of affected rows
+     */
+    @Insert("INSERT INTO crawler_xiaoeknow_course(app_id, column_id, resource_id, resource_title, resource_type, view_count, start_at, create_time, modify_time) " +
+            "VALUES (#{appId}, #{columnId}, #{resourceId}, #{resourceTitle}, #{resourceType}, #{viewCount}, #{startAt}, #{createTime}, now())")
+    @Options(useGeneratedKeys = true, keyProperty = "id", keyColumn = "id")
+    int insert(CrawlerXiaoeknowCourse crawlerXiaoeknowCourse);
+
+    /**
+     * Returns all rows that are not flagged as deleted ({@code delete_flag = 1}).
+     */
+    @Select("SELECT * FROM crawler_xiaoeknow_course WHERE delete_flag = 1")
+    List<CrawlerXiaoeknowCourse> findAll();
+
+    /**
+     * Returns the non-deleted row with the greatest {@code start_at} for the given app and column.
+     * <p>
+     * Parameters are named explicitly with {@code @Param} so binding does not depend on the
+     * compiler's {@code -parameters} flag.
+     *
+     * @param appId    app id to match
+     * @param columnId column id to match
+     * @return the latest row, or {@code null} when none exists
+     */
+    @Select("SELECT * FROM crawler_xiaoeknow_course WHERE delete_flag = 1 and app_id=#{appId} and column_id=#{columnId} order by start_at desc limit 1")
+    CrawlerXiaoeknowCourse findLatestInfo(@Param("appId") String appId, @Param("columnId") String columnId);
+
+    /**
+     * Searches non-deleted rows with optional filters and ordering.
+     * <p>
+     * Recognised keys in {@code params}:
+     * <ul>
+     *   <li>{@code title} - substring matched against {@code resource_title}; ignored when null/empty</li>
+     *   <li>{@code type} - exact {@code resource_type}; ignored when null (it is numeric, so it is not
+     *       compared with {@code ''}, which OGNL would treat as equal to 0)</li>
+     *   <li>{@code order} - when non-empty an ORDER BY is added: {@code asc}/{@code ASC} sorts
+     *       ascending, anything else descending</li>
+     *   <li>{@code orderField} - {@code view_count} sorts by view count, anything else by {@code start_at}</li>
+     * </ul>
+     * The ORDER BY clause is assembled from fixed fragments only; caller-supplied text is never
+     * substituted into the SQL.
+     *
+     * @param params filter/sort values as described above
+     * @return matching rows
+     */
+    @Select({"<script>" +
+            "select * from crawler_xiaoeknow_course WHERE delete_flag = 1" +
+            "<if test=\"title != null and title != ''\">" +
+            "   and resource_title like concat('%',#{title},'%')" +
+            "</if>" +
+            "<if test=\"type != null\">" +
+            "   and resource_type = #{type}" +
+            "</if>" +
+            "<if test=\"order != null and order != ''\">" +
+            "   order by" +
+            "   <choose>" +
+            "       <when test=\"orderField == 'view_count'\"> view_count</when>" +
+            "       <otherwise> start_at</otherwise>" +
+            "   </choose>" +
+            "   <choose>" +
+            "       <when test=\"order == 'asc' or order == 'ASC'\"> asc</when>" +
+            "       <otherwise> desc</otherwise>" +
+            "   </choose>" +
+            "</if>" +
+            "</script>"})
+    List<CrawlerXiaoeknowCourse> findXiaoeknowCourse4MultipleParams(Map<String, Object> params);
+}

+ 26 - 0
src/main/java/top/lvzhiqiang/service/CrawlerService.java

@@ -0,0 +1,26 @@
+package top.lvzhiqiang.service;
+
+/**
+ * Service API for crawling Xiaoeknow course lists and querying the stored results.
+ *
+ * @author lvzhiqiang
+ * 2022/9/10 21:24
+ */
+public interface CrawlerService {
+
+    /**
+     * Queries stored courses and returns the result as a string for direct display.
+     *
+     * @param title      optional title keyword
+     * @param type       optional resource type
+     * @param orderField column to sort by
+     * @param order      sort direction
+     * @param crudT      operation selector (the crawler page offers 1: query, 2: update, 3: delete)
+     * @return rendered result
+     * @author lvzhiqiang
+     * 2022/9/11 17:01
+     */
+    String findXiaoeknowCourse(String title, Integer type, String orderField, String order, String crudT);
+
+    /**
+     * Crawls the configured Xiaoeknow columns and stores the courses found.
+     *
+     * @param status           crawl status selector
+     * @param isDel            1 to clear the stored courses before crawling
+     * @param ignoreRetryCount retry-count switch
+     * @author lvzhiqiang
+     * 2022/9/10 21:24
+     */
+    void jsoupXiaoeknowCourse(Integer status, Integer isDel, Integer ignoreRetryCount);
+}

+ 261 - 0
src/main/java/top/lvzhiqiang/service/impl/CrawlerServiceImpl.java

@@ -0,0 +1,261 @@
+package top.lvzhiqiang.service.impl;
+
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import lombok.extern.slf4j.Slf4j;
+import org.jsoup.Connection;
+import org.jsoup.Jsoup;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+import org.springframework.transaction.annotation.Propagation;
+import org.springframework.transaction.annotation.Transactional;
+import org.springframework.util.StopWatch;
+import top.lvzhiqiang.entity.CrawlerXiaoeknowCourse;
+import top.lvzhiqiang.entity.DicCode;
+import top.lvzhiqiang.exception.BusinessException;
+import top.lvzhiqiang.mapper.CrawlerXiaoeknowCourseMapper;
+import top.lvzhiqiang.mapper.DicCodeMapper;
+import top.lvzhiqiang.service.CrawlerService;
+import top.lvzhiqiang.util.DateUtils;
+
+import javax.annotation.Resource;
+import java.io.IOException;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Crawler ServiceImpl
+ *
+ * @author lvzhiqiang
+ * 2022/9/10 21:24
+ */
+@Service
+@Slf4j
+public class CrawlerServiceImpl implements CrawlerService {
+
+    @Resource
+    private CrawlerXiaoeknowCourseMapper crawlerXiaoeknowCourseMapper;
+    @Resource
+    private DicCodeMapper dicCodeMapper;
+
+    @Value("${spring.profiles.active}")
+    private String env;
+
+    private final int defaultPageSize = 8;
+
+    /**
+     * Searches stored courses and renders the matches as an HTML fragment (a "total" line followed
+     * by a table whose title cell links to the course page).
+     * <p>
+     * {@code crudT} values "2" (update) and "3" (delete) are not implemented and simply return
+     * {@code "success"}; any other value runs the query.
+     * <p>
+     * Sorting is applied only when {@code orderField} is {@code start_at} or {@code view_count}
+     * and {@code order} is {@code asc} or {@code desc} (case-insensitive). Other values are dropped
+     * because the mapper interpolates them into the SQL text. All crawled values are HTML-escaped
+     * before being written into the markup.
+     *
+     * @param title      optional title keyword
+     * @param type       optional resource type (3: video, 4: live stream)
+     * @param orderField column to sort by
+     * @param order      sort direction
+     * @param crudT      operation selector
+     * @return HTML fragment, or {@code "success"} for the placeholder operations
+     * @author lvzhiqiang
+     * 2022/9/11 17:01
+     */
+    @Override
+    public String findXiaoeknowCourse(String title, Integer type, String orderField, String order, String crudT) {
+        if ("2".equals(crudT) || "3".equals(crudT)) {
+            // update / delete: placeholders, nothing to do yet
+            return "success";
+        }
+
+        // orderField/order reach a ${...} substitution in the mapper, so only known-safe values may pass.
+        boolean sortable = ("start_at".equals(orderField) || "view_count".equals(orderField))
+                && ("asc".equalsIgnoreCase(order) || "desc".equalsIgnoreCase(order));
+
+        Map<String, Object> params = new HashMap<>();
+        params.put("title", title);
+        params.put("type", type);
+        params.put("orderField", sortable ? orderField : null);
+        params.put("order", sortable ? order : null);
+        List<CrawlerXiaoeknowCourse> crawlerXiaoeknowCourseList = crawlerXiaoeknowCourseMapper.findXiaoeknowCourse4MultipleParams(params);
+
+        StringBuilder sb = new StringBuilder("total:").append(crawlerXiaoeknowCourseList.size()).append("<br/>");
+        sb.append("<table border=\"1\" cellspacing=\"0\"><tr><th>resourceTitle</th><th>resourceType</th><th>viewCount</th><th>startAt</th><th>appId</th><th>columnId</th><th>resourceId</th></tr>");
+
+        for (CrawlerXiaoeknowCourse course : crawlerXiaoeknowCourseList) {
+            String appId = course.getAppId();
+            String columnId = course.getColumnId();
+            String resourceId = course.getResourceId();
+            Integer resourceTypeCode = course.getResourceType();
+
+            // Build the URL by concatenation rather than placeholder replacement, so an id that
+            // happens to contain a placeholder word cannot corrupt the link.
+            String resourceType = "";
+            String url = "";
+            if (Integer.valueOf(3).equals(resourceTypeCode)) {
+                resourceType = "视频";
+                url = "https://" + appId + ".h5.xiaoeknow.com/p/course/video/" + resourceId + "?product_id=" + columnId;
+            } else if (Integer.valueOf(4).equals(resourceTypeCode)) {
+                resourceType = "直播";
+                url = "https://" + appId + ".h5.xiaoeknow.com/v2/course/alive/" + resourceId + "?type=2&pro_id=" + columnId + "&app_id=" + appId;
+            }
+
+            sb.append("<tr>");
+            sb.append("<td><a target=\"_blank\" href=\"").append(escapeHtml(url)).append("\">")
+                    .append(escapeHtml(course.getResourceTitle())).append("</a></td>");
+            sb.append("<td>").append(resourceType).append("</td>");
+            sb.append("<td>").append(course.getViewCount()).append("</td>");
+            sb.append("<td>").append(course.getStartAt()).append("</td>");
+            sb.append("<td>").append(escapeHtml(appId)).append("</td>");
+            sb.append("<td>").append(escapeHtml(columnId)).append("</td>");
+            sb.append("<td>").append(escapeHtml(resourceId)).append("</td>");
+            sb.append("</tr>");
+        }
+        sb.append("</table>");
+
+        return sb.toString();
+    }
+
+    /**
+     * Escapes the characters that are significant in HTML text and attribute values.
+     *
+     * @param text raw text; {@code null} is rendered as the string "null", as a plain append would
+     * @return escaped text
+     */
+    private static String escapeHtml(String text) {
+        if (text == null) {
+            return "null";
+        }
+        StringBuilder out = new StringBuilder(text.length() + 16);
+        for (int i = 0; i < text.length(); i++) {
+            char c = text.charAt(i);
+            switch (c) {
+                case '&':
+                    out.append("&amp;");
+                    break;
+                case '<':
+                    out.append("&lt;");
+                    break;
+                case '>':
+                    out.append("&gt;");
+                    break;
+                case '"':
+                    out.append("&quot;");
+                    break;
+                case '\'':
+                    out.append("&#39;");
+                    break;
+                default:
+                    out.append(c);
+            }
+        }
+        return out.toString();
+    }
+
+    /**
+     * Crawls the course list of every configured Xiaoeknow column and stores the new entries.
+     * <p>
+     * Targets come from the {@code DicCode} rows whose description is {@code xiaoeknow_url} and
+     * whose env contains the active profile: the code key is the app id, the code value a
+     * comma-separated list of column ids. For each column the crawl is incremental: pages are
+     * requested newest first and reading stops at the first item whose start date is not after the
+     * newest start date already stored for that column.
+     *
+     * @param status           currently only logged
+     * @param isDel            1 clears the table before crawling; any other value (or {@code null}) keeps it
+     * @param ignoreRetryCount currently only logged
+     * @throws BusinessException when cookies or the column's base info cannot be fetched
+     * @author lvzhiqiang
+     * 2022/9/10 21:24
+     */
+    //@Async
+    @Override
+    @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
+    public void jsoupXiaoeknowCourse(Integer status, Integer isDel, Integer ignoreRetryCount) {
+        log.warn("jsoupXiaoeknowCourse 开始:status={},isDel={},ignoreRetryCount={}", status, isDel, ignoreRetryCount);
+        StopWatch stopWatch = new StopWatch();
+        stopWatch.start();
+        // Null-safe comparison: a missing isDel means "keep existing rows".
+        if (Integer.valueOf(1).equals(isDel)) {
+            crawlerXiaoeknowCourseMapper.deleteAll();
+        }
+
+        // The templates stay untouched; each app derives its own URLs from them. (Overwriting the
+        // templates would remove the placeholder and pin every later app to the first app's host.)
+        final String hostPlaceholder = "aaaaaaaaaaaaaaaaaaaa";
+        final String itemsUrlTemplate = "https://aaaaaaaaaaaaaaaaaaaa.h5.xiaoeknow.com/xe.course.business.column.items.get/2.0.0";
+        final String baseInfoUrlTemplate = "https://aaaaaaaaaaaaaaaaaaaa.h5.xiaoeknow.com/xe.course.business.column.base_info.get/2.0.0";
+
+        // Load the crawl targets for the active environment.
+        List<DicCode> dicCodeList = dicCodeMapper.findAll().stream().filter(x -> "xiaoeknow_url".equals(x.getCodeDesc()) && x.getEnv().contains(env)).collect(Collectors.toList());
+
+        int insertTotalNum = 0;
+        for (DicCode dicCode : dicCodeList) {
+            String appId = dicCode.getCodeKey();
+            String[] columnIdArr = dicCode.getCodeValue().split(",");
+
+            String itemsURL = itemsUrlTemplate.replace(hostPlaceholder, appId);
+            String baseInfoURL = baseInfoUrlTemplate.replace(hostPlaceholder, appId);
+            for (String columnId : columnIdArr) {
+                // Fresh form data per column so paging keys of the previous column do not leak
+                // into this column's base_info request.
+                Map<String, String> params = new HashMap<>();
+                params.put("bizData[column_id]", columnId);
+
+                // Newest stored start date for this column; everything at or before it is skipped.
+                CrawlerXiaoeknowCourse latestInfo = crawlerXiaoeknowCourseMapper.findLatestInfo(appId, columnId);
+                LocalDate latestDate = (latestInfo == null || latestInfo.getStartAt() == null)
+                        ? LocalDate.of(1970, 1, 1)
+                        : latestInfo.getStartAt();
+
+                String referer = "https://" + appId + ".h5.xiaoeknow.com/p/course/column/" + columnId + "?type=3";
+
+                // Fetch cookies and the total number of resources in the column.
+                int total;
+                Map<String, String> cookies;
+                try {
+                    cookies = getCookies(referer);
+                    Connection.Response response = postForm(baseInfoURL, params, referer, cookies);
+
+                    JSONObject result = JSONObject.parseObject(response.body());
+                    total = result.getJSONObject("data").getInteger("resource_count");
+                    log.warn("jsoupXiaoeknowCourse 获取总数:appId={},columnId={},total={}", appId, columnId, total);
+                } catch (Exception e) {
+                    log.error("jsoupXiaoeknowCourse base_info error,appId={},columnId={}", appId, columnId, e);
+                    throw new BusinessException(500, e.getMessage());
+                }
+
+                params.put("bizData[page_size]", String.valueOf(defaultPageSize));
+                params.put("bizData[sort]", "desc");
+                int lastPageNo = (total / defaultPageSize) + (total % defaultPageSize > 0 ? 1 : 0);
+                List<CrawlerXiaoeknowCourse> xiaoeknowCourseList = new ArrayList<>();
+                outer:
+                for (int currentPageNo = 1; currentPageNo <= lastPageNo; currentPageNo++) {
+                    params.put("bizData[page_index]", String.valueOf(currentPageNo));
+                    try {
+                        Connection.Response response = postForm(itemsURL, params, referer, cookies);
+
+                        JSONObject result = JSONObject.parseObject(response.body());
+                        JSONArray jsonArray = result.getJSONObject("data").getJSONArray("list");
+                        int currentPageIndex = 0;
+                        for (Object o : jsonArray) {
+                            JSONObject jo = (JSONObject) o;
+                            ++currentPageIndex;
+
+                            LocalDate startAt = LocalDate.parse(jo.getString("start_at").substring(0, 10), DateUtils.dateFormatter2);
+                            if (!startAt.isAfter(latestDate)) {
+                                // Reached data that is already stored (list is sorted newest first).
+                                break outer;
+                            }
+
+                            CrawlerXiaoeknowCourse crawlerXiaoeknowCourse = new CrawlerXiaoeknowCourse();
+                            crawlerXiaoeknowCourse.setAppId(appId);
+                            crawlerXiaoeknowCourse.setColumnId(columnId);
+                            crawlerXiaoeknowCourse.setResourceId(jo.getString("resource_id"));
+                            crawlerXiaoeknowCourse.setResourceTitle(jo.getString("resource_title"));
+                            crawlerXiaoeknowCourse.setResourceType(jo.getInteger("resource_type"));
+                            crawlerXiaoeknowCourse.setViewCount(jo.getInteger("view_count"));
+                            crawlerXiaoeknowCourse.setStartAt(startAt);
+                            crawlerXiaoeknowCourse.setCreateTime(LocalDateTime.now());
+                            xiaoeknowCourseList.add(crawlerXiaoeknowCourse);
+                            log.warn("jsoupXiaoeknowCourse success:currentPageNo={},currentPageIndex={},resourceTitle={}", currentPageNo, currentPageIndex, crawlerXiaoeknowCourse.getResourceTitle());
+                        }
+                    } catch (Exception e) {
+                        // Best effort: a failed page is logged and the remaining pages are still tried.
+                        // NOTE(review): rows of a failed page are not retried by later runs once newer
+                        // rows are stored, because the crawl stops at the newest stored date - confirm
+                        // whether the column should be skipped entirely instead.
+                        log.error("jsoupXiaoeknowCourse error,params={}", params, e);
+                    }
+                }
+                if (!xiaoeknowCourseList.isEmpty()) {
+                    crawlerXiaoeknowCourseMapper.insertList(xiaoeknowCourseList);
+                    insertTotalNum += xiaoeknowCourseList.size();
+                }
+            }
+        }
+
+        stopWatch.stop();
+        log.warn("jsoupXiaoeknowCourse 结束:insertTotalNum={},耗时={}", insertTotalNum, stopWatch.getTotalTimeSeconds());
+    }
+
+    /**
+     * Sends a form-encoded POST with the crawler's usual headers, cookies and a random User-Agent.
+     *
+     * @param url      endpoint to call
+     * @param formData form fields to send
+     * @param referer  value of the {@code referer} header
+     * @param cookies  cookies to attach
+     * @return the raw response
+     * @throws IOException if the request fails
+     */
+    private Connection.Response postForm(String url, Map<String, String> formData, String referer, Map<String, String> cookies) throws IOException {
+        return Jsoup.connect(url)
+                .header("Content-Type", "application/x-www-form-urlencoded")
+                .timeout(50000)
+                //.proxy()
+                .data(formData)
+                .ignoreContentType(true)
+                .userAgent(getUserAgent())
+                .header("referer", referer)
+                .cookies(cookies)
+                .method(Connection.Method.POST)
+                .execute();
+    }
+
+    /**
+     * Issues a plain GET to {@code url} and returns the cookies carried by that response.
+     *
+     * @param url page to request
+     * @return cookies of the response
+     * @throws IOException if the request fails
+     */
+    private Map<String, String> getCookies(String url) throws IOException {
+        return Jsoup.connect(url)
+                .method(Connection.Method.GET)
+                .execute()
+                .cookies();
+    }
+
+    /**
+     * Pool of browser User-Agent strings used to vary outgoing requests.
+     */
+    private static final String[] USER_AGENTS = {
+            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
+            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36 OPR/37.0.2178.32",
+            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
+            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586",
+            "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
+            "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
+            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
+            "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0)",
+            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 BIDUBrowser/8.3 Safari/537.36",
+            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36 Core/1.47.277.400 QQBrowser/9.4.7658.400",
+            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 UBrowser/5.6.12150.8 Safari/537.36",
+            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0",
+            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36 TheWorld 7",
+            // Restored from a truncated literal that contained a non-ASCII ellipsis, which is not a
+            // valid character for an HTTP header value.
+            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0",
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"};
+
+    /**
+     * Picks a random User-Agent from {@link #USER_AGENTS}.
+     * <p>
+     * The bound is the array length, so every entry (including the last one) can be chosen and
+     * the bound stays correct when entries are added or removed.
+     *
+     * @return one of the pooled User-Agent strings
+     */
+    private String getUserAgent() {
+        int i = java.util.concurrent.ThreadLocalRandom.current().nextInt(USER_AGENTS.length);
+        return USER_AGENTS[i];
+    }
+}
+
+

+ 2 - 0
src/main/java/top/lvzhiqiang/util/DateUtils.java

@@ -33,6 +33,7 @@ public class DateUtils {
 	 * (精确到天的)日期样式
 	 */
 	public static final String PATTERN_TO_DAYS = "yyyy-MM-dd";
+	public static final String PATTERN_TO_DAYS2 = "yyyy.MM.dd";
 
 	/**
 	 * (精确到秒的)日期样式
@@ -40,6 +41,7 @@ public class DateUtils {
 	public static final String PATTERN_TO_SECONDS = "yyyy-MM-dd HH:mm:ss";
 
 	public static final DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern(PATTERN_TO_DAYS);
+	public static final DateTimeFormatter dateFormatter2 = DateTimeFormatter.ofPattern(PATTERN_TO_DAYS2);
 
 	public static final DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern(PATTERN_TO_SECONDS);
 

+ 2 - 2
src/main/resources/env/test/log4j2-spring.xml

@@ -31,7 +31,7 @@
             <!-- 输出日志的格式 -->
             <PatternLayout pattern="${CONSOLE_LOG_PATTERN}"/>
             <!-- 控制台只输出level及其以上级别的信息(onMatch),其他的直接拒绝(onMismatch)-->
-            <ThresholdFilter level="info" onMatch="ACCEPT" onMismatch="DENY"/>
+            <ThresholdFilter level="debug" onMatch="ACCEPT" onMismatch="DENY"/>
         </console>
 
         <!-- 日志文件的 Appender -->
@@ -94,7 +94,7 @@
 
         <!-- 设置 Appender ,同时 ROOT 的日志级别为INFO -->
         <root level="info">
-            <appender-ref ref="Console"/>
+            <!--<appender-ref ref="Console"/>-->
             <appender-ref ref="RollingFileDebug"/>
             <appender-ref ref="RollingFileWarn"/>
             <appender-ref ref="RollingFileError"/>

+ 71 - 0
src/main/resources/static/crawler.html

@@ -0,0 +1,71 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <title>CRAWLER HOME</title>
+</head>
+<style type="text/css">
+    .font {
+        color: blue;
+    }
+
+    .dynamic_hide {
+        display: none;
+    }
+</style>
+<script type="text/javascript">
+    // Shows the "my" panel when it is hidden and hides it when it is visible.
+    function show() {
+        var panel = document.getElementById("my");
+        var isHidden = panel.className.indexOf("dynamic_hide") !== -1;
+        panel.classList[isHidden ? "remove" : "add"]("dynamic_hide");
+    }
+</script>
+<body>
+<span>Hello <font id="myc" style="cursor: pointer;" onclick="show()">W</font>orld!</span>
+<div id="my" class="dynamic_hide">
+    <hr/>
+    <div style="margin-right:20px;">
+        <span class="font">findXiaoeknowCourse</span>
+        <form method="post" action="bg/crawler/findXiaoeknowCourse">
+            <span>title</span>
+            <input type="text" name="title" placeholder="标题关键词,可为空"/>
+            <span>type</span>
+            <input type="text" name="type" placeholder="类型{3:视频,4:直播},可为空" style="width: 230px;"/>
+            <span>order</span>
+            <select name="orderField" style="height: 21.43px;">
+                <option value="start_at">时间</option>
+                <option value="view_count">数量</option>
+            </select>
+            <select name="order" style="height: 21.43px;">
+                <option value="desc">desc</option>
+                <option value="asc">asc</option>
+            </select>
+            <span>crudT</span>
+            <select name="crudT" style="height: 21.43px;">
+                <option value="1">查询</option>
+                <option value="2">更新</option>
+                <option value="3">删除</option>
+            </select>
+            <input type="submit" value="提交">
+        </form>
+    </div>
+    <hr/>
+    <div style="margin-right:20px;">
+        <span class="font">jsoupXiaoeknowCourse</span>
+        <form method="post" action="bg/crawler/jsoupXiaoeknowCourse">
+            <span>status</span>
+            <input type="text" name="status" placeholder="1:未爬取,3:爬取失败。默认是未爬取" style="width: 300px;"/>
+            <span>isDel</span>
+            <input type="text" name="isDel" placeholder="1:是,2:否。默认否"/>
+            <span>ignoreRetryCount</span>
+            <input type="text" name="ignoreRetryCount" placeholder="1:是,2:否。默认否"/>
+            <input type="submit" value="提交">
+        </form>
+    </div>
+</div>
+</body>
+</html>

+ 1 - 0
src/main/resources/static/index.html

@@ -27,6 +27,7 @@
             <ul>
                 <li><a href="video.html">VIDEO</a></li>
                 <li><a href="bg.html">BG</a></li>
+                <li><a href="crawler.html">CRAWLER</a></li>
             </ul>
         </nav>
     </header>

+ 34 - 0
src/test/java/top/lvzhiqiang/CrawlerTest.java

@@ -0,0 +1,34 @@
+package top.lvzhiqiang;
+
+import lombok.extern.slf4j.Slf4j;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
+import top.lvzhiqiang.service.CrawlerService;
+
+import javax.annotation.Resource;
+
+/**
+ * Spring Boot test for the crawler service; starts the application context with the
+ * {@code dev} profile and DEBUG logging for {@code top.lvzhiqiang}.
+ *
+ * @author lvzhiqiang
+ * 2022/9/10 21:24
+ */
+@Slf4j
+@RunWith(SpringJUnit4ClassRunner.class)
+@SpringBootTest(properties = {
+        "spring.profiles.active=dev",
+        "logging.level.top.lvzhiqiang=DEBUG"
+}
+)
+public class CrawlerTest {
+
+    @Resource
+    private CrawlerService crawlerService;
+
+    /**
+     * Runs one crawl with {@code isDel = 2} (existing rows are kept) and no status / retry options.
+     * There are no assertions; the test passes when the call completes without throwing.
+     */
+    @Test
+    public void testJsoupXiaoeknowCourse() {
+        crawlerService.jsoupXiaoeknowCourse(null, 2, null);
+    }
+}