Răsfoiți Sursa

update:javbus v4

lvzhiqiang 3 ani în urmă
părinte
comite
b4ead2a0cf

+ 1 - 1
src/main/java/top/lvzhiqiang/config/WebAppConfig.java

@@ -47,7 +47,7 @@ public class WebAppConfig implements WebMvcConfigurer {
 
         // 初始化字典码表
         dicCodeList = dicCodeMapper.findAll();
-        List<DicCode> dicCodeList = WebAppConfig.dicCodeList.stream().filter(x -> 1 == x.getType() && env.equals(x.getEnv())).collect(Collectors.toList());
+        List<DicCode> dicCodeList = WebAppConfig.dicCodeList.stream().filter(x -> x.getType() != null && 1 == x.getType() && env.equals(x.getEnv())).collect(Collectors.toList());
         for (DicCode dicCode : dicCodeList) {
             StringBuffer resourceHandlerSB = new StringBuffer("/");
             resourceHandlerSB.append(dicCode.getCodeKey().split("_")[0]).append("/**");

+ 3 - 3
src/main/java/top/lvzhiqiang/controller/CrawlerController.java

@@ -185,9 +185,9 @@ public class CrawlerController {
      */
     @RequestMapping("/jsoupJavbusProfile")
     @ResponseBody
-    public String jsoupJavbusProfile(String start, String limit) throws Exception {
-        if (StringUtils.isEmpty(start) || StringUtils.isEmpty(limit)) {
-            throw new ParameterException("start|password不能为空");
+    public String jsoupJavbusProfile(Long start, Integer limit) throws Exception {
+        if (null == start) {
+            throw new ParameterException("start不能为空");
         }
 
         crawler4JavbusService.jsoupJavbusProfile(start, limit);

+ 84 - 0
src/main/java/top/lvzhiqiang/mapper/CrawlerJavbusProfileMapper.java

@@ -0,0 +1,84 @@
+package top.lvzhiqiang.mapper;
+
+import org.apache.ibatis.annotations.Delete;
+import org.apache.ibatis.annotations.Insert;
+import org.apache.ibatis.annotations.Select;
+import top.lvzhiqiang.entity.CrawlerJavbusLog;
+import top.lvzhiqiang.entity.CrawlerJavbusProfile;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * MyBatis annotation mapper for the crawler_javbus_profile table (crawled javbus forum
+ * user profiles). It also writes rows to crawler_javbus_log.
+ *
+ * @author lvzhiqiang
+ * 2022/10/19 9:42
+ */
+public interface CrawlerJavbusProfileMapper {
+
+    /**
+     * Deletes every row of crawler_javbus_profile.
+     */
+    @Delete("DELETE FROM crawler_javbus_profile where 1=1")
+    void deleteAll();
+
+    /**
+     * Inserts a profile row, or updates the existing row when the insert hits a duplicate key.
+     * <p>
+     * On insert, create_time and modify_time are both set to now(). On update, every inserted
+     * column except uid, avatar_local_url and create_time is overwritten with the new value and
+     * modify_time is set to now().
+     *
+     * @param crawlerJavbusProfile profile whose properties are bound to the statement parameters
+     */
+    @Insert("INSERT INTO crawler_javbus_profile(uid, nick_name, email_status,friend_num,reply_num," +
+            "thread_num,user_group,online_time,registration_time,last_visit,last_activity_time,last_published_time," +
+            "time_zone,used_space,mileage,money,avatar_url,avatar_local_url,sign_str,sign_img,create_time,modify_time) " +
+            "VALUES (#{uid}, #{nickName}, #{emailStatus}, #{friendNum}, #{replyNum}, #{threadNum}, #{userGroup}, #{onlineTime}, " +
+            "#{registrationTime}, #{lastVisit}, #{lastActivityTime}, #{lastPublishedTime}, #{timeZone}, #{usedSpace}, #{mileage}, " +
+            "#{money}, #{avatarUrl}, #{avatarLocalUrl}, #{signStr}, #{signImg}, now(), now()) " +
+            "ON DUPLICATE KEY UPDATE nick_name=values(nick_name),email_status=values(email_status),friend_num=values(friend_num),reply_num=values(reply_num)," +
+            "thread_num=values(thread_num),user_group=values(user_group),online_time=values(online_time),registration_time=values(registration_time)," +
+            "last_visit=values(last_visit),last_activity_time=values(last_activity_time),last_published_time=values(last_published_time),time_zone=values(time_zone)," +
+            "used_space=values(used_space),mileage=values(mileage),money=values(money),avatar_url=values(avatar_url),sign_str=values(sign_str)," +
+            "sign_img=values(sign_img),modify_time=now()")
+    void insertOrUpdate(CrawlerJavbusProfile crawlerJavbusProfile);
+
+    /**
+     * Selects all profile rows whose delete_flag is 1.
+     * NOTE(review): presumably delete_flag = 1 marks a live (not deleted) row - confirm against the schema.
+     *
+     * @return the matching profiles
+     */
+    @Select("SELECT * FROM crawler_javbus_profile WHERE delete_flag = 1")
+    List<CrawlerJavbusProfile> findAll();
+
+    /**
+     * Selects the single profile row with the highest uid among rows whose delete_flag is 1.
+     *
+     * @return the profile with the largest uid
+     */
+    @Select("SELECT * FROM crawler_javbus_profile WHERE delete_flag = 1 order by uid desc limit 1")
+    CrawlerJavbusProfile findLatestInfo();
+
+    /**
+     * Selects profile rows whose delete_flag is 1, optionally filtered by nickname.
+     *
+     * @param params query parameters; when the "nickName" entry is neither null nor the empty
+     *               string, only rows whose nick_name contains that value are returned
+     * @return the matching profiles
+     */
+    @Select({"<script>" +
+            "select * from crawler_javbus_profile WHERE delete_flag = 1" +
+            "<if test=\"nickName != null and nickName != ''\">" +
+            "   and nick_name like concat('%',#{nickName},'%')" +
+            "</if>" +
+            "</script>"})
+    List<CrawlerJavbusProfile> findJavbusProfile4MultipleParams(Map<String, Object> params);
+
+
+    /**
+     * Inserts a row into crawler_javbus_log, or on a duplicate key replaces error_msg of the
+     * existing row and sets its modify_time to now().
+     *
+     * @param crawlerJavbusLog log entry providing type, businessKey and errorMsg
+     */
+    @Insert("INSERT INTO crawler_javbus_log(type, business_key, error_msg,create_time,modify_time) " +
+            "VALUES (#{type}, #{businessKey}, #{errorMsg}, now(), now()) " +
+            "ON DUPLICATE KEY UPDATE error_msg=values(error_msg),modify_time=now()")
+    void insertOrUpdateLog(CrawlerJavbusLog crawlerJavbusLog);
+}

+ 1 - 1
src/main/java/top/lvzhiqiang/service/Crawler4JavbusService.java

@@ -8,5 +8,5 @@ package top.lvzhiqiang.service;
  */
 public interface Crawler4JavbusService {
 
-    void jsoupJavbusProfile(String start, String limit) throws Exception;
+    void jsoupJavbusProfile(Long start, Integer limit) throws Exception;
 }

+ 147 - 13
src/main/java/top/lvzhiqiang/service/impl/Crawler4JavbusServiceImpl.java

@@ -6,12 +6,19 @@ import org.jsoup.Connection;
 import org.jsoup.HttpStatusException;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
 import org.springframework.beans.factory.annotation.Value;
+import org.springframework.scheduling.annotation.Async;
 import org.springframework.stereotype.Service;
 import org.springframework.util.StopWatch;
+import top.lvzhiqiang.entity.CrawlerJavbusLog;
+import top.lvzhiqiang.entity.CrawlerJavbusProfile;
 import top.lvzhiqiang.entity.DicCode;
+import top.lvzhiqiang.mapper.CrawlerJavbusProfileMapper;
 import top.lvzhiqiang.mapper.DicCodeMapper;
 import top.lvzhiqiang.service.Crawler4JavbusService;
+import top.lvzhiqiang.util.DateUtils;
 import top.lvzhiqiang.util.JsoupUtil;
 import top.lvzhiqiang.util.StringUtils;
 
@@ -22,10 +29,8 @@ import java.net.HttpURLConnection;
 import java.net.InetSocketAddress;
 import java.net.Proxy;
 import java.net.URL;
-import java.util.Base64;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.time.LocalDateTime;
+import java.util.*;
 import java.util.stream.Collectors;
 
 /**
@@ -40,15 +45,18 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
 
     @Resource
     private DicCodeMapper dicCodeMapper;
+    @Resource
+    private CrawlerJavbusProfileMapper crawlerJavbusProfileMapper;
     @Value("${spring.profiles.active}")
     private String env;
 
-    Map<String, String> javbusConstantMap;
+    Map<String, String> javbusConstantMap = null;
     Map<String, String> javbusCookiesMap = null;
     private String bdAccessToken = "";
 
+    @Async
     @Override
-    public void jsoupJavbusProfile(String start, String limit) throws Exception {
+    public void jsoupJavbusProfile(Long start, Integer limit) throws Exception {
         log.warn("jsoupJavbusProfile 开始:start={},limit={}", start, limit);
         StopWatch stopWatch = new StopWatch();
         stopWatch.start();
@@ -68,15 +76,137 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
             bdAccessToken = getAuth(javbusConstantMap.get("bd_ak"), javbusConstantMap.get("bd_sk"));
         }
         if (null == javbusCookiesMap) {
-            generateJavbusCookies(proxy);
+            for (int i = 0; i < 3; i++) {
+                if (generateJavbusCookies(proxy)) {
+                    break;
+                } else {
+                    javbusCookiesMap = null;
+                }
+            }
+            if (javbusCookiesMap == null) {
+                throw new Exception("获取javbusCookies失败!");
+            }
         }
         // 获取个人资料
-        Connection.Response memberInfoResponse = JsoupUtil.requestBody("https://www.javbus.com/forum/?355292", JsoupUtil.HTTP_GET, proxy, javbusCookiesMap, null, null);
-        System.out.println(memberInfoResponse.body());
+        jsoupJavbusProfileSub(proxy, start, limit);
+    }
+
+    private void jsoupJavbusProfileSub(Proxy proxy, Long start, Integer limit) throws Exception {
+        CrawlerJavbusProfile latestJavbusProfile = crawlerJavbusProfileMapper.findLatestInfo();
+        if (start == null && latestJavbusProfile == null) {
+            start = 1L;
+        } else if (start == null && latestJavbusProfile != null) {
+            start = latestJavbusProfile.getUid() + 1;
+        }
+
+        long startFinal = 0;
+        if (limit != null) {
+            startFinal = start + limit;
+        }
+
+        String profileUrl = "https://www.javbus.com/forum/?";
+        Document profileDocument;
+        int continueCount = 0;
+        while (true) {
+            if (startFinal != 0 && start > startFinal) {
+                log.warn("jsoupJavbusProfileSub结束,start={},startFinal={}", start, startFinal);
+                return;
+            }
+            if (start > 500000 && continueCount > 10) {
+                log.warn("jsoupJavbusProfileSub结束,start={},continueCount={}", start, continueCount);
+                return;
+            }
+
+            String uid = "";
+            String nickName = "";
+            try {
+                profileDocument = JsoupUtil.requestDocument(profileUrl.concat(String.valueOf(start)), JsoupUtil.HTTP_GET, proxy, javbusCookiesMap, null, null);
+                start++;
 
+                if (profileDocument.html().contains("您指定的用戶空間不存在")) {
+                    log.warn("jsoupJavbusProfileSub您指定的用戶空間不存在,start={}", start - 1);
+                    continueCount++;
+                    continue;
+                }
+
+                String avatarUrl = profileDocument.select("div.avt").select("img").attr("src");
+                String[] mbn0Arr = profileDocument.select("div.u_profile").select("div.cl").get(0).select("h2.mbn").get(0).text().replace("(", "").replace(")", "").split("UID:");
+                nickName = mbn0Arr[0].trim();
+                uid = mbn0Arr[1].trim();
+                String emailStatus = profileDocument.select("div.u_profile").select("div.cl").get(0).select("ul").first().text().replace("郵箱狀態", "").trim();
+
+                Elements signEles = profileDocument.select("div.u_profile").select("div.cl").get(0).select("ul").get(1).select("li:contains(個人簽名)");
+                String signStr = "";
+                ArrayList<String> signImgList = new ArrayList<>();
+                if (signEles.size() > 0) {
+                    signStr = signEles.first().select("table").text();
+                    Elements signImgEles = signEles.first().select("table").select("img");
+                    for (Element signImgEle : signImgEles) {
+                        signImgList.add(signImgEle.attr("src"));
+                    }
+                }
+
+                String friendNum = profileDocument.select("div.u_profile").select("div.cl").get(0).select("ul").get(2)
+                        .select("a").get(0).text().replace("好友數", "").trim();
+                String replyNum = profileDocument.select("div.u_profile").select("div.cl").get(0).select("ul").get(2)
+                        .select("a").get(1).text().replace("回帖數", "").trim();
+                String threadNum = profileDocument.select("div.u_profile").select("div.cl").get(0).select("ul").get(2)
+                        .select("a").get(2).text().replace("主題數", "").trim();
+                String userGroup = profileDocument.select("div.u_profile").select("div.cl").get(1).select("ul").get(0)
+                        .select("a").text();
+                String onlineTime = profileDocument.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
+                        .select("li:contains(在線時間)").text().replace("在線時間", "").replace("小時", "").trim();
+                String registrationTime = profileDocument.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
+                        .select("li:contains(註冊時間)").text().replace("註冊時間", "").trim();
+                String lastVisit = profileDocument.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
+                        .select("li:contains(最後訪問)").text().replace("最後訪問", "").trim();
+                String lastActivityTime = profileDocument.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
+                        .select("li:contains(上次活動時間)").text().replace("上次活動時間", "").trim();
+                String lastPublishedTime = profileDocument.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
+                        .select("li:contains(上次發表時間)").text().replace("上次發表時間", "").trim();
+                String timeZone = profileDocument.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
+                        .select("li:contains(所在時區)").text().replace("所在時區", "").trim();
+                String usedSpace = profileDocument.select("div.u_profile").select("div.cl").get(2).select("ul").get(0)
+                        .select("li").get(0).text().replace("已用空間", "").replace("B", "").trim();
+                String mileage = profileDocument.select("div.u_profile").select("div.cl").get(2).select("ul").get(0)
+                        .select("li").get(1).text().replace("里程", "").trim();
+                String money = profileDocument.select("div.u_profile").select("div.cl").get(2).select("ul").get(0)
+                        .select("li").last().text().replace("金錢", "").trim();
+
+                CrawlerJavbusProfile crawlerJavbusProfile = new CrawlerJavbusProfile();
+                crawlerJavbusProfile.setUid(Long.valueOf(uid));
+                crawlerJavbusProfile.setNickName(nickName);
+                crawlerJavbusProfile.setEmailStatus(emailStatus);
+                crawlerJavbusProfile.setFriendNum(Integer.valueOf(friendNum));
+                crawlerJavbusProfile.setReplyNum(Integer.valueOf(replyNum));
+                crawlerJavbusProfile.setThreadNum(Integer.valueOf(threadNum));
+                crawlerJavbusProfile.setUserGroup(userGroup);
+                crawlerJavbusProfile.setOnlineTime(StringUtils.isNotEmpty(onlineTime) ? Integer.valueOf(onlineTime) : null);
+                crawlerJavbusProfile.setRegistrationTime(StringUtils.isNotEmpty(registrationTime) && registrationTime.length() >= 10 ? LocalDateTime.parse(registrationTime, DateUtils.dateTimeFormatter3) : null);
+                crawlerJavbusProfile.setLastVisit(StringUtils.isNotEmpty(lastVisit) && lastVisit.length() >= 10 ? LocalDateTime.parse(lastVisit, DateUtils.dateTimeFormatter3) : null);
+                crawlerJavbusProfile.setLastActivityTime(StringUtils.isNotEmpty(lastActivityTime) && lastActivityTime.length() >= 10 ? LocalDateTime.parse(lastActivityTime, DateUtils.dateTimeFormatter3) : null);
+                crawlerJavbusProfile.setLastPublishedTime(StringUtils.isNotEmpty(lastPublishedTime) && lastPublishedTime.length() >= 10 ? LocalDateTime.parse(lastPublishedTime, DateUtils.dateTimeFormatter3) : null);
+                crawlerJavbusProfile.setTimeZone(StringUtils.isNotEmpty(timeZone) ? timeZone : null);
+                crawlerJavbusProfile.setUsedSpace(Integer.valueOf(usedSpace));
+                crawlerJavbusProfile.setMileage(Integer.valueOf(mileage));
+                crawlerJavbusProfile.setMoney(Integer.valueOf(money));
+                crawlerJavbusProfile.setAvatarUrl(avatarUrl);
+                crawlerJavbusProfile.setSignStr(signStr);
+                crawlerJavbusProfile.setSignImg(org.apache.commons.lang3.StringUtils.join(signImgList, ","));
+                crawlerJavbusProfileMapper.insertOrUpdate(crawlerJavbusProfile);
+                log.warn("jsoupJavbusProfileSub成功插入,uid={}", uid);
+            } catch (Exception e) {
+                log.error("jsoupJavbusProfileSub插入异常,nickName={},uid={}", nickName, uid, e);
+                CrawlerJavbusLog crawlerJavbusLog = new CrawlerJavbusLog();
+                crawlerJavbusLog.setType(1);
+                crawlerJavbusLog.setBusinessKey(String.valueOf(uid));
+                crawlerJavbusLog.setErrorMsg(e.getMessage());
+                crawlerJavbusProfileMapper.insertOrUpdateLog(crawlerJavbusLog);
+            }
+        }
     }
 
-    private void generateJavbusCookies(Proxy proxy) throws Exception {
+    private boolean generateJavbusCookies(Proxy proxy) throws Exception {
         // 1 登陆获取cookies
         // 1.0 https://www.javbus.com/forum/forum.php
         Connection.Response forumResponse = JsoupUtil.requestBody(javbusConstantMap.get("forum_url"), JsoupUtil.HTTP_GET, proxy, null);
@@ -162,10 +292,8 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
         }
         String loginUrl = javbusConstantMap.get("member_url").concat(sbParams);
         Connection.Response loginResponse = JsoupUtil.requestBody(loginUrl, JsoupUtil.HTTP_POST, proxy, forumCookies, headerParams, params);
-
+        String loginBody = loginResponse.body();
         Map<String, String> loginCookies = loginResponse.cookies();
-        System.out.println("loginCookies=" + loginCookies);
-        System.out.println(loginResponse.body());
         log.warn("generateJavbusCookies=>,loginUrl={},params={},forumCookies={},loginCookies={},loginResponseBody={}", loginUrl, params, forumCookies, loginCookies, loginResponse.body());
         for (Map.Entry<String, String> loginCookie : loginCookies.entrySet()) {
             if (loginCookie.getKey().contains("ulastactivity")) {
@@ -180,6 +308,12 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
         }
         log.warn("generateJavbusCookies=>,forumFinalCookies={}", forumCookies);
         javbusCookiesMap = forumCookies;
+
+        if (loginBody.contains("歡迎您回來")) {
+            return true;
+        } else {
+            return false;
+        }
     }
 
     public String getAuth(String ak, String sk) {

+ 305 - 302
src/main/java/top/lvzhiqiang/util/DateUtils.java

@@ -1,5 +1,6 @@
 /**
  * All rights Reserved, Designed By 上海风声企业信用征信有限公司
+ *
  * @Title: DateUtils.java
  * @Package com.riskraider.fxld.utils
  * @Description: TODO(用一句话描述该文件做什么)
@@ -29,304 +30,306 @@ import java.util.Date;
  */
 public class DateUtils {
 
-	/**
-	 * (精确到天的)日期样式
-	 */
-	public static final String PATTERN_TO_DAYS = "yyyy-MM-dd";
-	public static final String PATTERN_TO_DAYS2 = "yyyy.MM.dd";
-
-	/**
-	 * (精确到秒的)日期样式
-	 */
-	public static final String PATTERN_TO_SECONDS = "yyyy-MM-dd HH:mm:ss";
-
-	public static final DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern(PATTERN_TO_DAYS);
-	public static final DateTimeFormatter dateFormatter2 = DateTimeFormatter.ofPattern(PATTERN_TO_DAYS2);
-
-	public static final DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern(PATTERN_TO_SECONDS);
-
-	public static final DateTimeFormatter dateTimeFormatter2 = DateTimeFormatter.ISO_OFFSET_DATE_TIME;
-
-	/**
-	* @Title: getToday 
-	* @Description: 获取今天的日期
-	* @param @return 设定文件 
-	* @return Date 返回类型 
-	* @throws 
-	* @author shiyong
-	* @date 2017年8月24日 下午2:11:03
-	*/
-	public static Date getToday() {
-		Calendar cal = Calendar.getInstance();
-		
-		return cal.getTime();
-	}
-	
-	/**
-	 * @Title: getYesterday
-	 * @Description: 获取昨天的日期
-	 * @author: 施勇
-	 * @date: 2019年1月17日 上午7:18:38
-	 * @param: @return
-	 * @return: Date
-	 * @throws
-	 */
-	public static Date getYesterday() {
-		Calendar cal = Calendar.getInstance();
-		cal.add(Calendar.DATE, -1);
-		
-		return cal.getTime();
-	}
-	
-	/**
-	 * @Title: getTomorrow
-	 * @Description: 获取明天的日期
-	 * @author: 施勇
-	 * @date: 2019年1月17日 上午7:19:28
-	 * @param: @return
-	 * @return: Date
-	 * @throws
-	 */
-	public static Date getTomorrow() {
-		Calendar cal = Calendar.getInstance();
-		cal.add(Calendar.DATE, 1);
-		
-		return cal.getTime();
-	}
-	
-	/**
-	 * @Title: getTodayStr
-	 * @Description: 获取今天的日期字符串
-	 * @author: 施勇
-	 * @date: 2019年1月17日 上午7:21:51
-	 * @param: @param pattern
-	 * @param: @return
-	 * @return: String
-	 * @throws
-	 */
-	public static String getTodayStr(String pattern){
-		SimpleDateFormat format = new SimpleDateFormat(pattern);		
-		
-		return format.format(getToday());
-	}
-	
-	/**
-	 * @Title: getTodayStr
-	 * @Description: 获取今天的日期字符串
-	 * @author: 施勇
-	 * @date: 2019年1月17日 上午7:33:22
-	 * @param: @return
-	 * @return: String
-	 * @throws
-	 */
-	public static String getTodayStr(){
-		SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
-		
-		return format.format(getToday());
-	}
-	
-	/**
-	 * @Title: getYesterdayStr
-	 * @Description: 获取昨天的日期字符串
-	 * @author: 施勇
-	 * @date: 2019年1月17日 上午7:21:11
-	 * @param: @return
-	 * @return: String
-	 * @throws
-	 */
-	public static String getYesterdayStr(){
-		SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
-		
-		return format.format(getYesterday());
-	}
-	
-	/**
-	 * @Title: getTomorrowStr
-	 * @Description: 获取明天的日期字符串
-	 * @author: 施勇
-	 * @date: 2019年1月17日 上午7:25:00
-	 * @param: @return
-	 * @return: String
-	 * @throws
-	 */
-	public static String getTomorrowStr(){
-		SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
-		
-		return format.format(getTomorrow());
-	}
-	
-	/**
-	 * @Title: formatDate
-	 * @Description: 格式化日期
-	 * @author: 施勇
-	 * @date: 2018年1月4日 上午9:30:29
-	 * @param: @param format
-	 * @param: @param date
-	 * @param: @return
-	 * @return: String
-	 * @throws
-	 */
-	public static String formatDate(Date date) {
-		SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
-		
-		return dateFormat.format(date);
-	}
-
-	public static String formatDate(LocalDate date) {
-		return dateFormatter.format(date);
-	}
-	
-	/**
-	 * @Title: formatDateTime
-	 * @Description: 格式化整个日期时间
-	 * @author: 施勇
-	 * @date: 2018年6月25日 下午1:30:24
-	 * @param: @param date
-	 * @param: @return
-	 * @return: String
-	 * @throws
-	 */
-	public static String formatDateTime(Date date) {
-		SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-		
-		return dateFormat.format(date);
-	}
-
-	public static String formatDateTime(LocalDateTime dateTime) {
-		return dateTimeFormatter.format(dateTime);
-	}
-	
-	/**
-	 * @Title: parseDate
-	 * @Description: 格式化日期字符串
-	 * @author: 施勇
-	 * @date: 2019年1月17日 上午7:40:06
-	 * @param: @param dateString
-	 * @param: @return
-	 * @param: @throws ParseException
-	 * @return: Date
-	 * @throws
-	 */
-	public static Date parseDate(String dateString) throws ParseException{
-		SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
-		
-		return dateFormat.parse(dateString);
-	}
-	
-	/**
-	 * @Title: parseDateTime
-	 * @Description: 格式化时间字符串
-	 * @author: 施勇
-	 * @date: 2019年1月17日 上午7:40:24
-	 * @param: @param dateString
-	 * @param: @return
-	 * @param: @throws ParseException
-	 * @return: Date
-	 * @throws
-	 */
-	public static Date parseDateTime(String dateString) throws ParseException{
-		SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-		
-		return dateFormat.parse(dateString);
-	}
-	
-	/**
-	 * @Title: getFewDateStrsByDate
-	 * @Description: 获取近几天的日期字符串
-	 * @author: 施勇
-	 * @date: 2019年1月17日 上午8:26:40
-	 * @param: @param date
-	 * @param: @param num
-	 * @param: @return
-	 * @return: String
-	 * @throws
-	 */
-	public static String getFewDateStrsByDate(Date date, int num){
-		StringBuffer str = new StringBuffer();
-		
-		Calendar cal = Calendar.getInstance();
-		
-		cal.setTime(date);
-		
-		cal.add(Calendar.DATE, -(num-1));
-		str.append(formatDate(cal.getTime()) + ",");
-		
-		for(int i=0;i<num-1;i++){
-			cal.add(Calendar.DATE, 1);
-			
-			str.append(formatDate(cal.getTime()) + ",");
-		}
-		
-		//去掉最后的逗号
-		
-		return str.substring(0, str.length()-1);
-	}
-	
-	/**
-	 * @Title: getFewDateStrsByWeek
-	 * @Description: 获取近几周同一天的日期字符串
-	 * @author: 施勇
-	 * @date: 2019年1月17日 上午8:33:23
-	 * @param: @param date
-	 * @param: @param num
-	 * @param: @return
-	 * @return: String
-	 * @throws
-	 */
-	public static String getFewDateStrsByWeek(Date date, int num){
-		StringBuffer str = new StringBuffer();
-		
-		Calendar cal = Calendar.getInstance();
-		
-		cal.setTime(date);
-		
-		cal.add(Calendar.WEEK_OF_YEAR, -(num-1));
-		str.append(formatDate(cal.getTime()) + ",");
-		
-		for(int i=0;i<num-1;i++){
-			cal.add(Calendar.WEEK_OF_YEAR, 1);
-			
-			str.append(formatDate(cal.getTime()) + ",");
-		}
-		
-		//去掉最后的逗号
-		
-		return str.substring(0, str.length()-1);
-	}
-	
-	/**
-	 * @Title: getFewDateStrsByMonth
-	 * @Description: 获取近几月同一天的日期字符串
-	 * @author: 施勇
-	 * @date: 2019年1月17日 上午8:31:31
-	 * @param: @param date
-	 * @param: @param num
-	 * @param: @return
-	 * @return: String
-	 * @throws
-	 */
-	public static String getFewDateStrsByMonth(Date date, int num){
-		StringBuffer str = new StringBuffer();
-		
-		Calendar cal = Calendar.getInstance();
-		
-		cal.setTime(date);
-		
-		cal.add(Calendar.MONTH, -(num-1));
-		str.append(formatDate(cal.getTime()) + ",");
-		
-		for(int i=0;i<num-1;i++){
-			cal.add(Calendar.MONTH, 1);
-			
-			str.append(formatDate(cal.getTime()) + ",");
-		}
-		
-		//去掉最后的逗号
-		
-		return str.substring(0, str.length()-1);
-	}
-	
-	/**
+    /**
+     * Day-precision date patterns: dash-separated (PATTERN_TO_DAYS) and dot-separated (PATTERN_TO_DAYS2).
+     */
+    public static final String PATTERN_TO_DAYS = "yyyy-MM-dd";
+    public static final String PATTERN_TO_DAYS2 = "yyyy.MM.dd";
+
+    /**
+     * Date-time patterns. PATTERN_TO_SECONDS is second precision with zero-padded fields;
+     * PATTERN_TO_MINUTES is minute precision and uses single-letter month and day fields
+     * (yyyy-M-d HH:mm).
+     * <p>
+     * The DateTimeFormatter constants declared after these patterns are built from them:
+     * dateFormatter and dateFormatter2 from the two day patterns, dateTimeFormatter from
+     * PATTERN_TO_SECONDS and dateTimeFormatter3 from PATTERN_TO_MINUTES. dateTimeFormatter2 is
+     * DateTimeFormatter.ISO_OFFSET_DATE_TIME.
+     */
+    public static final String PATTERN_TO_SECONDS = "yyyy-MM-dd HH:mm:ss";
+    public static final String PATTERN_TO_MINUTES = "yyyy-M-d HH:mm";
+
+    public static final DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern(PATTERN_TO_DAYS);
+    public static final DateTimeFormatter dateFormatter2 = DateTimeFormatter.ofPattern(PATTERN_TO_DAYS2);
+
+    public static final DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern(PATTERN_TO_SECONDS);
+    public static final DateTimeFormatter dateTimeFormatter3 = DateTimeFormatter.ofPattern(PATTERN_TO_MINUTES);
+
+    public static final DateTimeFormatter dateTimeFormatter2 = DateTimeFormatter.ISO_OFFSET_DATE_TIME;
+
+    /**
+     * Returns the current date and time as a {@link Date}.
+     * The time of day is not truncated; the value is taken from {@code Calendar.getInstance().getTime()}.
+     *
+     * @return the current instant
+     * @author shiyong
+     * @date 2017-08-24 14:11:03
+     */
+    public static Date getToday() {
+        Calendar cal = Calendar.getInstance();
+
+        return cal.getTime();
+    }
+
+    /**
+     * Returns the current date and time moved back by one day.
+     *
+     * @return the current instant minus one calendar day
+     * @author shiyong
+     * @date 2019-01-17 07:18:38
+     */
+    public static Date getYesterday() {
+        Calendar cal = Calendar.getInstance();
+        cal.add(Calendar.DATE, -1);
+
+        return cal.getTime();
+    }
+
+    /**
+     * Returns the current date and time moved forward by one day.
+     *
+     * @return the current instant plus one calendar day
+     * @author shiyong
+     * @date 2019-01-17 07:19:28
+     */
+    public static Date getTomorrow() {
+        Calendar cal = Calendar.getInstance();
+        cal.add(Calendar.DATE, 1);
+
+        return cal.getTime();
+    }
+
+    /**
+     * Formats the current date and time with the given pattern.
+     *
+     * @param pattern a {@link SimpleDateFormat} pattern
+     * @return the value of {@link #getToday()} formatted with {@code pattern}
+     * @author shiyong
+     * @date 2019-01-17 07:21:51
+     */
+    public static String getTodayStr(String pattern) {
+        SimpleDateFormat format = new SimpleDateFormat(pattern);
+
+        return format.format(getToday());
+    }
+
+    /**
+     * Formats today's date as {@code yyyy-MM-dd}.
+     *
+     * @return the value of {@link #getToday()} formatted as yyyy-MM-dd
+     * @author shiyong
+     * @date 2019-01-17 07:33:22
+     */
+    public static String getTodayStr() {
+        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
+
+        return format.format(getToday());
+    }
+
+    /**
+     * Formats yesterday's date as {@code yyyy-MM-dd}.
+     *
+     * @return the value of {@link #getYesterday()} formatted as yyyy-MM-dd
+     * @author shiyong
+     * @date 2019-01-17 07:21:11
+     */
+    public static String getYesterdayStr() {
+        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
+
+        return format.format(getYesterday());
+    }
+
+    /**
+     * Formats tomorrow's date as {@code yyyy-MM-dd}.
+     *
+     * @return the value of {@link #getTomorrow()} formatted as yyyy-MM-dd
+     * @author shiyong
+     * @date 2019-01-17 07:25:00
+     */
+    public static String getTomorrowStr() {
+        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
+
+        return format.format(getTomorrow());
+    }
+
+    /**
+     * Formats a {@link Date} as {@code yyyy-MM-dd}.
+     * A new SimpleDateFormat is created on every call.
+     *
+     * @param date the date to format
+     * @return {@code date} formatted as yyyy-MM-dd
+     * @author shiyong
+     * @date 2018-01-04 09:30:29
+     */
+    public static String formatDate(Date date) {
+        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+
+        return dateFormat.format(date);
+    }
+
+    public static String formatDate(LocalDate date) {
+        return dateFormatter.format(date); // java.time overload: formats with the shared dateFormatter
+    }
+
+    /**
+     * Formats a {@link Date} as {@code yyyy-MM-dd HH:mm:ss}.
+     * A new SimpleDateFormat is created on every call.
+     *
+     * @param date the date to format
+     * @return {@code date} formatted as yyyy-MM-dd HH:mm:ss
+     * @author shiyong
+     * @date 2018-06-25 13:30:24
+     */
+    public static String formatDateTime(Date date) {
+        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+
+        return dateFormat.format(date);
+    }
+
+    public static String formatDateTime(LocalDateTime dateTime) {
+        return dateTimeFormatter.format(dateTime); // java.time overload: formats with the shared dateTimeFormatter
+    }
+
+    /**
+     * Parses a {@code yyyy-MM-dd} string into a {@link Date}.
+     *
+     * @param dateString text in yyyy-MM-dd form
+     * @return the parsed date
+     * @throws ParseException if {@code dateString} cannot be parsed with the yyyy-MM-dd pattern
+     * @author shiyong
+     * @date 2019-01-17 07:40:06
+     */
+    public static Date parseDate(String dateString) throws ParseException {
+        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+
+        return dateFormat.parse(dateString);
+    }
+
+    /**
+     * Parses a {@code yyyy-MM-dd HH:mm:ss} string into a {@link Date}.
+     *
+     * @param dateString text in yyyy-MM-dd HH:mm:ss form
+     * @return the parsed date-time
+     * @throws ParseException if {@code dateString} cannot be parsed with the yyyy-MM-dd HH:mm:ss pattern
+     * @author shiyong
+     * @date 2019-01-17 07:40:24
+     */
+    public static Date parseDateTime(String dateString) throws ParseException {
+        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+
+        return dateFormat.parse(dateString);
+    }
+
+    /**
+     * Builds a comma-separated list of {@code num} consecutive days ending at {@code date}.
+     * <p>
+     * The calendar is first moved back {@code num - 1} days, then advanced one day at a time, so
+     * the dates appear in ascending order, each formatted as yyyy-MM-dd by {@link #formatDate(Date)}.
+     * For {@code num <= 1} the loop does not run and the result is a single date:
+     * {@code date} shifted by {@code 1 - num} days.
+     *
+     * @param date the last day of the range
+     * @param num  the number of days to list
+     * @return the dates joined with commas, without a trailing comma
+     * @author shiyong
+     * @date 2019-01-17 08:26:40
+     */
+    public static String getFewDateStrsByDate(Date date, int num) {
+        StringBuffer str = new StringBuffer();
+
+        Calendar cal = Calendar.getInstance();
+
+        cal.setTime(date);
+
+        cal.add(Calendar.DATE, -(num - 1));
+        str.append(formatDate(cal.getTime()) + ",");
+
+        for (int i = 0; i < num - 1; i++) {
+            cal.add(Calendar.DATE, 1);
+
+            str.append(formatDate(cal.getTime()) + ",");
+        }
+
+        // drop the trailing comma
+
+        return str.substring(0, str.length() - 1);
+    }
+
+    /**
+     * Builds a comma-separated list of {@code num} dates spaced one week apart, ending at {@code date}.
+     * <p>
+     * The calendar is first moved back {@code num - 1} weeks (Calendar.WEEK_OF_YEAR), then advanced
+     * one week at a time, so the dates appear in ascending order, each formatted as yyyy-MM-dd by
+     * {@link #formatDate(Date)}. For {@code num <= 1} the loop does not run and the result is a
+     * single date: {@code date} shifted by {@code 1 - num} weeks.
+     *
+     * @param date the last date of the series
+     * @param num  the number of weekly dates to list
+     * @return the dates joined with commas, without a trailing comma
+     * @author shiyong
+     * @date 2019-01-17 08:33:23
+     */
+    public static String getFewDateStrsByWeek(Date date, int num) {
+        StringBuffer str = new StringBuffer();
+
+        Calendar cal = Calendar.getInstance();
+
+        cal.setTime(date);
+
+        cal.add(Calendar.WEEK_OF_YEAR, -(num - 1));
+        str.append(formatDate(cal.getTime()) + ",");
+
+        for (int i = 0; i < num - 1; i++) {
+            cal.add(Calendar.WEEK_OF_YEAR, 1);
+
+            str.append(formatDate(cal.getTime()) + ",");
+        }
+
+        // drop the trailing comma
+
+        return str.substring(0, str.length() - 1);
+    }
+
+    /**
+     * Builds a comma-separated list of {@code num} dates spaced one month apart, ending at {@code date}.
+     * <p>
+     * The calendar is first moved back {@code num - 1} months (Calendar.MONTH), then advanced one
+     * month at a time, so the dates appear in ascending order, each formatted as yyyy-MM-dd by
+     * {@link #formatDate(Date)}. For {@code num <= 1} the loop does not run and the result is a
+     * single date: {@code date} shifted by {@code 1 - num} months.
+     * <p>
+     * NOTE(review): Calendar.add adjusts the day of month when the target month is shorter, so a
+     * series starting from the 29th-31st may not stay on the same day of month - confirm whether
+     * callers rely on "same day" semantics.
+     *
+     * @param date the last date of the series
+     * @param num  the number of monthly dates to list
+     * @return the dates joined with commas, without a trailing comma
+     * @author shiyong
+     * @date 2019-01-17 08:31:31
+     */
+    public static String getFewDateStrsByMonth(Date date, int num) {
+        StringBuffer str = new StringBuffer();
+
+        Calendar cal = Calendar.getInstance();
+
+        cal.setTime(date);
+
+        cal.add(Calendar.MONTH, -(num - 1));
+        str.append(formatDate(cal.getTime()) + ",");
+
+        for (int i = 0; i < num - 1; i++) {
+            cal.add(Calendar.MONTH, 1);
+
+            str.append(formatDate(cal.getTime()) + ",");
+        }
+
+        // drop the trailing comma
+
+        return str.substring(0, str.length() - 1);
+    }
+
+    /**
      * 获取时间戳
      * @author shiyong
      * 2019-10-09 13:36
@@ -335,8 +338,8 @@ public class DateUtils {
     public static long getTimeStamp() {
         return System.currentTimeMillis(); // current wall-clock time, in milliseconds since the Unix epoch
     }
-	
-	public static void main(String[] args) {
-		System.out.println(getFewDateStrsByMonth(getToday(), 7));
-	}
+
+    public static void main(String[] args) {
+        System.out.println(getFewDateStrsByMonth(getToday(), 7));
+    }
 }

+ 2 - 2
src/main/resources/static/crawler.html

@@ -178,9 +178,9 @@
         <span class="font">jsoupJavbusProfile</span>
         <form method="post" action="bg/crawler/jsoupJavbusProfile">
             <span>start</span>
-            <input type="text" name="start"/>
+            <input type="text" name="start" placeholder="不可为空"/>
             <span>limit</span>
-            <input type="text" name="limit"/>
+            <input type="text" name="limit" placeholder="可为空"/>
             <input type="submit" value="提交">
         </form>
     </div>

+ 32 - 27
src/test/java/Test4Javbus.java

@@ -5,6 +5,8 @@ import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
+import top.lvzhiqiang.entity.CrawlerJavbusProfile;
+import top.lvzhiqiang.util.DateUtils;
 import top.lvzhiqiang.util.JsoupUtil;
 
 import java.io.*;
@@ -12,17 +14,18 @@ import java.net.HttpURLConnection;
 import java.net.InetSocketAddress;
 import java.net.Proxy;
 import java.net.URL;
+import java.time.LocalDateTime;
 import java.util.*;
 
 public class Test4Javbus {
     public static void main(String[] args) throws Exception {
-        // setupOne();
+        //setupOne();
         setupTwo();
     }
 
     private static void setupTwo() throws Exception {
         File file = new File("d:\\zhiqiang.lv\\Desktop", "1.html");
-        file = new File("C:\\Users\\l1024v\\Desktop", "1.html");
+        // file = new File("C:\\Users\\l1024v\\Desktop", "1.html");
         Document document = Jsoup.parse(file, "UTF-8");
 
         String avatarUrl = document.select("div.avt").select("img").attr("src");
@@ -51,17 +54,17 @@ public class Test4Javbus {
         String userGroup = document.select("div.u_profile").select("div.cl").get(1).select("ul").get(0)
                 .select("a").text();
         String onlineTime = document.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
-                .select("li").first().text().replace("在線時間", "").replace("小時", "").trim();
+                .select("li:contains(在線時間)").text().replace("在線時間", "").replace("小時", "").trim();
         String registrationTime = document.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
-                .select("li").get(1).text().replace("註冊時間", "").trim();
+                .select("li:contains(註冊時間)").text().replace("註冊時間", "").trim();
         String lastVisit = document.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
-                .select("li").get(2).text().replace("最後訪問", "").trim();
+                .select("li:contains(最後訪問)").text().replace("最後訪問", "").trim();
         String lastActivityTime = document.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
-                .select("li").get(3).text().replace("上次活動時間", "").trim();
+                .select("li:contains(上次活動時間)").text().replace("上次活動時間", "").trim();
         String lastPublishedTime = document.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
-                .select("li").get(4).text().replace("上次發表時間", "").trim();
+                .select("li:contains(上次發表時間)").text().replace("上次發表時間", "").trim();
         String timeZone = document.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
-                .select("li").get(5).text().replace("所在時區", "").trim();
+                .select("li:contains(所在時區)").text().replace("所在時區", "").trim();
         String usedSpace = document.select("div.u_profile").select("div.cl").get(2).select("ul").get(0)
                 .select("li").get(0).text().replace("已用空間", "").replace("B", "").trim();
         String mileage = document.select("div.u_profile").select("div.cl").get(2).select("ul").get(0)
@@ -69,25 +72,27 @@ public class Test4Javbus {
         String money = document.select("div.u_profile").select("div.cl").get(2).select("ul").get(0)
                 .select("li").last().text().replace("金錢", "").trim();
 
-        System.out.println(avatarUrl);
-        System.out.println(nickName);
-        System.out.println(uid);
-        System.out.println(emailStatus);
-        System.out.println(friendNum);
-        System.out.println(replyNum);
-        System.out.println(threadNum);
-        System.out.println(signStr);
-        System.out.println(StringUtils.join(signImgList, ";"));
-        System.out.println(userGroup);
-        System.out.println(onlineTime);
-        System.out.println(registrationTime);
-        System.out.println(lastVisit);
-        System.out.println(lastActivityTime);
-        System.out.println(lastPublishedTime);
-        System.out.println(timeZone);
-        System.out.println(usedSpace);
-        System.out.println(mileage);
-        System.out.println(money);
+        CrawlerJavbusProfile crawlerJavbusProfile = new CrawlerJavbusProfile();
+        crawlerJavbusProfile.setUid(Long.valueOf(uid));
+        crawlerJavbusProfile.setNickName(nickName);
+        crawlerJavbusProfile.setEmailStatus(emailStatus);
+        crawlerJavbusProfile.setFriendNum(Integer.valueOf(friendNum));
+        crawlerJavbusProfile.setReplyNum(Integer.valueOf(replyNum));
+        crawlerJavbusProfile.setThreadNum(Integer.valueOf(threadNum));
+        crawlerJavbusProfile.setUserGroup(userGroup);
+        crawlerJavbusProfile.setOnlineTime(top.lvzhiqiang.util.StringUtils.isNotEmpty(onlineTime) ? Integer.valueOf(onlineTime) : null);
+        crawlerJavbusProfile.setRegistrationTime(top.lvzhiqiang.util.StringUtils.isNotEmpty(registrationTime) ? LocalDateTime.parse(registrationTime, DateUtils.dateTimeFormatter3) : null);
+        crawlerJavbusProfile.setLastVisit(top.lvzhiqiang.util.StringUtils.isNotEmpty(lastVisit) ? LocalDateTime.parse(lastVisit, DateUtils.dateTimeFormatter3) : null);
+        crawlerJavbusProfile.setLastActivityTime(top.lvzhiqiang.util.StringUtils.isNotEmpty(lastActivityTime) ? LocalDateTime.parse(lastActivityTime, DateUtils.dateTimeFormatter3) : null);
+        crawlerJavbusProfile.setLastPublishedTime(top.lvzhiqiang.util.StringUtils.isNotEmpty(lastPublishedTime) ? LocalDateTime.parse(lastPublishedTime, DateUtils.dateTimeFormatter3) : null);
+        crawlerJavbusProfile.setTimeZone(top.lvzhiqiang.util.StringUtils.isNotEmpty(timeZone) ? timeZone : null);
+        crawlerJavbusProfile.setUsedSpace(Integer.valueOf(usedSpace));
+        crawlerJavbusProfile.setMileage(Integer.valueOf(mileage));
+        crawlerJavbusProfile.setMoney(Integer.valueOf(money));
+        crawlerJavbusProfile.setAvatarUrl(avatarUrl);
+        crawlerJavbusProfile.setSignStr(signStr);
+        crawlerJavbusProfile.setSignImg(org.apache.commons.lang3.StringUtils.join(signImgList, ","));
+        System.out.println(crawlerJavbusProfile);
     }
 
     private static void setupOne() throws Exception {