Преглед на файлове

add:findJavbusProfile v1

lvzhiqiang преди 3 години
родител
ревизия
04506fef6e

+ 19 - 0
src/main/java/top/lvzhiqiang/controller/CrawlerController.java

@@ -207,4 +207,23 @@ public class CrawlerController {
         crawler4JavbusService.handleJavbusLog(status);
         return "success";
     }
+
+    /**
+     * Searches the crawled Javbus profiles and returns one result page rendered as HTML.
+     *
+     * <p>Missing optional request parameters are replaced by defaults here so that the
+     * service never receives {@code null} for a value it unboxes.
+     *
+     * @param keyword    optional search text, forwarded to the service unchanged
+     * @param timeDay    optional day offset, forwarded to the service unchanged
+     * @param pic        {@code 2} to render images, {@code 1} to print URLs; defaults to 1
+     * @param orderField column name to sort by, forwarded to the service unchanged
+     * @param order      sort direction, forwarded to the service unchanged
+     * @param pageNo     page number; defaults to 1
+     * @param pageSize   rows per page; defaults to 10
+     * @return the HTML produced by the service
+     * @throws Exception kept for compatibility with the original signature
+     * @author lvzhiqiang
+     * 2022/10/20 11:20
+     */
+    @RequestMapping("/findJavbusProfile")
+    @ResponseBody
+    public String findJavbusProfile(String keyword, Integer timeDay, Integer pic, String orderField, String order, Integer pageNo, Integer pageSize) throws Exception {
+        if (null == pic) {
+            // The service compares pic with 2 using ==, which unboxes it; a request that
+            // omits the parameter would otherwise fail with a NullPointerException.
+            pic = 1;
+        }
+        if (null == pageNo) {
+            pageNo = 1;
+        }
+        if (null == pageSize) {
+            pageSize = 10;
+        }
+
+        return crawler4JavbusService.findJavbusProfile(keyword, timeDay, pic, orderField, order, pageNo, pageSize);
+    }
 }

+ 8 - 2
src/main/java/top/lvzhiqiang/mapper/CrawlerJavbusProfileMapper.java

@@ -65,8 +65,14 @@ public interface CrawlerJavbusProfileMapper {
      */
     @Select({"<script>" +
             "select * from crawler_javbus_profile WHERE delete_flag = 1" +
-            "<if test=\"nickName != null and nickName != ''\">" +
-            "   and nick_name like concat('%',#{nickName},'%')" +
+            "<if test=\"keyword != null and keyword != ''\">" +
+            "   and (uid like concat('%',#{keyword},'%') or nick_name like concat('%',#{keyword},'%') or sign_str like concat('%',#{keyword},'%'))" +
+            "</if>" +
+            "<if test=\"timeDay != null \">" +
+            "   and date(registration_time) = date_sub(CURRENT_DATE,interval #{timeDay} day)" +
+            "</if>" +
+            // orderField is substituted as raw text (${...}), so it is only accepted when it is
+            // a bare identifier; the direction is emitted as a literal instead of being spliced in.
+            "<if test=\"orderField != null and orderField.matches('[A-Za-z0-9_]+') and order != null\">" +
+            "   <if test=\"order.equalsIgnoreCase('asc')\"> order by ${orderField} asc</if>" +
+            "   <if test=\"order.equalsIgnoreCase('desc')\"> order by ${orderField} desc</if>" +
             "</if>" +
             "</script>"})
     List<CrawlerJavbusProfile> findJavbusProfile4MultipleParams(Map<String, Object> params);

+ 2 - 0
src/main/java/top/lvzhiqiang/service/Crawler4JavbusService.java

@@ -11,4 +11,6 @@ public interface Crawler4JavbusService {
     /**
      * Crawls Javbus forum profiles.
      * NOTE(review): the implementation appears to request profile pages by sequential id
      * beginning at {@code start}; the exact role of {@code limit} is not visible here - confirm.
      */
     void jsoupJavbusProfile(Long start, Integer limit) throws Exception;
 
     /**
      * Handles crawl-log entries for the given status.
      * NOTE(review): presumably re-crawls the profiles recorded in those entries - confirm
      * against the implementation.
      */
     void handleJavbusLog(Integer status) throws Exception;
+
     /**
      * Queries stored profiles with optional keyword / day filters, ordering and paging,
      * and returns the page rendered as an HTML string (a total count followed by a table).
      * A {@code pic} value of 2 makes the implementation emit image tags instead of URLs.
      */
+    String findJavbusProfile(String keyword, Integer timeDay, Integer pic, String orderField, String order, Integer pageNo, Integer pageSize);
 }

+ 80 - 24
src/main/java/top/lvzhiqiang/service/impl/Crawler4JavbusServiceImpl.java

@@ -1,6 +1,8 @@
 package top.lvzhiqiang.service.impl;
 
 import com.alibaba.fastjson.JSONObject;
+import com.github.pagehelper.PageHelper;
+import com.github.pagehelper.PageInfo;
 import lombok.extern.slf4j.Slf4j;
 import org.jsoup.Connection;
 import org.jsoup.HttpStatusException;
@@ -116,14 +118,74 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
         log.warn("handleJavbusLog 结束:time={}", stopWatch.getTotalTimeSeconds());
     }
 
+    @Override
+    public String findJavbusProfile(String keyword, Integer timeDay, Integer pic, String orderField, String order, Integer pageNo, Integer pageSize) {
+        Map<String, Object> params = new HashMap<>();
+        params.put("keyword", keyword);
+        params.put("timeDay", timeDay);
+        params.put("orderField", orderField);
+        params.put("order", order);
+
+        PageHelper.startPage(pageNo, pageSize);
+        List<CrawlerJavbusProfile> crawlerJavbusProfileList = crawlerJavbusProfileMapper.findJavbusProfile4MultipleParams(params);
+        PageInfo<CrawlerJavbusProfile> javbusProfilePageInfo = new PageInfo<>(crawlerJavbusProfileList);
+
+        StringBuffer sb = new StringBuffer("total:".concat(String.valueOf(javbusProfilePageInfo.getTotal())).concat("<br/>"));
+        sb.append("<table border=\"1\" cellspacing=\"0\"><tr><th>UID</th><th>昵称</th><th>邮箱状态</th><th>好友数</th><th>回帖数</th><th>主题数</th><th>用户组</th><th>在线时间</th><th>注册时间</th><th>上次活动时间</th><th>上次发表时间</th><th>所在时区</th><th>头像</th><th>个人签名文字</th><th>个人签名图片</th></tr>");
+        for (CrawlerJavbusProfile crawlerJavbusProfile : crawlerJavbusProfileList) {
+            sb.append("<tr>");
+
+            sb.append("<td>").append(crawlerJavbusProfile.getUid()).append("</td>");
+            sb.append("<td>").append(crawlerJavbusProfile.getEmailStatus()).append("</td>");
+            sb.append("<td>").append(crawlerJavbusProfile.getFriendNum()).append("</td>");
+            sb.append("<td>").append(crawlerJavbusProfile.getReplyNum()).append("</td>");
+            sb.append("<td>").append(crawlerJavbusProfile.getThreadNum()).append("</td>");
+            sb.append("<td>").append(crawlerJavbusProfile.getUserGroup()).append("</td>");
+            sb.append("<td>").append(crawlerJavbusProfile.getOnlineTime()).append("</td>");
+            sb.append("<td>").append(crawlerJavbusProfile.getRegistrationTime()).append("</td>");
+            sb.append("<td>").append(crawlerJavbusProfile.getLastActivityTime()).append("</td>");
+            sb.append("<td>").append(crawlerJavbusProfile.getLastPublishedTime()).append("</td>");
+            sb.append("<td>").append(crawlerJavbusProfile.getTimeZone()).append("</td>");
+
+            if (pic == 2) {
+                sb.append("<td>").append("<img src=\"" + crawlerJavbusProfile.getAvatarUrl() + "\" alt=\"封面\" width=\"147\" height=\"auto\">").append("</td>");
+            } else {
+                sb.append("<td>").append(crawlerJavbusProfile.getAvatarUrl()).append("</td>");
+            }
+            sb.append("<td>").append(crawlerJavbusProfile.getSignStr()).append("</td>");
+
+            if (pic == 2) {
+                sb.append("<td>");
+                String signImg = crawlerJavbusProfile.getSignImg();
+                if (StringUtils.isNotEmpty(signImg)) {
+                    for (String s : signImg.split(",")) {
+                        sb.append("<img src=\"" + s + "\" alt=\"sign\" width=\"147\" height=\"auto\">");
+                    }
+                } else {
+                    sb.append("--");
+                }
+                sb.append("</td>");
+            } else {
+                if (StringUtils.isNotEmpty(crawlerJavbusProfile.getSignImg())) {
+                    sb.append("<td>").append(crawlerJavbusProfile.getSignImg()).append("</td>");
+                } else {
+                    sb.append("<td>--</td>");
+                }
+            }
+
+            sb.append("</tr>");
+        }
+        sb.append("</table>");
+
+        return sb.toString();
+    }
+
     private void handleJavbusLogSub(Integer status) {
         List<CrawlerJavbusLog> javbusLogByStatus = crawlerJavbusProfileMapper.findJavbusLogByStatus(status);
 
         String profileUrl = "https://www.javbus.com/forum/?";
         Document profileDocument;
         for (CrawlerJavbusLog javbusLog : javbusLogByStatus) {
-            String uid = "";
-            String nickName = "";
             try {
                 profileDocument = JsoupUtil.requestDocument(profileUrl.concat(javbusLog.getBusinessKey()), JsoupUtil.HTTP_GET, proxy, javbusCookiesMap, null, null);
                 if (profileDocument.html().contains("您指定的用戶空間不存在")) {
@@ -131,21 +193,15 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
                     continue;
                 }
 
-                String[] mbn0Arr = profileDocument.select("div.u_profile").select("div.cl").get(0).select("h2.mbn").get(0).text().replace("(", "").replace(")", "").split("UID:");
-                nickName = mbn0Arr[0].trim();
-                uid = mbn0Arr[1].trim();
-
                 CrawlerJavbusProfile crawlerJavbusProfile = new CrawlerJavbusProfile();
-                crawlerJavbusProfile.setUid(Long.valueOf(uid));
-                crawlerJavbusProfile.setNickName(nickName);
                 parseJavbusProfile(profileDocument, crawlerJavbusProfile);
                 crawlerJavbusProfileMapper.insertOrUpdate(crawlerJavbusProfile);
-                log.warn("jsoupJavbusProfileSub成功插入,uid={}", uid);
+                log.warn("jsoupJavbusProfileSub成功插入,businessKey={}", javbusLog.getBusinessKey());
 
                 javbusLog.setStatus(2);
                 javbusLog.setErrorMsg("");
             } catch (Exception e) {
-                log.error("jsoupJavbusProfileSub插入异常,nickName={},uid={}", nickName, uid, e);
+                log.error("jsoupJavbusProfileSub插入异常,businessKey={}", javbusLog.getBusinessKey(), e);
                 javbusLog.setStatus(3);
                 javbusLog.setErrorMsg(e.getMessage());
             }
@@ -154,7 +210,7 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
         }
     }
 
-    private void jsoupJavbusProfileSub(Long start, Integer limit) throws Exception {
+    private void jsoupJavbusProfileSub(Long start, Integer limit) {
         CrawlerJavbusProfile latestJavbusProfile = crawlerJavbusProfileMapper.findLatestInfo();
         if (start == null && latestJavbusProfile == null) {
             start = 1L;
@@ -180,42 +236,40 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
                 return;
             }
 
-            String uid = "";
-            String nickName = "";
             try {
                 profileDocument = JsoupUtil.requestDocument(profileUrl.concat(String.valueOf(start)), JsoupUtil.HTTP_GET, proxy, javbusCookiesMap, null, null);
-                start++;
 
                 if (profileDocument.html().contains("您指定的用戶空間不存在")) {
-                    log.warn("jsoupJavbusProfileSub您指定的用戶空間不存在,start={}", start - 1);
+                    log.warn("jsoupJavbusProfileSub您指定的用戶空間不存在,start={}", start);
+                    start++;
                     continueCount++;
                     continue;
                 }
 
-                String[] mbn0Arr = profileDocument.select("div.u_profile").select("div.cl").get(0).select("h2.mbn").get(0).text().replace("(", "").replace(")", "").split("UID:");
-                nickName = mbn0Arr[0].trim();
-                uid = mbn0Arr[1].trim();
-
                 CrawlerJavbusProfile crawlerJavbusProfile = new CrawlerJavbusProfile();
-                crawlerJavbusProfile.setUid(Long.valueOf(uid));
-                crawlerJavbusProfile.setNickName(nickName);
                 parseJavbusProfile(profileDocument, crawlerJavbusProfile);
                 crawlerJavbusProfileMapper.insertOrUpdate(crawlerJavbusProfile);
-                log.warn("jsoupJavbusProfileSub成功插入,uid={}", uid);
+                log.warn("jsoupJavbusProfileSub成功插入,start={}", start);
             } catch (Exception e) {
-                log.error("jsoupJavbusProfileSub插入异常,nickName={},uid={}", nickName, uid, e);
+                log.error("jsoupJavbusProfileSub插入异常,start={}", start, e);
                 CrawlerJavbusLog crawlerJavbusLog = new CrawlerJavbusLog();
                 crawlerJavbusLog.setType(1);
                 crawlerJavbusLog.setStatus(1);
-                crawlerJavbusLog.setBusinessKey(uid);
+                crawlerJavbusLog.setBusinessKey(String.valueOf(start));
                 crawlerJavbusLog.setErrorMsg(e.getMessage());
                 crawlerJavbusProfileMapper.insertOrUpdateLog(crawlerJavbusLog);
             }
+            start++;
         }
     }
 
     public void parseJavbusProfile(Document profileDocument, CrawlerJavbusProfile crawlerJavbusProfile) {
         String avatarUrl = profileDocument.select("div.avt").select("img").attr("src");
+
+        String[] mbn0Arr = profileDocument.select("div.u_profile").select("div.cl").get(0).select("h2.mbn").get(0).text().replace("(", "").replace(")", "").split("UID:");
+        String nickName = mbn0Arr[0].trim();
+        String uid = mbn0Arr[1].trim();
+
         String emailStatus = profileDocument.select("div.u_profile").select("div.cl").get(0).select("ul").first().text().replace("郵箱狀態", "").trim();
 
         Elements signEles = profileDocument.select("div.u_profile").select("div.cl").get(0).select("ul").get(1).select("li:contains(個人簽名)");
@@ -256,6 +310,8 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
         String money = profileDocument.select("div.u_profile").select("div.cl").get(2).select("ul").get(0)
                 .select("li").last().text().replace("金錢", "").trim();
 
+        crawlerJavbusProfile.setUid(Long.valueOf(uid));
+        crawlerJavbusProfile.setNickName(nickName);
         crawlerJavbusProfile.setEmailStatus(emailStatus);
         crawlerJavbusProfile.setFriendNum(Integer.valueOf(friendNum));
         crawlerJavbusProfile.setReplyNum(Integer.valueOf(replyNum));

+ 39 - 0
src/main/resources/static/crawler.html

@@ -197,6 +197,45 @@
             <input type="submit" value="提交">
         </form>
     </div>
+    <div style="margin-right:20px;">
         <!-- Search form for crawled Javbus profiles; posts to bg/crawler/findJavbusProfile.
              keyword and timeDay may be left blank, pageNo / pageSize fall back to 1 / 10.
              pic: 1 = do not show images, 2 = show images.
              orderField / order choose the sort column and direction. -->
+        <span class="font">findJavbusProfile</span>
+        <form method="post" action="bg/crawler/findJavbusProfile">
+            <span>keyword</span>
+            <input type="text" name="keyword" placeholder="可为空"/>
+            <span>timeDay</span>
+            <input type="text" name="timeDay" placeholder="数字(天),可为空" style="width: 50px;"/>
+            <span>pic</span>
+            <select name="pic" style="height: 21.43px;">
+                <option value="1">不显示</option>
+                <option value="2">显示</option>
+            </select>
+            <span>order</span>
+            <select name="orderField" style="height: 21.43px;">
+                <option value="uid">uid</option>
+                <option value="friend_num">friend_num</option>
+                <option value="reply_num">reply_num</option>
+                <option value="thread_num">thread_num</option>
+                <option value="online_time">online_time</option>
+                <option value="registration_time">registration_time</option>
+                <option value="last_visit">last_visit</option>
+                <option value="last_activity_time">last_activity_time</option>
+                <option value="last_published_time">last_published_time</option>
+                <option value="used_space">used_space</option>
+                <option value="mileage">mileage</option>
+                <option value="money">money</option>
+                <option value="create_time">create_time</option>
+                <option value="modify_time">modify_time</option>
+            </select>
+            <select name="order" style="height: 21.43px;">
+                <option value="desc">desc</option>
+                <option value="asc">asc</option>
+            </select>
+            <span>page</span>
+            <input type="text" name="pageNo" placeholder="pageNo,默认1" style="width: 50px;"/>
+            <input type="text" name="pageSize" placeholder="pageSize,默认10" style="width: 50px;"/>
+            <input type="submit" value="提交">
+        </form>
+    </div>
 </div>
 </body>
 </html>