|
|
@@ -1,6 +1,8 @@
|
|
|
package top.lvzhiqiang.service.impl;
|
|
|
|
|
|
import com.alibaba.fastjson.JSONObject;
|
|
|
+import com.github.pagehelper.PageHelper;
|
|
|
+import com.github.pagehelper.PageInfo;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
import org.jsoup.Connection;
|
|
|
import org.jsoup.HttpStatusException;
|
|
|
@@ -116,14 +118,74 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
|
|
|
log.warn("handleJavbusLog 结束:time={}", stopWatch.getTotalTimeSeconds());
|
|
|
}
|
|
|
|
|
|
+ @Override
|
|
|
+ public String findJavbusProfile(String keyword, Integer timeDay, Integer pic, String orderField, String order, Integer pageNo, Integer pageSize) {
|
|
|
+ Map<String, Object> params = new HashMap<>();
|
|
|
+ params.put("keyword", keyword);
|
|
|
+ params.put("timeDay", timeDay);
|
|
|
+ params.put("orderField", orderField);
|
|
|
+ params.put("order", order);
|
|
|
+
|
|
|
+ PageHelper.startPage(pageNo, pageSize);
|
|
|
+ List<CrawlerJavbusProfile> crawlerJavbusProfileList = crawlerJavbusProfileMapper.findJavbusProfile4MultipleParams(params);
|
|
|
+ PageInfo<CrawlerJavbusProfile> javbusProfilePageInfo = new PageInfo<>(crawlerJavbusProfileList);
|
|
|
+
|
|
|
+ StringBuffer sb = new StringBuffer("total:".concat(String.valueOf(javbusProfilePageInfo.getTotal())).concat("<br/>"));
|
|
|
+ sb.append("<table border=\"1\" cellspacing=\"0\"><tr><th>UID</th><th>昵称</th><th>邮箱状态</th><th>好友数</th><th>回帖数</th><th>主题数</th><th>用户组</th><th>在线时间</th><th>注册时间</th><th>上次活动时间</th><th>上次发表时间</th><th>所在时区</th><th>头像</th><th>个人签名文字</th><th>个人签名图片</th></tr>");
|
|
|
+ for (CrawlerJavbusProfile crawlerJavbusProfile : crawlerJavbusProfileList) {
|
|
|
+ sb.append("<tr>");
|
|
|
+
|
|
|
+ sb.append("<td>").append(crawlerJavbusProfile.getUid()).append("</td>");
|
|
|
+ sb.append("<td>").append(crawlerJavbusProfile.getEmailStatus()).append("</td>");
|
|
|
+ sb.append("<td>").append(crawlerJavbusProfile.getFriendNum()).append("</td>");
|
|
|
+ sb.append("<td>").append(crawlerJavbusProfile.getReplyNum()).append("</td>");
|
|
|
+ sb.append("<td>").append(crawlerJavbusProfile.getThreadNum()).append("</td>");
|
|
|
+ sb.append("<td>").append(crawlerJavbusProfile.getUserGroup()).append("</td>");
|
|
|
+ sb.append("<td>").append(crawlerJavbusProfile.getOnlineTime()).append("</td>");
|
|
|
+ sb.append("<td>").append(crawlerJavbusProfile.getRegistrationTime()).append("</td>");
|
|
|
+ sb.append("<td>").append(crawlerJavbusProfile.getLastActivityTime()).append("</td>");
|
|
|
+ sb.append("<td>").append(crawlerJavbusProfile.getLastPublishedTime()).append("</td>");
|
|
|
+ sb.append("<td>").append(crawlerJavbusProfile.getTimeZone()).append("</td>");
|
|
|
+
|
|
|
+ if (pic == 2) {
|
|
|
+ sb.append("<td>").append("<img src=\"" + crawlerJavbusProfile.getAvatarUrl() + "\" alt=\"封面\" width=\"147\" height=\"auto\">").append("</td>");
|
|
|
+ } else {
|
|
|
+ sb.append("<td>").append(crawlerJavbusProfile.getAvatarUrl()).append("</td>");
|
|
|
+ }
|
|
|
+ sb.append("<td>").append(crawlerJavbusProfile.getSignStr()).append("</td>");
|
|
|
+
|
|
|
+ if (pic == 2) {
|
|
|
+ sb.append("<td>");
|
|
|
+ String signImg = crawlerJavbusProfile.getSignImg();
|
|
|
+ if (StringUtils.isNotEmpty(signImg)) {
|
|
|
+ for (String s : signImg.split(",")) {
|
|
|
+ sb.append("<img src=\"" + s + "\" alt=\"sign\" width=\"147\" height=\"auto\">");
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ sb.append("--");
|
|
|
+ }
|
|
|
+ sb.append("</td>");
|
|
|
+ } else {
|
|
|
+ if (StringUtils.isNotEmpty(crawlerJavbusProfile.getSignImg())) {
|
|
|
+ sb.append("<td>").append(crawlerJavbusProfile.getSignImg()).append("</td>");
|
|
|
+ } else {
|
|
|
+ sb.append("<td>--</td>");
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ sb.append("</tr>");
|
|
|
+ }
|
|
|
+ sb.append("</table>");
|
|
|
+
|
|
|
+ return sb.toString();
|
|
|
+ }
|
|
|
+
|
|
|
private void handleJavbusLogSub(Integer status) {
|
|
|
List<CrawlerJavbusLog> javbusLogByStatus = crawlerJavbusProfileMapper.findJavbusLogByStatus(status);
|
|
|
|
|
|
String profileUrl = "https://www.javbus.com/forum/?";
|
|
|
Document profileDocument;
|
|
|
for (CrawlerJavbusLog javbusLog : javbusLogByStatus) {
|
|
|
- String uid = "";
|
|
|
- String nickName = "";
|
|
|
try {
|
|
|
profileDocument = JsoupUtil.requestDocument(profileUrl.concat(javbusLog.getBusinessKey()), JsoupUtil.HTTP_GET, proxy, javbusCookiesMap, null, null);
|
|
|
if (profileDocument.html().contains("您指定的用戶空間不存在")) {
|
|
|
@@ -131,21 +193,15 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
- String[] mbn0Arr = profileDocument.select("div.u_profile").select("div.cl").get(0).select("h2.mbn").get(0).text().replace("(", "").replace(")", "").split("UID:");
|
|
|
- nickName = mbn0Arr[0].trim();
|
|
|
- uid = mbn0Arr[1].trim();
|
|
|
-
|
|
|
CrawlerJavbusProfile crawlerJavbusProfile = new CrawlerJavbusProfile();
|
|
|
- crawlerJavbusProfile.setUid(Long.valueOf(uid));
|
|
|
- crawlerJavbusProfile.setNickName(nickName);
|
|
|
parseJavbusProfile(profileDocument, crawlerJavbusProfile);
|
|
|
crawlerJavbusProfileMapper.insertOrUpdate(crawlerJavbusProfile);
|
|
|
- log.warn("jsoupJavbusProfileSub成功插入,uid={}", uid);
|
|
|
+ log.warn("jsoupJavbusProfileSub成功插入,businessKey={}", javbusLog.getBusinessKey());
|
|
|
|
|
|
javbusLog.setStatus(2);
|
|
|
javbusLog.setErrorMsg("");
|
|
|
} catch (Exception e) {
|
|
|
- log.error("jsoupJavbusProfileSub插入异常,nickName={},uid={}", nickName, uid, e);
|
|
|
+ log.error("jsoupJavbusProfileSub插入异常,businessKey={}", javbusLog.getBusinessKey(), e);
|
|
|
javbusLog.setStatus(3);
|
|
|
javbusLog.setErrorMsg(e.getMessage());
|
|
|
}
|
|
|
@@ -154,7 +210,7 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- private void jsoupJavbusProfileSub(Long start, Integer limit) throws Exception {
|
|
|
+ private void jsoupJavbusProfileSub(Long start, Integer limit) {
|
|
|
CrawlerJavbusProfile latestJavbusProfile = crawlerJavbusProfileMapper.findLatestInfo();
|
|
|
if (start == null && latestJavbusProfile == null) {
|
|
|
start = 1L;
|
|
|
@@ -180,42 +236,40 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
- String uid = "";
|
|
|
- String nickName = "";
|
|
|
try {
|
|
|
profileDocument = JsoupUtil.requestDocument(profileUrl.concat(String.valueOf(start)), JsoupUtil.HTTP_GET, proxy, javbusCookiesMap, null, null);
|
|
|
- start++;
|
|
|
|
|
|
if (profileDocument.html().contains("您指定的用戶空間不存在")) {
|
|
|
- log.warn("jsoupJavbusProfileSub您指定的用戶空間不存在,start={}", start - 1);
|
|
|
+ log.warn("jsoupJavbusProfileSub您指定的用戶空間不存在,start={}", start);
|
|
|
+ start++;
|
|
|
continueCount++;
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
- String[] mbn0Arr = profileDocument.select("div.u_profile").select("div.cl").get(0).select("h2.mbn").get(0).text().replace("(", "").replace(")", "").split("UID:");
|
|
|
- nickName = mbn0Arr[0].trim();
|
|
|
- uid = mbn0Arr[1].trim();
|
|
|
-
|
|
|
CrawlerJavbusProfile crawlerJavbusProfile = new CrawlerJavbusProfile();
|
|
|
- crawlerJavbusProfile.setUid(Long.valueOf(uid));
|
|
|
- crawlerJavbusProfile.setNickName(nickName);
|
|
|
parseJavbusProfile(profileDocument, crawlerJavbusProfile);
|
|
|
crawlerJavbusProfileMapper.insertOrUpdate(crawlerJavbusProfile);
|
|
|
- log.warn("jsoupJavbusProfileSub成功插入,uid={}", uid);
|
|
|
+ log.warn("jsoupJavbusProfileSub成功插入,start={}", start);
|
|
|
} catch (Exception e) {
|
|
|
- log.error("jsoupJavbusProfileSub插入异常,nickName={},uid={}", nickName, uid, e);
|
|
|
+ log.error("jsoupJavbusProfileSub插入异常,start={}", start, e);
|
|
|
CrawlerJavbusLog crawlerJavbusLog = new CrawlerJavbusLog();
|
|
|
crawlerJavbusLog.setType(1);
|
|
|
crawlerJavbusLog.setStatus(1);
|
|
|
- crawlerJavbusLog.setBusinessKey(uid);
|
|
|
+ crawlerJavbusLog.setBusinessKey(String.valueOf(start));
|
|
|
crawlerJavbusLog.setErrorMsg(e.getMessage());
|
|
|
crawlerJavbusProfileMapper.insertOrUpdateLog(crawlerJavbusLog);
|
|
|
}
|
|
|
+ start++;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
public void parseJavbusProfile(Document profileDocument, CrawlerJavbusProfile crawlerJavbusProfile) {
|
|
|
String avatarUrl = profileDocument.select("div.avt").select("img").attr("src");
|
|
|
+
|
|
|
+ String[] mbn0Arr = profileDocument.select("div.u_profile").select("div.cl").get(0).select("h2.mbn").get(0).text().replace("(", "").replace(")", "").split("UID:");
|
|
|
+ String nickName = mbn0Arr[0].trim();
|
|
|
+ String uid = mbn0Arr[1].trim();
|
|
|
+
|
|
|
String emailStatus = profileDocument.select("div.u_profile").select("div.cl").get(0).select("ul").first().text().replace("郵箱狀態", "").trim();
|
|
|
|
|
|
Elements signEles = profileDocument.select("div.u_profile").select("div.cl").get(0).select("ul").get(1).select("li:contains(個人簽名)");
|
|
|
@@ -256,6 +310,8 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
|
|
|
String money = profileDocument.select("div.u_profile").select("div.cl").get(2).select("ul").get(0)
|
|
|
.select("li").last().text().replace("金錢", "").trim();
|
|
|
|
|
|
+ crawlerJavbusProfile.setUid(Long.valueOf(uid));
|
|
|
+ crawlerJavbusProfile.setNickName(nickName);
|
|
|
crawlerJavbusProfile.setEmailStatus(emailStatus);
|
|
|
crawlerJavbusProfile.setFriendNum(Integer.valueOf(friendNum));
|
|
|
crawlerJavbusProfile.setReplyNum(Integer.valueOf(replyNum));
|