Pārlūkot izejas kodu

update:javbus v3

tujidelv 3 gadi atpakaļ
vecāks
revīzija
77ecda2f53

+ 20 - 0
src/main/java/top/lvzhiqiang/controller/CrawlerController.java

@@ -6,6 +6,7 @@ import org.springframework.web.bind.annotation.ResponseBody;
 import top.lvzhiqiang.dto.R;
 import top.lvzhiqiang.exception.ParameterException;
 import top.lvzhiqiang.service.Crawler4FacebookService;
+import top.lvzhiqiang.service.Crawler4JavbusService;
 import top.lvzhiqiang.service.CrawlerService;
 import top.lvzhiqiang.util.StringUtils;
 
@@ -25,6 +26,8 @@ public class CrawlerController {
     private CrawlerService crawlerService;
     @Resource
     private Crawler4FacebookService crawler4FacebookService;
+    @Resource
+    private Crawler4JavbusService crawler4JavbusService;
 
     /**
      * findXiaoeknowCourse
@@ -173,4 +176,21 @@ public class CrawlerController {
         crawler4FacebookService.jsoupFacebookGroupMemberInfo(email, password, url, limit);
         return "success";
     }
+
+    /**
+     * HTTP entry point that triggers the Javbus profile crawl.
+     * <p>
+     * Both request parameters are mandatory and are forwarded unchanged to
+     * {@code crawler4JavbusService.jsoupJavbusProfile(start, limit)}.
+     *
+     * @param start request parameter; rejected when {@code StringUtils.isEmpty} reports it as empty
+     * @param limit request parameter; rejected when {@code StringUtils.isEmpty} reports it as empty
+     * @return the literal {@code "success"} once the service call has returned
+     * @throws ParameterException when {@code start} or {@code limit} is empty
+     * @throws Exception          propagated from the crawler service
+     * @author lvzhiqiang
+     * 2022/10/18 22:09
+     */
+    @RequestMapping("/jsoupJavbusProfile")
+    @ResponseBody
+    public String jsoupJavbusProfile(String start, String limit) throws Exception {
+        if (StringUtils.isEmpty(start) || StringUtils.isEmpty(limit)) {
+            // The message must name the parameters actually checked; this endpoint has no password parameter.
+            throw new ParameterException("start|limit不能为空");
+        }
+
+        crawler4JavbusService.jsoupJavbusProfile(start, limit);
+        return "success";
+    }
 }

+ 58 - 0
src/main/java/top/lvzhiqiang/entity/CrawlerJavbusLog.java

@@ -0,0 +1,58 @@
+package top.lvzhiqiang.entity;
+
+import com.fasterxml.jackson.annotation.JsonFormat;
+import lombok.Data;
+
+import java.io.Serializable;
+import java.time.LocalDateTime;
+
+/**
+ * Entity for the javbus log table.
+ *
+ * @author lvzhiqiang
+ * 2022/10/18 22:09
+ */
+@Data
+public class CrawlerJavbusLog implements Serializable {
+    /**
+     * Primary key
+     */
+    private Long id;
+
+    /**
+     * Type (1: profile)
+     */
+    private Integer type;
+
+    /**
+     * Status (1: pending, 2: resolved, 3: ignored)
+     */
+    private Integer status;
+
+    /**
+     * Business key
+     */
+    private String businessKey;
+
+    /**
+     * Error message
+     */
+    private String errorMsg;
+
+    /**
+     * Delete flag {1: normal, 2: deleted}
+     */
+    private Integer deleteFlag;
+
+    /**
+     * Creation time
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    private LocalDateTime createTime;
+
+    /**
+     * Last modification time
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    private LocalDateTime modifyTime;
+}

+ 143 - 0
src/main/java/top/lvzhiqiang/entity/CrawlerJavbusProfile.java

@@ -0,0 +1,143 @@
+package top.lvzhiqiang.entity;
+
+import com.fasterxml.jackson.annotation.JsonFormat;
+import lombok.Data;
+
+import java.io.Serializable;
+import java.time.LocalDateTime;
+
+/**
+ * Entity for the javbus user profile table.
+ *
+ * @author lvzhiqiang
+ * 2022/10/18 22:09
+ */
+@Data
+public class CrawlerJavbusProfile implements Serializable {
+
+    /**
+     * Primary key
+     */
+    private Long id;
+
+    /**
+     * UID
+     */
+    private Long uid;
+
+    /**
+     * Nickname
+     */
+    private String nickName;
+
+    /**
+     * Email status
+     */
+    private String emailStatus;
+
+    /**
+     * Number of friends
+     */
+    private Integer friendNum;
+
+    /**
+     * Number of replies
+     */
+    private Integer replyNum;
+
+    /**
+     * Number of threads
+     */
+    private Integer threadNum;
+
+    /**
+     * User group
+     */
+    private String userGroup;
+
+    /**
+     * Online time, in hours
+     */
+    private Integer onlineTime;
+
+    /**
+     * Registration time
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    private LocalDateTime registrationTime;
+
+    /**
+     * Last visit
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    private LocalDateTime lastVisit;
+
+    /**
+     * Last activity time
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    private LocalDateTime lastActivityTime;
+
+    /**
+     * Last post time
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    private LocalDateTime lastPublishedTime;
+
+    /**
+     * Time zone
+     */
+    private String timeZone;
+
+    /**
+     * Used space, in bytes
+     */
+    private Integer usedSpace;
+
+    /**
+     * Mileage
+     */
+    private Integer mileage;
+
+    /**
+     * Money
+     */
+    private Integer money;
+
+    /**
+     * Avatar URL
+     */
+    private String avatarUrl;
+
+    /**
+     * Local avatar URL
+     */
+    private String avatarLocalUrl;
+
+    /**
+     * Personal signature text
+     */
+    private String signStr;
+
+    /**
+     * Personal signature image
+     */
+    private String signImg;
+
+    /**
+     * Delete flag {1: normal, 2: deleted}
+     */
+    private Integer deleteFlag;
+
+    /**
+     * Creation time
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    private LocalDateTime createTime;
+
+    /**
+     * Last modification time
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    private LocalDateTime modifyTime;
+}

+ 1 - 1
src/main/java/top/lvzhiqiang/service/Crawler4JavbusService.java

@@ -8,5 +8,5 @@ package top.lvzhiqiang.service;
  */
 public interface Crawler4JavbusService {
 
-    void jsoupJavbusMemberInfo() throws Exception;
+    void jsoupJavbusProfile(String start, String limit) throws Exception; // Javbus profile crawl entry point; CrawlerController passes both arguments through as raw request strings.
 }

+ 182 - 2
src/main/java/top/lvzhiqiang/service/impl/Crawler4JavbusServiceImpl.java

@@ -1,18 +1,32 @@
 package top.lvzhiqiang.service.impl;
 
+import com.alibaba.fastjson.JSONObject;
 import lombok.extern.slf4j.Slf4j;
+import org.jsoup.Connection;
+import org.jsoup.HttpStatusException;
+import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Service;
+import org.springframework.util.StopWatch;
+import top.lvzhiqiang.entity.DicCode;
 import top.lvzhiqiang.mapper.DicCodeMapper;
 import top.lvzhiqiang.service.Crawler4JavbusService;
 import top.lvzhiqiang.util.JsoupUtil;
+import top.lvzhiqiang.util.StringUtils;
 
 import javax.annotation.Resource;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
 import java.net.InetSocketAddress;
 import java.net.Proxy;
+import java.net.URL;
+import java.util.Base64;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
 
 /**
  * Crawler Javbus ServiceImpl
@@ -29,10 +43,20 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
     @Value("${spring.profiles.active}")
     private String env;
 
-    protected static Map<String, String> cookies = null;
+    Map<String, String> javbusConstantMap;
+    Map<String, String> javbusCookiesMap = null;
+    private String bdAccessToken = "";
 
     @Override
-    public void jsoupJavbusMemberInfo() throws Exception {
+    public void jsoupJavbusProfile(String start, String limit) throws Exception {
+        log.warn("jsoupJavbusProfile 开始:start={},limit={}", start, limit);
+        StopWatch stopWatch = new StopWatch();
+        stopWatch.start();
+
+        // 获取javbus常量MAP
+        javbusConstantMap = dicCodeMapper.findAll().stream()
+                .filter(x -> "javbus".equals(x.getCodeDesc()) && x.getEnv().contains(env))
+                .collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
         // 代理及TOKEN设置
         Proxy proxy;
         if ("dev".equals(env)) {
@@ -40,10 +64,166 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
         } else {
             proxy = Proxy.NO_PROXY;
         }
+        if (StringUtils.isEmpty(bdAccessToken)) {
+            bdAccessToken = getAuth(javbusConstantMap.get("bd_ak"), javbusConstantMap.get("bd_sk"));
+        }
+        if (null == javbusCookiesMap) {
+            generateJavbusCookies(proxy);
+        }
+        // 获取个人资料
+        Connection.Response memberInfoResponse = JsoupUtil.requestBody("https://www.javbus.com/forum/?355292", JsoupUtil.HTTP_GET, proxy, javbusCookiesMap, null, null);
+        System.out.println(memberInfoResponse.body());
 
+    }
 
+    private void generateJavbusCookies(Proxy proxy) throws Exception {
+        // 1 登陆获取cookies
+        // 1.0 https://www.javbus.com/forum/forum.php
+        Connection.Response forumResponse = JsoupUtil.requestBody(javbusConstantMap.get("forum_url"), JsoupUtil.HTTP_GET, proxy, null);
+        Map<String, String> forumCookies = forumResponse.cookies();
+        log.warn("generateJavbusCookies=>,forum_url={},forumCookies={}", javbusConstantMap.get("forum_url"), forumCookies);
+        // 1.1 https://www.javbus.com/forum/member.php
+        Map<String, String> params = new HashMap<>(8);
+        params.put("mod", "logging");
+        params.put("action", "login");
+        params.put("referer", "");
+        params.put("infloat", "yes");
+        params.put("handlekey", "login");
+        params.put("inajax", "1");
+        params.put("ajaxtarget", "fwin_content_login");
+        String memberHtmlStr = JsoupUtil.requestDocument(javbusConstantMap.get("member_url"), JsoupUtil.HTTP_GET, proxy, forumCookies, null, params).html().replace("<![CDATA[", "").replace("]]>", "");
+        Document memberDocument = Jsoup.parse(memberHtmlStr);
+        String key1 = memberDocument.select("input[type='password']").first().attr("id").split("_")[1];
+        String key2 = memberDocument.select("span[id^='seccode']").first().attr("id").split("_")[1];
+        String key3 = memberDocument.select("input[name='formhash']").first().val();
+        // 1.2 https://www.javbus.com/forum/misc.php
+        params.clear();
+        params.put("mod", "seccode");
+        params.put("action", "update");
+        params.put("idhash", key2);
+        params.put("modid", "member::logging");
+        Document miscDocument = JsoupUtil.requestDocument(javbusConstantMap.get("misc_url"), JsoupUtil.HTTP_GET, proxy, forumCookies, null, params);
+        String imgVerifyUrl = "https://www.javbus.com/forum/" + miscDocument.select("img[onclick]").first().attr("src");
+        // 1.3 get verifyImg
+        Map<String, String> headerParams = new HashMap<>(8);
+        headerParams.put("referer", javbusConstantMap.get("forum_url"));
+        Connection.Response imgResponse = JsoupUtil.requestBody(imgVerifyUrl, JsoupUtil.HTTP_GET, proxy, forumCookies, headerParams, null);
+        byte[] imgBytes = imgResponse.bodyAsBytes();
+        Map<String, String> imgCookies = imgResponse.cookies();
+        log.warn("generateJavbusCookies=>,imgVerifyUrl={},imgCookies={}", imgVerifyUrl, imgCookies);
+        String cookieKey4Seccode = "";
+        for (Map.Entry<String, String> imgCookie : imgCookies.entrySet()) {
+            if (imgCookie.getKey().contains("seccode")) {
+                cookieKey4Seccode = imgCookie.getKey();
+                break;
+            }
+        }
+        // 1.4 get imgVerifyNumber by BaiduOCR
+        headerParams.clear();
+        headerParams.put("Content-Type", "application/x-www-form-urlencoded");
+        params.clear();
+        params.put("image", Base64.getEncoder().encodeToString(imgBytes));
+        JSONObject crAccurateBasicResult = null;
+        String seccodeverify = "";
+        for (int i = 0; i < 3; i++) {
+            try {
+                Connection.Response ocrResponse = JsoupUtil.requestBody(javbusConstantMap.get("bd_ocr_url").concat("?access_token=").concat(bdAccessToken),
+                        JsoupUtil.HTTP_POST, Proxy.NO_PROXY, headerParams, params);
+                crAccurateBasicResult = JSONObject.parseObject(ocrResponse.body());
+                seccodeverify = crAccurateBasicResult.getJSONArray("words_result").getJSONObject(0).getString("words");
+                break;
+            } catch (HttpStatusException hse) {
+                bdAccessToken = getAuth(javbusConstantMap.get("bd_ak"), javbusConstantMap.get("bd_sk"));
+            } catch (Exception e) {
+                log.error("BaiduOCR异常,bdOcrUrl={},bdAccessToken={},crAccurateBasicResult={}", javbusConstantMap.get("bd_ocr_url"), bdAccessToken, crAccurateBasicResult, e);
+                if (i == 2) {
+                    throw new Exception("BaiduOCR异常!");
+                }
+            }
+        }
+        // 1.5 https://www.javbus.com/forum/member.php
+        String sbParams = "?mod=logging&action=login&loginsubmit=yes&handlekey=login&loginhash=" + key1 + "&inajax=1";
+        headerParams.clear();
+        headerParams.put("Content-Type", "application/x-www-form-urlencoded");
+        params.clear();
+        params.put("formhash", key3);
+        params.put("referer", javbusConstantMap.get("forum_url"));
+        params.put("loginfield", "username");
+        params.put("username", javbusConstantMap.get("username"));
+        params.put("password", javbusConstantMap.get("password"));
+        params.put("questionid", "0");
+        params.put("answer", "");
+        params.put("seccodehash", key2);
+        params.put("seccodemodid", "member::logging");
+        params.put("seccodeverify", seccodeverify);
+        if (cookieKey4Seccode != "") {
+            forumCookies.put("existmag", "mag");
+            forumCookies.put(cookieKey4Seccode, imgCookies.get(cookieKey4Seccode));
+        }
+        String loginUrl = javbusConstantMap.get("member_url").concat(sbParams);
+        Connection.Response loginResponse = JsoupUtil.requestBody(loginUrl, JsoupUtil.HTTP_POST, proxy, forumCookies, headerParams, params);
+
+        Map<String, String> loginCookies = loginResponse.cookies();
+        System.out.println("loginCookies=" + loginCookies);
+        System.out.println(loginResponse.body());
+        log.warn("generateJavbusCookies=>,loginUrl={},params={},forumCookies={},loginCookies={},loginResponseBody={}", loginUrl, params, forumCookies, loginCookies, loginResponse.body());
+        for (Map.Entry<String, String> loginCookie : loginCookies.entrySet()) {
+            if (loginCookie.getKey().contains("ulastactivity")) {
+                forumCookies.put(loginCookie.getKey(), loginCookie.getValue());
+            } else if (loginCookie.getKey().contains("auth")) {
+                forumCookies.put(loginCookie.getKey(), loginCookie.getValue());
+            } else if (loginCookie.getKey().contains("lastcheckfeed")) {
+                forumCookies.put(loginCookie.getKey(), loginCookie.getValue());
+            } else if (loginCookie.getKey().contains("lip")) {
+                forumCookies.put(loginCookie.getKey(), loginCookie.getValue());
+            }
+        }
+        log.warn("generateJavbusCookies=>,forumFinalCookies={}", forumCookies);
+        javbusCookiesMap = forumCookies;
     }
 
+    /**
+     * Requests an access token from the configured Baidu auth endpoint ({@code bd_authhost_url})
+     * using the {@code client_credentials} grant.
+     *
+     * @param ak API Key, sent as {@code client_id}
+     * @param sk Secret Key, sent as {@code client_secret}
+     * @return the {@code access_token} field of the JSON response, or {@code null} when the request
+     * or the parsing of its response fails (the failure is logged, not thrown)
+     */
+    public String getAuth(String ak, String sk) {
+        // Token endpoint; the query parameters are appended directly to the configured value.
+        String authHost = javbusConstantMap.get("bd_authhost_url");
+        String getAccessTokenUrl = authHost
+                // 1. grant_type is a fixed parameter
+                + "grant_type=client_credentials"
+                // 2. API Key
+                + "&client_id=" + ak
+                // 3. Secret Key
+                + "&client_secret=" + sk;
+        HttpURLConnection connection = null;
+        try {
+            connection = (HttpURLConnection) new URL(getAccessTokenUrl).openConnection();
+            connection.setRequestMethod("GET");
+            connection.connect();
+            StringBuilder result = new StringBuilder();
+            // try-with-resources closes the reader (and the underlying stream) even when reading fails.
+            try (BufferedReader in = new BufferedReader(
+                    new InputStreamReader(connection.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
+                String line;
+                while ((line = in.readLine()) != null) {
+                    result.append(line);
+                }
+            }
+            // The raw response is deliberately not printed: it contains the access token.
+            JSONObject jsonObject = JSONObject.parseObject(result.toString());
+            return jsonObject.getString("access_token");
+        } catch (Exception e) {
+            // The URL is not logged because it carries the secret key.
+            log.error("获取token失败!", e);
+        } finally {
+            if (connection != null) {
+                connection.disconnect();
+            }
+        }
+        return null;
+    }
 }
 
 

+ 11 - 0
src/main/resources/static/crawler.html

@@ -173,6 +173,17 @@
             <input type="submit" value="提交">
         </form>
     </div>
+    <hr/>
+    <div style="margin-right:20px;">
+        <span class="font">jsoupJavbusProfile</span>
+        <form method="post" action="bg/crawler/jsoupJavbusProfile">
+            <span>start</span>
+            <input type="text" name="start"/>
+            <span>limit</span>
+            <input type="text" name="limit"/>
+            <input type="submit" value="提交">
+        </form>
+    </div>
 </div>
 </body>
 </html>

+ 23 - 14
src/test/java/Test4Javbus.java

@@ -1,7 +1,10 @@
 import com.alibaba.fastjson.JSONObject;
+import org.apache.commons.lang3.StringUtils;
 import org.jsoup.Connection;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
 import top.lvzhiqiang.util.JsoupUtil;
 
 import java.io.*;
@@ -9,10 +12,7 @@ import java.net.HttpURLConnection;
 import java.net.InetSocketAddress;
 import java.net.Proxy;
 import java.net.URL;
-import java.util.Base64;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 
 public class Test4Javbus {
     public static void main(String[] args) throws Exception {
@@ -22,23 +22,32 @@ public class Test4Javbus {
 
     private static void setupTwo() throws Exception {
         File file = new File("d:\\zhiqiang.lv\\Desktop", "1.html");
+        file = new File("C:\\Users\\l1024v\\Desktop", "1.html");
         Document document = Jsoup.parse(file, "UTF-8");
 
         String avatarUrl = document.select("div.avt").select("img").attr("src");
         String[] mbn0Arr = document.select("div.u_profile").select("div.cl").get(0).select("h2.mbn").get(0).text().replace("(", "").replace(")", "").split("UID:");
         String nickName = mbn0Arr[0].trim();
         String uid = mbn0Arr[1].trim();
-        String emailStatus = document.select("div.u_profile").select("div.cl").get(0).select("ul.cl").first().text().replace("郵箱狀態", "").trim();
-        String friendNum = document.select("div.u_profile").select("div.cl").get(0).select("ul.cl").get(1)
+        String emailStatus = document.select("div.u_profile").select("div.cl").get(0).select("ul").first().text().replace("郵箱狀態", "").trim();
+
+        Elements signEles = document.select("div.u_profile").select("div.cl").get(0).select("ul").get(1).select("li:contains(個人簽名)");
+        String signStr = "";
+        ArrayList<String> signImgList = new ArrayList<>();
+        if (signEles.size() > 0) {
+            signStr = signEles.first().select("table").text();
+            Elements signImgEles = signEles.first().select("table").select("img");
+            for (Element signImgEle : signImgEles) {
+                signImgList.add(signImgEle.attr("src"));
+            }
+        }
+
+        String friendNum = document.select("div.u_profile").select("div.cl").get(0).select("ul").get(2)
                 .select("a").get(0).text().replace("好友數", "").trim();
-        String replyNum = document.select("div.u_profile").select("div.cl").get(0).select("ul.cl").get(1)
+        String replyNum = document.select("div.u_profile").select("div.cl").get(0).select("ul").get(2)
                 .select("a").get(1).text().replace("回帖數", "").trim();
-        String threadNum = document.select("div.u_profile").select("div.cl").get(0).select("ul.cl").get(1)
+        String threadNum = document.select("div.u_profile").select("div.cl").get(0).select("ul").get(2)
                 .select("a").get(2).text().replace("主題數", "").trim();
-        String sex = document.select("div.u_profile").select("div.cl").get(0).select("ul.cl").get(2)
-                .select("li").first().text().replace("性別", "").trim();
-        String birthday = document.select("div.u_profile").select("div.cl").get(0).select("ul.cl").get(2)
-                .select("li").get(1).text().replace("生日", "").trim();
         String userGroup = document.select("div.u_profile").select("div.cl").get(1).select("ul").get(0)
                 .select("a").text();
         String onlineTime = document.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
@@ -67,8 +76,8 @@ public class Test4Javbus {
         System.out.println(friendNum);
         System.out.println(replyNum);
         System.out.println(threadNum);
-        System.out.println(sex);
-        System.out.println(birthday);
+        System.out.println(signStr);
+        System.out.println(StringUtils.join(signImgList, ";"));
         System.out.println(userGroup);
         System.out.println(onlineTime);
         System.out.println(registrationTime);