|
|
@@ -1,18 +1,32 @@
|
|
|
package top.lvzhiqiang.service.impl;
|
|
|
|
|
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.jsoup.Connection;
|
|
|
+import org.jsoup.HttpStatusException;
|
|
|
+import org.jsoup.Jsoup;
|
|
|
import org.jsoup.nodes.Document;
|
|
|
import org.springframework.beans.factory.annotation.Value;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
+import org.springframework.util.StopWatch;
|
|
|
+import top.lvzhiqiang.entity.DicCode;
|
|
|
import top.lvzhiqiang.mapper.DicCodeMapper;
|
|
|
import top.lvzhiqiang.service.Crawler4JavbusService;
|
|
|
import top.lvzhiqiang.util.JsoupUtil;
|
|
|
+import top.lvzhiqiang.util.StringUtils;
|
|
|
|
|
|
import javax.annotation.Resource;
|
|
|
+import java.io.BufferedReader;
|
|
|
+import java.io.InputStreamReader;
|
|
|
+import java.net.HttpURLConnection;
|
|
|
import java.net.InetSocketAddress;
|
|
|
import java.net.Proxy;
|
|
|
+import java.net.URL;
|
|
|
+import java.util.Base64;
|
|
|
import java.util.HashMap;
|
|
|
+import java.util.List;
|
|
|
import java.util.Map;
|
|
|
+import java.util.stream.Collectors;
|
|
|
|
|
|
/**
|
|
|
* Crawler Javbus ServiceImpl
|
|
|
@@ -29,10 +43,20 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
|
|
|
@Value("${spring.profiles.active}")
|
|
|
private String env;
|
|
|
|
|
|
- protected static Map<String, String> cookies = null;
|
|
|
+ Map<String, String> javbusConstantMap;
|
|
|
+ Map<String, String> javbusCookiesMap = null;
|
|
|
+ private String bdAccessToken = "";
|
|
|
|
|
|
@Override
|
|
|
- public void jsoupJavbusMemberInfo() throws Exception {
|
|
|
+ public void jsoupJavbusProfile(String start, String limit) throws Exception {
|
|
|
+ log.warn("jsoupJavbusProfile 开始:start={},limit={}", start, limit);
|
|
|
+ StopWatch stopWatch = new StopWatch();
|
|
|
+ stopWatch.start();
|
|
|
+
|
|
|
+ // 获取javbus常量MAP
|
|
|
+ javbusConstantMap = dicCodeMapper.findAll().stream()
|
|
|
+ .filter(x -> "javbus".equals(x.getCodeDesc()) && x.getEnv().contains(env))
|
|
|
+ .collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
|
|
|
// 代理及TOKEN设置
|
|
|
Proxy proxy;
|
|
|
if ("dev".equals(env)) {
|
|
|
@@ -40,10 +64,166 @@ public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
|
|
|
} else {
|
|
|
proxy = Proxy.NO_PROXY;
|
|
|
}
|
|
|
+ if (StringUtils.isEmpty(bdAccessToken)) {
|
|
|
+ bdAccessToken = getAuth(javbusConstantMap.get("bd_ak"), javbusConstantMap.get("bd_sk"));
|
|
|
+ }
|
|
|
+ if (null == javbusCookiesMap) {
|
|
|
+ generateJavbusCookies(proxy);
|
|
|
+ }
|
|
|
+ // 获取个人资料
|
|
|
+ Connection.Response memberInfoResponse = JsoupUtil.requestBody("https://www.javbus.com/forum/?355292", JsoupUtil.HTTP_GET, proxy, javbusCookiesMap, null, null);
|
|
|
+ System.out.println(memberInfoResponse.body());
|
|
|
|
|
|
+ }
|
|
|
|
|
|
+ private void generateJavbusCookies(Proxy proxy) throws Exception {
|
|
|
+ // 1 登陆获取cookies
|
|
|
+ // 1.0 https://www.javbus.com/forum/forum.php
|
|
|
+ Connection.Response forumResponse = JsoupUtil.requestBody(javbusConstantMap.get("forum_url"), JsoupUtil.HTTP_GET, proxy, null);
|
|
|
+ Map<String, String> forumCookies = forumResponse.cookies();
|
|
|
+ log.warn("generateJavbusCookies=>,forum_url={},forumCookies={}", javbusConstantMap.get("forum_url"), forumCookies);
|
|
|
+ // 1.1 https://www.javbus.com/forum/member.php
|
|
|
+ Map<String, String> params = new HashMap<>(8);
|
|
|
+ params.put("mod", "logging");
|
|
|
+ params.put("action", "login");
|
|
|
+ params.put("referer", "");
|
|
|
+ params.put("infloat", "yes");
|
|
|
+ params.put("handlekey", "login");
|
|
|
+ params.put("inajax", "1");
|
|
|
+ params.put("ajaxtarget", "fwin_content_login");
|
|
|
+ String memberHtmlStr = JsoupUtil.requestDocument(javbusConstantMap.get("member_url"), JsoupUtil.HTTP_GET, proxy, forumCookies, null, params).html().replace("<![CDATA[", "").replace("]]>", "");
|
|
|
+ Document memberDocument = Jsoup.parse(memberHtmlStr);
|
|
|
+ String key1 = memberDocument.select("input[type='password']").first().attr("id").split("_")[1];
|
|
|
+ String key2 = memberDocument.select("span[id^='seccode']").first().attr("id").split("_")[1];
|
|
|
+ String key3 = memberDocument.select("input[name='formhash']").first().val();
|
|
|
+ // 1.2 https://www.javbus.com/forum/misc.php
|
|
|
+ params.clear();
|
|
|
+ params.put("mod", "seccode");
|
|
|
+ params.put("action", "update");
|
|
|
+ params.put("idhash", key2);
|
|
|
+ params.put("modid", "member::logging");
|
|
|
+ Document miscDocument = JsoupUtil.requestDocument(javbusConstantMap.get("misc_url"), JsoupUtil.HTTP_GET, proxy, forumCookies, null, params);
|
|
|
+ String imgVerifyUrl = "https://www.javbus.com/forum/" + miscDocument.select("img[onclick]").first().attr("src");
|
|
|
+ // 1.3 get verifyImg
|
|
|
+ Map<String, String> headerParams = new HashMap<>(8);
|
|
|
+ headerParams.put("referer", javbusConstantMap.get("forum_url"));
|
|
|
+ Connection.Response imgResponse = JsoupUtil.requestBody(imgVerifyUrl, JsoupUtil.HTTP_GET, proxy, forumCookies, headerParams, null);
|
|
|
+ byte[] imgBytes = imgResponse.bodyAsBytes();
|
|
|
+ Map<String, String> imgCookies = imgResponse.cookies();
|
|
|
+ log.warn("generateJavbusCookies=>,imgVerifyUrl={},imgCookies={}", imgVerifyUrl, imgCookies);
|
|
|
+ String cookieKey4Seccode = "";
|
|
|
+ for (Map.Entry<String, String> imgCookie : imgCookies.entrySet()) {
|
|
|
+ if (imgCookie.getKey().contains("seccode")) {
|
|
|
+ cookieKey4Seccode = imgCookie.getKey();
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 1.4 get imgVerifyNumber by BaiduOCR
|
|
|
+ headerParams.clear();
|
|
|
+ headerParams.put("Content-Type", "application/x-www-form-urlencoded");
|
|
|
+ params.clear();
|
|
|
+ params.put("image", Base64.getEncoder().encodeToString(imgBytes));
|
|
|
+ JSONObject crAccurateBasicResult = null;
|
|
|
+ String seccodeverify = "";
|
|
|
+ for (int i = 0; i < 3; i++) {
|
|
|
+ try {
|
|
|
+ Connection.Response ocrResponse = JsoupUtil.requestBody(javbusConstantMap.get("bd_ocr_url").concat("?access_token=").concat(bdAccessToken),
|
|
|
+ JsoupUtil.HTTP_POST, Proxy.NO_PROXY, headerParams, params);
|
|
|
+ crAccurateBasicResult = JSONObject.parseObject(ocrResponse.body());
|
|
|
+ seccodeverify = crAccurateBasicResult.getJSONArray("words_result").getJSONObject(0).getString("words");
|
|
|
+ break;
|
|
|
+ } catch (HttpStatusException hse) {
|
|
|
+ bdAccessToken = getAuth(javbusConstantMap.get("bd_ak"), javbusConstantMap.get("bd_sk"));
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("BaiduOCR异常,bdOcrUrl={},bdAccessToken={},crAccurateBasicResult={}", javbusConstantMap.get("bd_ocr_url"), bdAccessToken, crAccurateBasicResult, e);
|
|
|
+ if (i == 2) {
|
|
|
+ throw new Exception("BaiduOCR异常!");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 1.5 https://www.javbus.com/forum/member.php
|
|
|
+ String sbParams = "?mod=logging&action=login&loginsubmit=yes&handlekey=login&loginhash=" + key1 + "&inajax=1";
|
|
|
+ headerParams.clear();
|
|
|
+ headerParams.put("Content-Type", "application/x-www-form-urlencoded");
|
|
|
+ params.clear();
|
|
|
+ params.put("formhash", key3);
|
|
|
+ params.put("referer", javbusConstantMap.get("forum_url"));
|
|
|
+ params.put("loginfield", "username");
|
|
|
+ params.put("username", javbusConstantMap.get("username"));
|
|
|
+ params.put("password", javbusConstantMap.get("password"));
|
|
|
+ params.put("questionid", "0");
|
|
|
+ params.put("answer", "");
|
|
|
+ params.put("seccodehash", key2);
|
|
|
+ params.put("seccodemodid", "member::logging");
|
|
|
+ params.put("seccodeverify", seccodeverify);
|
|
|
+ if (cookieKey4Seccode != "") {
|
|
|
+ forumCookies.put("existmag", "mag");
|
|
|
+ forumCookies.put(cookieKey4Seccode, imgCookies.get(cookieKey4Seccode));
|
|
|
+ }
|
|
|
+ String loginUrl = javbusConstantMap.get("member_url").concat(sbParams);
|
|
|
+ Connection.Response loginResponse = JsoupUtil.requestBody(loginUrl, JsoupUtil.HTTP_POST, proxy, forumCookies, headerParams, params);
|
|
|
+
|
|
|
+ Map<String, String> loginCookies = loginResponse.cookies();
|
|
|
+ System.out.println("loginCookies=" + loginCookies);
|
|
|
+ System.out.println(loginResponse.body());
|
|
|
+ log.warn("generateJavbusCookies=>,loginUrl={},params={},forumCookies={},loginCookies={},loginResponseBody={}", loginUrl, params, forumCookies, loginCookies, loginResponse.body());
|
|
|
+ for (Map.Entry<String, String> loginCookie : loginCookies.entrySet()) {
|
|
|
+ if (loginCookie.getKey().contains("ulastactivity")) {
|
|
|
+ forumCookies.put(loginCookie.getKey(), loginCookie.getValue());
|
|
|
+ } else if (loginCookie.getKey().contains("auth")) {
|
|
|
+ forumCookies.put(loginCookie.getKey(), loginCookie.getValue());
|
|
|
+ } else if (loginCookie.getKey().contains("lastcheckfeed")) {
|
|
|
+ forumCookies.put(loginCookie.getKey(), loginCookie.getValue());
|
|
|
+ } else if (loginCookie.getKey().contains("lip")) {
|
|
|
+ forumCookies.put(loginCookie.getKey(), loginCookie.getValue());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ log.warn("generateJavbusCookies=>,forumFinalCookies={}", forumCookies);
|
|
|
+ javbusCookiesMap = forumCookies;
|
|
|
}
|
|
|
|
|
|
+ public String getAuth(String ak, String sk) {
|
|
|
+ // 获取token地址
|
|
|
+ String authHost = javbusConstantMap.get("bd_authhost_url");
|
|
|
+ String getAccessTokenUrl = authHost
|
|
|
+ // 1. grant_type为固定参数
|
|
|
+ + "grant_type=client_credentials"
|
|
|
+ // 2. 官网获取的 API Key
|
|
|
+ + "&client_id=" + ak
|
|
|
+ // 3. 官网获取的 Secret Key
|
|
|
+ + "&client_secret=" + sk;
|
|
|
+ try {
|
|
|
+ URL realUrl = new URL(getAccessTokenUrl);
|
|
|
+ // 打开和URL之间的连接
|
|
|
+ HttpURLConnection connection = (HttpURLConnection) realUrl.openConnection();
|
|
|
+ connection.setRequestMethod("GET");
|
|
|
+ connection.connect();
|
|
|
+ // 获取所有响应头字段
|
|
|
+ Map<String, List<String>> map = connection.getHeaderFields();
|
|
|
+ // 遍历所有的响应头字段
|
|
|
+ /*for (String key : map.keySet()) {
|
|
|
+ System.err.println(key + "--->" + map.get(key));
|
|
|
+ }*/
|
|
|
+ // 定义 BufferedReader输入流来读取URL的响应
|
|
|
+ BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
|
|
|
+ String result = "";
|
|
|
+ String line;
|
|
|
+ while ((line = in.readLine()) != null) {
|
|
|
+ result += line;
|
|
|
+ }
|
|
|
+ /**
|
|
|
+ * 返回结果示例
|
|
|
+ */
|
|
|
+ System.err.println("result:" + result);
|
|
|
+ JSONObject jsonObject = JSONObject.parseObject(result);
|
|
|
+ String access_token = jsonObject.getString("access_token");
|
|
|
+ return access_token;
|
|
|
+ } catch (Exception e) {
|
|
|
+ System.err.print("获取token失败!");
|
|
|
+ e.printStackTrace(System.err);
|
|
|
+ }
|
|
|
+ return null;
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
|