import com.alibaba.fastjson.JSONObject; import org.apache.commons.lang3.StringUtils; import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import top.lvzhiqiang.entity.CrawlerJavbusProfile; import top.lvzhiqiang.util.DateUtils; import top.lvzhiqiang.util.JsoupUtil; import java.io.*; import java.net.HttpURLConnection; import java.net.InetSocketAddress; import java.net.Proxy; import java.net.URL; import java.time.LocalDateTime; import java.util.*; public class Test4Javbus { public static void main(String[] args) throws Exception { //setupOne(); setupTwo(); } private static void setupTwo() throws Exception { File file = new File("d:\\zhiqiang.lv\\Desktop", "1.html"); // file = new File("C:\\Users\\l1024v\\Desktop", "1.html"); Document document = Jsoup.parse(file, "UTF-8"); String avatarUrl = document.select("div.avt").select("img").attr("src"); String[] mbn0Arr = document.select("div.u_profile").select("div.cl").get(0).select("h2.mbn").get(0).text().replace("(", "").replace(")", "").split("UID:"); String nickName = mbn0Arr[0].trim(); String uid = mbn0Arr[1].trim(); String emailStatus = document.select("div.u_profile").select("div.cl").get(0).select("ul").first().text().replace("郵箱狀態", "").trim(); Elements signEles = document.select("div.u_profile").select("div.cl").get(0).select("ul").get(1).select("li:contains(個人簽名)"); String signStr = ""; ArrayList signImgList = new ArrayList<>(); if (signEles.size() > 0) { signStr = signEles.first().select("table").text(); Elements signImgEles = signEles.first().select("table").select("img"); for (Element signImgEle : signImgEles) { signImgList.add(signImgEle.attr("src")); } } String friendNum = document.select("div.u_profile").select("div.cl").get(0).select("ul").get(2) .select("a").get(0).text().replace("好友數", "").trim(); String replyNum = document.select("div.u_profile").select("div.cl").get(0).select("ul").get(2) .select("a").get(1).text().replace("回帖數", "").trim(); String threadNum = document.select("div.u_profile").select("div.cl").get(0).select("ul").get(2) .select("a").get(2).text().replace("主題數", "").trim(); String userGroup = document.select("div.u_profile").select("div.cl").get(1).select("ul").get(0) .select("a").text(); String onlineTime = document.select("div.u_profile").select("div.cl").get(1).select("ul").get(1) .select("li:contains(在線時間)").text().replace("在線時間", "").replace("小時", "").trim(); String registrationTime = document.select("div.u_profile").select("div.cl").get(1).select("ul").get(1) .select("li:contains(註冊時間)").text().replace("註冊時間", "").trim(); String lastVisit = document.select("div.u_profile").select("div.cl").get(1).select("ul").get(1) .select("li:contains(最後訪問)").text().replace("最後訪問", "").trim(); String lastActivityTime = document.select("div.u_profile").select("div.cl").get(1).select("ul").get(1) .select("li:contains(上次活動時間)").text().replace("上次活動時間", "").trim(); String lastPublishedTime = document.select("div.u_profile").select("div.cl").get(1).select("ul").get(1) .select("li:contains(上次發表時間)").text().replace("上次發表時間", "").trim(); String timeZone = document.select("div.u_profile").select("div.cl").get(1).select("ul").get(1) .select("li:contains(所在時區)").text().replace("所在時區", "").trim(); String usedSpace = document.select("div.u_profile").select("div.cl").get(2).select("ul").get(0) .select("li").get(0).text().replace("已用空間", "").replace("B", "").trim(); String mileage = document.select("div.u_profile").select("div.cl").get(2).select("ul").get(0) .select("li").get(1).text().replace("里程", "").trim(); String money = document.select("div.u_profile").select("div.cl").get(2).select("ul").get(0) .select("li").last().text().replace("金錢", "").trim(); CrawlerJavbusProfile crawlerJavbusProfile = new CrawlerJavbusProfile(); crawlerJavbusProfile.setUid(Long.valueOf(uid)); crawlerJavbusProfile.setNickName(nickName); crawlerJavbusProfile.setEmailStatus(emailStatus); crawlerJavbusProfile.setFriendNum(Integer.valueOf(friendNum)); crawlerJavbusProfile.setReplyNum(Integer.valueOf(replyNum)); crawlerJavbusProfile.setThreadNum(Integer.valueOf(threadNum)); crawlerJavbusProfile.setUserGroup(userGroup); crawlerJavbusProfile.setOnlineTime(top.lvzhiqiang.util.StringUtils.isNotEmpty(onlineTime) ? Integer.valueOf(onlineTime) : null); crawlerJavbusProfile.setRegistrationTime(top.lvzhiqiang.util.StringUtils.isNotEmpty(registrationTime) ? LocalDateTime.parse(registrationTime, DateUtils.dateTimeFormatter3) : null); crawlerJavbusProfile.setLastVisit(top.lvzhiqiang.util.StringUtils.isNotEmpty(lastVisit) ? LocalDateTime.parse(lastVisit, DateUtils.dateTimeFormatter3) : null); crawlerJavbusProfile.setLastActivityTime(top.lvzhiqiang.util.StringUtils.isNotEmpty(lastActivityTime) ? LocalDateTime.parse(lastActivityTime, DateUtils.dateTimeFormatter3) : null); crawlerJavbusProfile.setLastPublishedTime(top.lvzhiqiang.util.StringUtils.isNotEmpty(lastPublishedTime) ? LocalDateTime.parse(lastPublishedTime, DateUtils.dateTimeFormatter3) : null); crawlerJavbusProfile.setTimeZone(top.lvzhiqiang.util.StringUtils.isNotEmpty(timeZone) ? timeZone : null); crawlerJavbusProfile.setUsedSpace(Integer.valueOf(usedSpace)); crawlerJavbusProfile.setMileage(Integer.valueOf(mileage)); crawlerJavbusProfile.setMoney(Integer.valueOf(money)); crawlerJavbusProfile.setAvatarUrl(avatarUrl); crawlerJavbusProfile.setSignStr(signStr); crawlerJavbusProfile.setSignImg(org.apache.commons.lang3.StringUtils.join(signImgList, ",")); System.out.println(crawlerJavbusProfile); } private static void setupOne() throws Exception { // 代理及TOKEN设置 Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 1080)); // 1 登陆获取cookies // 1.0 https://www.javbus.com/forum/forum.php Connection.Response forumResponse = JsoupUtil.requestBody("https://www.javbus.com/forum/forum.php", JsoupUtil.HTTP_GET, proxy, null); Map forumCookies = forumResponse.cookies(); System.out.println("forumCookies=" + forumCookies); // 1.1 https://www.javbus.com/forum/member.php String memberUrl = "https://www.javbus.com/forum/member.php"; Map params = new HashMap<>(); params.put("mod", "logging"); params.put("action", "login"); params.put("referer", ""); params.put("infloat", "yes"); params.put("handlekey", "login"); params.put("inajax", "1"); params.put("ajaxtarget", "fwin_content_login"); String memberHtmlStr = JsoupUtil.requestDocument(memberUrl, JsoupUtil.HTTP_GET, proxy, forumCookies, null, params).html().replace("", ""); Document memberDocument = Jsoup.parse(memberHtmlStr); String key1 = memberDocument.select("input[type='password']").first().attr("id").split("_")[1]; String key2 = memberDocument.select("span[id^='seccode']").first().attr("id").split("_")[1]; String key3 = memberDocument.select("input[name='formhash']").first().val(); // 1.2 https://www.javbus.com/forum/misc.php String miscUrl = "https://www.javbus.com/forum/misc.php"; params.clear(); params.put("mod", "seccode"); params.put("action", "update"); params.put("idhash", key2); params.put("modid", "member::logging"); Document miscDocument = JsoupUtil.requestDocument(miscUrl, JsoupUtil.HTTP_GET, proxy, forumCookies, null, params); String imgVerifyUrl = "https://www.javbus.com/forum/" + miscDocument.select("img[onclick]").first().attr("src"); System.out.println("imgVerifyUrl=" + imgVerifyUrl); // 1.3 get imgVerifyUrl Map headerParams = new HashMap<>(); headerParams.put("referer", "https://www.javbus.com/forum/forum.php"); Connection.Response imgResponse = JsoupUtil.requestBody(imgVerifyUrl, JsoupUtil.HTTP_GET, proxy, forumCookies, headerParams, null); byte[] imgBytes = imgResponse.bodyAsBytes(); Map imgCookies = imgResponse.cookies(); System.out.println("imgCookies=" + imgCookies); String cookieKey4Seccode = ""; for (Map.Entry imgCookie : imgCookies.entrySet()) { if (imgCookie.getKey().contains("seccode")) { cookieKey4Seccode = imgCookie.getKey(); break; } } // 1.4 get imgVerifyNumber by BaiduOCR { //一次最多读取1k byte[] buffer = new byte[1024]; //实际读取的长度 int readLenghth; //创建的一个写出的缓冲流 File savePathPrexFile = new File("d:\\zhiqiang.lv\\Desktop"); BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(new FileOutputStream(new File(savePathPrexFile, "1.png"))); //文件逐步写入本地 BufferedInputStream bufferedInputStream = new BufferedInputStream(new ByteArrayInputStream(imgBytes)); while ((readLenghth = bufferedInputStream.read(buffer, 0, 1024)) != -1) {//先读出来,保存在buffer数组中 bufferedOutputStream.write(buffer, 0, readLenghth);//再从buffer中取出来保存到本地 } //关闭缓冲流 bufferedOutputStream.close(); bufferedInputStream.close(); } String ocrAccurateBasicUrl = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"; String accessToken = getAuth("taQYPoO9deuODxEsltkGFyqG", "lm9laVGOO14cH3sfWvtwcL1GtEC8rwS9"); headerParams.clear(); headerParams.put("Content-Type", "application/x-www-form-urlencoded"); params.clear(); params.put("image", Base64.getEncoder().encodeToString(imgBytes)); Connection.Response ocrResponse = JsoupUtil.requestBody(ocrAccurateBasicUrl.concat("?access_token=").concat(accessToken), JsoupUtil.HTTP_POST, Proxy.NO_PROXY, headerParams, params); JSONObject crAccurateBasicResult = JSONObject.parseObject(ocrResponse.body()); String seccodeverify = crAccurateBasicResult.getJSONArray("words_result").getJSONObject(0).getString("words"); // 1.5 https://www.javbus.com/forum/member.php StringBuffer sbParams = new StringBuffer(); sbParams.append("?mod=logging&action=login&loginsubmit=yes&handlekey=login&loginhash=").append(key1).append("&inajax=1"); headerParams.clear(); headerParams.put("Content-Type", "application/x-www-form-urlencoded"); params.clear(); params.put("formhash", key3); params.put("referer", "https://www.javbus.com/forum/forum.php"); params.put("loginfield", "username"); params.put("username", "Tujide.lv"); params.put("password", "Lzq920165830."); params.put("questionid", "0"); params.put("answer", ""); params.put("seccodehash", key2); params.put("seccodemodid", "member::logging"); params.put("seccodeverify", seccodeverify); System.out.println(sbParams); System.out.println(params); if (cookieKey4Seccode != "") { forumCookies.put("existmag", "mag"); forumCookies.put(cookieKey4Seccode, imgCookies.get(cookieKey4Seccode)); System.out.println("forumCookies2=" + forumCookies); } Connection.Response loginResponse = JsoupUtil.requestBody(memberUrl.concat(sbParams.toString()), JsoupUtil.HTTP_POST, proxy, forumCookies, headerParams, params); Map loginCookies = loginResponse.cookies(); System.out.println("loginCookies=" + loginCookies); System.out.println(loginResponse.body()); for (Map.Entry loginCookie : loginCookies.entrySet()) { if (loginCookie.getKey().contains("ulastactivity")) { forumCookies.put(loginCookie.getKey(), loginCookie.getValue()); } else if (loginCookie.getKey().contains("auth")) { forumCookies.put(loginCookie.getKey(), loginCookie.getValue()); } else if (loginCookie.getKey().contains("lastcheckfeed")) { forumCookies.put(loginCookie.getKey(), loginCookie.getValue()); } else if (loginCookie.getKey().contains("lip")) { forumCookies.put(loginCookie.getKey(), loginCookie.getValue()); } } System.out.println("loginCookies2=" + forumCookies); // 2 获取个人资料 Connection.Response memberInfoResponse = JsoupUtil.requestBody("https://www.javbus.com/forum/?355292", JsoupUtil.HTTP_GET, proxy, forumCookies, null, null); System.out.println("memberInfoCookies=" + memberInfoResponse.cookies()); System.out.println(memberInfoResponse.body()); } public static String getAuth(String ak, String sk) { // 获取token地址 String authHost = "https://aip.baidubce.com/oauth/2.0/token?"; String getAccessTokenUrl = authHost // 1. grant_type为固定参数 + "grant_type=client_credentials" // 2. 官网获取的 API Key + "&client_id=" + ak // 3. 官网获取的 Secret Key + "&client_secret=" + sk; try { URL realUrl = new URL(getAccessTokenUrl); // 打开和URL之间的连接 HttpURLConnection connection = (HttpURLConnection) realUrl.openConnection(); connection.setRequestMethod("GET"); connection.connect(); // 获取所有响应头字段 Map> map = connection.getHeaderFields(); // 遍历所有的响应头字段 for (String key : map.keySet()) { System.err.println(key + "--->" + map.get(key)); } // 定义 BufferedReader输入流来读取URL的响应 BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream())); String result = ""; String line; while ((line = in.readLine()) != null) { result += line; } /** * 返回结果示例 */ System.err.println("result:" + result); JSONObject jsonObject = JSONObject.parseObject(result); String access_token = jsonObject.getString("access_token"); return access_token; } catch (Exception e) { System.err.print("获取token失败!"); e.printStackTrace(System.err); } return null; } }