| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601 |
- package top.lvzhiqiang.service.impl;
- import com.alibaba.fastjson.JSONObject;
- import com.github.pagehelper.PageHelper;
- import com.github.pagehelper.PageInfo;
- import com.xxl.job.core.context.XxlJobHelper;
- import lombok.extern.slf4j.Slf4j;
- import org.jsoup.Connection;
- import org.jsoup.HttpStatusException;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Document;
- import org.jsoup.nodes.Element;
- import org.jsoup.select.Elements;
- import org.springframework.beans.factory.annotation.Value;
- import org.springframework.scheduling.annotation.Async;
- import org.springframework.stereotype.Service;
- import org.springframework.transaction.annotation.Propagation;
- import org.springframework.transaction.annotation.Transactional;
- import org.springframework.util.StopWatch;
- import top.lvzhiqiang.config.WebAppConfig;
- import top.lvzhiqiang.entity.CrawlerJavbusLog;
- import top.lvzhiqiang.entity.CrawlerJavbusProfile;
- import top.lvzhiqiang.entity.DicCode;
- import top.lvzhiqiang.entity.VideoSitePool;
- import top.lvzhiqiang.mapper.CrawlerJavbusProfileMapper;
- import top.lvzhiqiang.mapper.DicCodeMapper;
- import top.lvzhiqiang.mapper.VideoSitePoolMapper;
- import top.lvzhiqiang.service.Crawler4JavbusService;
- import top.lvzhiqiang.util.DateUtils;
- import top.lvzhiqiang.util.JsoupUtil;
- import top.lvzhiqiang.util.StringUtils;
- import javax.annotation.Resource;
- import java.io.BufferedReader;
- import java.io.InputStreamReader;
- import java.net.HttpURLConnection;
- import java.net.InetSocketAddress;
- import java.net.Proxy;
- import java.net.URL;
- import java.time.LocalDateTime;
- import java.util.*;
- import java.util.stream.Collectors;
- /**
- * Crawler Javbus ServiceImpl
- *
- * @author lvzhiqiang
- * 2022/10/17 14:47
- */
- @Service
- @Slf4j
- public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
- @Resource
- private DicCodeMapper dicCodeMapper;
- @Resource
- private VideoSitePoolMapper videoSitePoolMapper;
- @Resource
- private CrawlerJavbusProfileMapper crawlerJavbusProfileMapper;
- @Value("${spring.profiles.active}")
- private String env;
- Map<String, String> javbusConstantMap = null;
- Map<String, String> javbusCookiesMap = null;
- String bdAccessToken = "";
- Proxy proxy = null;
- public void beforeJavbus() throws Exception {
- if (null == proxy) {
- if ("dev".equals(env)) {
- proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 1080));
- } else {
- proxy = Proxy.NO_PROXY;
- }
- }
- if (StringUtils.isEmpty(bdAccessToken)) {
- bdAccessToken = getAuth(javbusConstantMap.get("bd_ak"), javbusConstantMap.get("bd_sk"));
- }
- if (null == javbusCookiesMap) {
- for (int i = 0; i < 3; i++) {
- if (generateJavbusCookies(proxy)) {
- break;
- } else {
- javbusCookiesMap = null;
- }
- }
- if (javbusCookiesMap == null) {
- throw new Exception("获取javbusCookies失败!");
- }
- }
- }
- @Override
- public Map<String, String> getJavbusCookiesMap() throws Exception {
- // 获取javbus常量MAP
- if (javbusConstantMap == null) {
- javbusConstantMap = dicCodeMapper.findAll().stream()
- .filter(x -> "javbus".equals(x.getCodeDesc()) && x.getEnv().contains(env))
- .collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
- }
- // 代理及TOKEN设置
- beforeJavbus();
- return javbusCookiesMap;
- }
- @Override
- @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
- public void checkJavbusVideoSite() {
- XxlJobHelper.log("checkVideoSite开始==============================");
- // 获取javbus官方地址
- DicCode dicCode = WebAppConfig.dicCodeList.stream().filter(x -> 2 == x.getType() && "javbus".equals(x.getCodeKey())).findFirst().get();
- if (dicCode == null) {
- XxlJobHelper.log("javbus官方站点为Null");
- return;
- }
- // 获取javbusUrlList
- List<String> javbusUrlList = videoSitePoolMapper.findUrlByType(1);
- // 获取javbusNewUrlList
- Set<String> javbusNewUrlList = new HashSet<>();
- try {
- Document document = Jsoup.connect(dicCode.getCodeValue()).timeout(50000).ignoreContentType(true)
- .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36")
- .header("referer", "https://www.javbus.com/").get();
- Elements ahrefList = document.select("strong:contains(防屏蔽地址)").next("a");
- for (Element element : ahrefList) {
- String text = element.text();
- XxlJobHelper.log("Jsoup获取{}防屏蔽地址:{}", dicCode.getCodeValue(), text);
- javbusNewUrlList.add(text);
- }
- } catch (Exception e) {
- log.error("Jsoup获取{}防屏蔽地址异常", dicCode.getCodeValue(), e);
- XxlJobHelper.log("Jsoup获取{}防屏蔽地址异常", dicCode.getCodeValue());
- XxlJobHelper.log(e);
- }
- if (javbusNewUrlList.size() == 0) {
- XxlJobHelper.log("javbusNewUrlList为空");
- }
- if (javbusNewUrlList.size() == 0 && javbusUrlList.size() == 0) {
- XxlJobHelper.log("javbusUrlList和javbusNewUrlList为空");
- return;
- }
- // 校验新地址
- List<String> javbusNewUrlFinalList = javbusNewUrlList.stream().filter(e -> !javbusUrlList.contains(e)).collect(Collectors.toList());
- List<VideoSitePool> videoSitePoolList = new ArrayList<>();
- VideoSitePool videoSitePool;
- for (String javbusNewUrlFinal : javbusNewUrlFinalList) {
- try {
- Jsoup.connect(javbusNewUrlFinal).timeout(50000);
- videoSitePool = new VideoSitePool();
- videoSitePool.setUrl(javbusNewUrlFinal);
- videoSitePool.setType(1);
- videoSitePoolList.add(videoSitePool);
- XxlJobHelper.log("javbusNewUrlFinalList:javbus防屏蔽地址有效!javbusUrl={}", javbusNewUrlFinal);
- } catch (Exception e) {
- log.error("javbusNewUrlFinalList:javbus防屏蔽地址失效!javbusUrl={}", javbusNewUrlFinal, e);
- XxlJobHelper.log("javbusNewUrlFinalList:javbus防屏蔽地址失效!javbusUrl={}", javbusNewUrlFinal);
- XxlJobHelper.log(e);
- }
- }
- if (videoSitePoolList.size() > 0) {
- videoSitePoolMapper.insertList(videoSitePoolList);
- }
- // 校验存量地址
- for (String javbusUrl : javbusUrlList) {
- int deleteFlag = 1;
- try {
- Jsoup.connect(javbusUrl).timeout(50000);
- XxlJobHelper.log("javbusUrlList:javbus防屏蔽地址有效!javbusUrl={}", javbusUrl);
- } catch (Exception e) {
- deleteFlag = 2;
- log.error("javbusUrlList:javbus防屏蔽地址失效!javbusUrl={}", javbusUrl, e);
- XxlJobHelper.log("javbusUrlList:javbus防屏蔽地址失效!javbusUrl={}", javbusUrl);
- XxlJobHelper.log(e);
- }
- videoSitePoolMapper.updateDeleteFlag(javbusUrl, deleteFlag);
- }
- XxlJobHelper.log("checkVideoSite结束==============================");
- }
- @Async
- @Override
- public void jsoupJavbusProfile(Long start, Integer limit) throws Exception {
- log.warn("jsoupJavbusProfile 开始:start={},limit={}", start, limit);
- StopWatch stopWatch = new StopWatch();
- stopWatch.start();
- // 获取javbus常量MAP
- javbusConstantMap = dicCodeMapper.findAll().stream()
- .filter(x -> "javbus".equals(x.getCodeDesc()) && x.getEnv().contains(env))
- .collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
- // 代理及TOKEN设置
- beforeJavbus();
- // 获取个人资料
- jsoupJavbusProfileSub(start, limit);
- log.warn("jsoupJavbusProfile 结束:time={}", stopWatch.getTotalTimeSeconds());
- }
- @Async
- @Override
- public void handleJavbusLog(Integer status) throws Exception {
- log.warn("handleJavbusLog 开始:status={}", status);
- StopWatch stopWatch = new StopWatch();
- stopWatch.start();
- // 获取javbus常量MAP
- javbusConstantMap = dicCodeMapper.findAll().stream()
- .filter(x -> "javbus".equals(x.getCodeDesc()) && x.getEnv().contains(env))
- .collect(Collectors.toMap(DicCode::getCodeKey, DicCode::getCodeValue, (key1, key2) -> key1));
- // 代理及TOKEN设置
- beforeJavbus();
- // 获取个人资料
- handleJavbusLogSub(status);
- log.warn("handleJavbusLog 结束:time={}", stopWatch.getTotalTimeSeconds());
- }
- @Override
- public String findJavbusProfile(String keyword, Integer timeDay, Integer pic, String orderField, String order, Integer pageNo, Integer pageSize) {
- Map<String, Object> params = new HashMap<>();
- params.put("keyword", keyword);
- params.put("timeDay", timeDay);
- params.put("orderField", orderField);
- params.put("order", order);
- PageHelper.startPage(pageNo, pageSize);
- List<CrawlerJavbusProfile> crawlerJavbusProfileList = crawlerJavbusProfileMapper.findJavbusProfile4MultipleParams(params);
- PageInfo<CrawlerJavbusProfile> javbusProfilePageInfo = new PageInfo<>(crawlerJavbusProfileList);
- StringBuffer sb = new StringBuffer("total:".concat(String.valueOf(javbusProfilePageInfo.getTotal())).concat("<br/>"));
- sb.append("<table border=\"1\" cellspacing=\"0\"><tr><th>UID</th><th>昵称</th><th>邮箱状态</th><th>好友数</th><th>回帖数</th><th>主题数</th><th>用户组</th><th>在线时间</th><th>注册时间</th><th>上次活动时间</th><th>上次发表时间</th><th>所在时区</th><th>头像</th><th>个人签名文字</th><th>个人签名图片</th></tr>");
- for (CrawlerJavbusProfile crawlerJavbusProfile : crawlerJavbusProfileList) {
- sb.append("<tr>");
- sb.append("<td>").append(crawlerJavbusProfile.getUid()).append("</td>");
- sb.append("<td>").append(crawlerJavbusProfile.getNickName()).append("</td>");
- sb.append("<td>").append(crawlerJavbusProfile.getEmailStatus()).append("</td>");
- sb.append("<td>").append(crawlerJavbusProfile.getFriendNum()).append("</td>");
- sb.append("<td>").append(crawlerJavbusProfile.getReplyNum()).append("</td>");
- sb.append("<td>").append(crawlerJavbusProfile.getThreadNum()).append("</td>");
- sb.append("<td>").append(crawlerJavbusProfile.getUserGroup()).append("</td>");
- sb.append("<td>").append(crawlerJavbusProfile.getOnlineTime()).append("</td>");
- sb.append("<td>").append(crawlerJavbusProfile.getRegistrationTime()).append("</td>");
- sb.append("<td>").append(crawlerJavbusProfile.getLastActivityTime()).append("</td>");
- sb.append("<td>").append(crawlerJavbusProfile.getLastPublishedTime()).append("</td>");
- sb.append("<td>").append(crawlerJavbusProfile.getTimeZone()).append("</td>");
- if (pic == 2) {
- sb.append("<td>").append("<img src=\"" + crawlerJavbusProfile.getAvatarUrl() + "\" alt=\"封面\" width=\"147\" height=\"auto\" referrerpolicy=\"no-referrer\">").append("</td>");
- } else {
- sb.append("<td>").append(crawlerJavbusProfile.getAvatarUrl()).append("</td>");
- }
- sb.append("<td>").append(crawlerJavbusProfile.getSignStr()).append("</td>");
- if (pic == 2) {
- sb.append("<td>");
- String signImg = crawlerJavbusProfile.getSignImg();
- if (StringUtils.isNotEmpty(signImg)) {
- for (String s : signImg.split(",")) {
- sb.append("<img src=\"" + s + "\" alt=\"sign\" width=\"147\" height=\"auto\">");
- }
- } else {
- sb.append("--");
- }
- sb.append("</td>");
- } else {
- if (StringUtils.isNotEmpty(crawlerJavbusProfile.getSignImg())) {
- sb.append("<td>").append(crawlerJavbusProfile.getSignImg()).append("</td>");
- } else {
- sb.append("<td>--</td>");
- }
- }
- sb.append("</tr>");
- }
- sb.append("</table>");
- return sb.toString();
- }
- private void handleJavbusLogSub(Integer status) {
- List<CrawlerJavbusLog> javbusLogByStatus = crawlerJavbusProfileMapper.findJavbusLogByStatus(status);
- String profileUrl = "https://www.javbus.com/forum/?";
- Document profileDocument;
- for (CrawlerJavbusLog javbusLog : javbusLogByStatus) {
- try {
- profileDocument = JsoupUtil.requestDocument(profileUrl.concat(javbusLog.getBusinessKey()), JsoupUtil.HTTP_GET, proxy, javbusCookiesMap, null, null);
- if (profileDocument.html().contains("您指定的用戶空間不存在")) {
- log.warn("jsoupJavbusProfileSub您指定的用戶空間不存在,start={}", javbusLog.getBusinessKey());
- crawlerJavbusProfileMapper.deleteJavbusLogById2(javbusLog.getId(), "您指定的用戶空間不存在");
- continue;
- }
- if (profileDocument.html().contains("空間已被鎖定無法訪問")) {
- log.warn("jsoupJavbusProfileSub空間已被鎖定無法訪問,start={}", javbusLog.getBusinessKey());
- crawlerJavbusProfileMapper.deleteJavbusLogById2(javbusLog.getId(), "空間已被鎖定無法訪問,如有疑問請聯繫管理員");
- continue;
- }
- CrawlerJavbusProfile crawlerJavbusProfile = new CrawlerJavbusProfile();
- parseJavbusProfile(profileDocument, crawlerJavbusProfile);
- crawlerJavbusProfileMapper.insertOrUpdate(crawlerJavbusProfile);
- log.warn("jsoupJavbusProfileSub成功插入,businessKey={}", javbusLog.getBusinessKey());
- javbusLog.setStatus(2);
- javbusLog.setErrorMsg("");
- } catch (Exception e) {
- log.error("jsoupJavbusProfileSub插入异常,businessKey={}", javbusLog.getBusinessKey(), e);
- javbusLog.setStatus(3);
- javbusLog.setErrorMsg(e.getMessage());
- }
- crawlerJavbusProfileMapper.insertOrUpdateLog(javbusLog);
- }
- }
- private void jsoupJavbusProfileSub(Long start, Integer limit) {
- CrawlerJavbusProfile latestJavbusProfile = crawlerJavbusProfileMapper.findLatestInfo();
- if (start == null && latestJavbusProfile == null) {
- start = 1L;
- } else if (start == null && latestJavbusProfile != null) {
- start = latestJavbusProfile.getUid() + 1;
- }
- long startFinal = 0;
- if (limit != null) {
- startFinal = start + limit;
- }
- String profileUrl = "https://www.javbus.com/forum/?";
- Document profileDocument;
- int continueCount = 0;
- while (true) {
- if (startFinal != 0 && start > startFinal) {
- log.warn("jsoupJavbusProfileSub结束,start={},startFinal={}", start, startFinal);
- return;
- }
- if (continueCount > 10) {
- log.warn("jsoupJavbusProfileSub结束,start={},continueCount={}", start, continueCount);
- return;
- }
- try {
- profileDocument = JsoupUtil.requestDocument(profileUrl.concat(String.valueOf(start)), JsoupUtil.HTTP_GET, proxy, javbusCookiesMap, null, null);
- if (profileDocument.html().contains("您指定的用戶空間不存在")) {
- log.warn("jsoupJavbusProfileSub您指定的用戶空間不存在,start={}", start);
- start++;
- if (start > 500000) {
- continueCount++;
- }
- continue;
- }
- CrawlerJavbusProfile crawlerJavbusProfile = new CrawlerJavbusProfile();
- parseJavbusProfile(profileDocument, crawlerJavbusProfile);
- crawlerJavbusProfileMapper.insertOrUpdate(crawlerJavbusProfile);
- log.warn("jsoupJavbusProfileSub成功插入,start={}", start);
- } catch (Exception e) {
- log.error("jsoupJavbusProfileSub插入异常,start={}", start, e);
- CrawlerJavbusLog crawlerJavbusLog = new CrawlerJavbusLog();
- crawlerJavbusLog.setType(1);
- crawlerJavbusLog.setStatus(1);
- crawlerJavbusLog.setBusinessKey(String.valueOf(start));
- crawlerJavbusLog.setErrorMsg(e.getMessage());
- crawlerJavbusProfileMapper.insertOrUpdateLog(crawlerJavbusLog);
- }
- start++;
- }
- }
- public void parseJavbusProfile(Document profileDocument, CrawlerJavbusProfile crawlerJavbusProfile) {
- String avatarUrl = profileDocument.select("div.avt").select("img").attr("src");
- String[] mbn0Arr = profileDocument.select("div.u_profile").select("div.cl").get(0).select("h2.mbn").get(0).text().replace("(", "").replace(")", "").split("UID:");
- String nickName = mbn0Arr[0].trim();
- String uid = mbn0Arr[1].trim();
- String emailStatus = profileDocument.select("div.u_profile").select("div.cl").get(0).select("ul").first().text().replace("郵箱狀態", "").trim();
- Elements signEles = profileDocument.select("div.u_profile").select("div.cl").get(0).select("ul").get(1).select("li:contains(個人簽名)");
- String signStr = "";
- ArrayList<String> signImgList = new ArrayList<>();
- if (signEles.size() > 0) {
- signStr = signEles.first().select("table").text();
- Elements signImgEles = signEles.first().select("table").select("img");
- for (Element signImgEle : signImgEles) {
- signImgList.add(signImgEle.attr("src"));
- }
- }
- String friendNum = profileDocument.select("div.u_profile").select("div.cl").get(0).select("ul").get(2)
- .select("a").get(0).text().replace("好友數", "").trim();
- String replyNum = profileDocument.select("div.u_profile").select("div.cl").get(0).select("ul").get(2)
- .select("a").get(1).text().replace("回帖數", "").trim();
- String threadNum = profileDocument.select("div.u_profile").select("div.cl").get(0).select("ul").get(2)
- .select("a").get(2).text().replace("主題數", "").trim();
- String userGroup = profileDocument.select("div.u_profile").select("div.cl").get(1).select("ul").get(0)
- .select("a").text();
- String onlineTime = profileDocument.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
- .select("li:contains(在線時間)").text().replace("在線時間", "").replace("小時", "").trim();
- String registrationTime = profileDocument.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
- .select("li:contains(註冊時間)").text().replace("註冊時間", "").trim();
- String lastVisit = profileDocument.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
- .select("li:contains(最後訪問)").text().replace("最後訪問", "").trim();
- String lastActivityTime = profileDocument.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
- .select("li:contains(上次活動時間)").text().replace("上次活動時間", "").trim();
- String lastPublishedTime = profileDocument.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
- .select("li:contains(上次發表時間)").text().replace("上次發表時間", "").trim();
- String timeZone = profileDocument.select("div.u_profile").select("div.cl").get(1).select("ul").get(1)
- .select("li:contains(所在時區)").text().replace("所在時區", "").trim();
- String usedSpace = profileDocument.select("div.u_profile").select("div.cl").get(2).select("ul").get(0)
- .select("li").get(0).text().replace("已用空間", "").replace("B", "").trim();
- String mileage = profileDocument.select("div.u_profile").select("div.cl").get(2).select("ul").get(0)
- .select("li").get(1).text().replace("里程", "").trim();
- String money = profileDocument.select("div.u_profile").select("div.cl").get(2).select("ul").get(0)
- .select("li").last().text().replace("金錢", "").trim();
- crawlerJavbusProfile.setUid(Long.valueOf(uid));
- crawlerJavbusProfile.setNickName(nickName);
- crawlerJavbusProfile.setEmailStatus(emailStatus);
- crawlerJavbusProfile.setFriendNum(Integer.valueOf(friendNum));
- crawlerJavbusProfile.setReplyNum(Integer.valueOf(replyNum));
- crawlerJavbusProfile.setThreadNum(Integer.valueOf(threadNum));
- crawlerJavbusProfile.setUserGroup(userGroup);
- crawlerJavbusProfile.setOnlineTime(StringUtils.isNotEmpty(onlineTime) ? Integer.valueOf(onlineTime) : null);
- crawlerJavbusProfile.setRegistrationTime(StringUtils.isNotEmpty(registrationTime) && registrationTime.length() >= 10 ? LocalDateTime.parse(registrationTime, DateUtils.dateTimeFormatter3) : null);
- crawlerJavbusProfile.setLastVisit(StringUtils.isNotEmpty(lastVisit) && lastVisit.length() >= 10 ? LocalDateTime.parse(lastVisit, DateUtils.dateTimeFormatter3) : null);
- crawlerJavbusProfile.setLastActivityTime(StringUtils.isNotEmpty(lastActivityTime) && lastActivityTime.length() >= 10 ? LocalDateTime.parse(lastActivityTime, DateUtils.dateTimeFormatter3) : null);
- crawlerJavbusProfile.setLastPublishedTime(StringUtils.isNotEmpty(lastPublishedTime) && lastPublishedTime.length() >= 10 ? LocalDateTime.parse(lastPublishedTime, DateUtils.dateTimeFormatter3) : null);
- crawlerJavbusProfile.setTimeZone(StringUtils.isNotEmpty(timeZone) ? timeZone : null);
- crawlerJavbusProfile.setUsedSpace(Integer.valueOf(usedSpace));
- crawlerJavbusProfile.setMileage(Integer.valueOf(mileage));
- crawlerJavbusProfile.setMoney(Integer.valueOf(money));
- crawlerJavbusProfile.setAvatarUrl(avatarUrl);
- crawlerJavbusProfile.setSignStr(signStr);
- crawlerJavbusProfile.setSignImg(org.apache.commons.lang3.StringUtils.join(signImgList, ","));
- }
- private boolean generateJavbusCookies(Proxy proxy) throws Exception {
- // 1 登陆获取cookies
- // 1.0 https://www.javbus.com/forum/forum.php
- Connection.Response forumResponse = JsoupUtil.requestBody(javbusConstantMap.get("forum_url"), JsoupUtil.HTTP_GET, proxy, null);
- Map<String, String> forumCookies = forumResponse.cookies();
- log.warn("generateJavbusCookies=>,forum_url={},forumCookies={}", javbusConstantMap.get("forum_url"), forumCookies);
- // 1.1 https://www.javbus.com/forum/member.php
- Map<String, String> params = new HashMap<>(8);
- params.put("mod", "logging");
- params.put("action", "login");
- params.put("referer", "");
- params.put("infloat", "yes");
- params.put("handlekey", "login");
- params.put("inajax", "1");
- params.put("ajaxtarget", "fwin_content_login");
- String memberHtmlStr = JsoupUtil.requestDocument(javbusConstantMap.get("member_url"), JsoupUtil.HTTP_GET, proxy, forumCookies, null, params).html().replace("<![CDATA[", "").replace("]]>", "");
- Document memberDocument = Jsoup.parse(memberHtmlStr);
- String key1 = memberDocument.select("input[type='password']").first().attr("id").split("_")[1];
- String key2 = memberDocument.select("span[id^='seccode']").first().attr("id").split("_")[1];
- String key3 = memberDocument.select("input[name='formhash']").first().val();
- // 1.2 https://www.javbus.com/forum/misc.php
- params.clear();
- params.put("mod", "seccode");
- params.put("action", "update");
- params.put("idhash", key2);
- params.put("modid", "member::logging");
- Document miscDocument = JsoupUtil.requestDocument(javbusConstantMap.get("misc_url"), JsoupUtil.HTTP_GET, proxy, forumCookies, null, params);
- String imgVerifyUrl = "https://www.javbus.com/forum/" + miscDocument.select("img[onclick]").first().attr("src");
- // 1.3 get verifyImg
- Map<String, String> headerParams = new HashMap<>(8);
- headerParams.put("referer", javbusConstantMap.get("forum_url"));
- Connection.Response imgResponse = JsoupUtil.requestBody(imgVerifyUrl, JsoupUtil.HTTP_GET, proxy, forumCookies, headerParams, null);
- byte[] imgBytes = imgResponse.bodyAsBytes();
- Map<String, String> imgCookies = imgResponse.cookies();
- log.warn("generateJavbusCookies=>,imgVerifyUrl={},imgCookies={}", imgVerifyUrl, imgCookies);
- String cookieKey4Seccode = "";
- for (Map.Entry<String, String> imgCookie : imgCookies.entrySet()) {
- if (imgCookie.getKey().contains("seccode")) {
- cookieKey4Seccode = imgCookie.getKey();
- break;
- }
- }
- // 1.4 get imgVerifyNumber by BaiduOCR
- headerParams.clear();
- headerParams.put("Content-Type", "application/x-www-form-urlencoded");
- params.clear();
- params.put("image", Base64.getEncoder().encodeToString(imgBytes));
- JSONObject crAccurateBasicResult = null;
- String seccodeverify = "";
- for (int i = 0; i < 3; i++) {
- try {
- Connection.Response ocrResponse = JsoupUtil.requestBody(javbusConstantMap.get("bd_ocr_url").concat("?access_token=").concat(bdAccessToken),
- JsoupUtil.HTTP_POST, Proxy.NO_PROXY, headerParams, params);
- crAccurateBasicResult = JSONObject.parseObject(ocrResponse.body());
- seccodeverify = crAccurateBasicResult.getJSONArray("words_result").getJSONObject(0).getString("words");
- break;
- } catch (HttpStatusException hse) {
- bdAccessToken = getAuth(javbusConstantMap.get("bd_ak"), javbusConstantMap.get("bd_sk"));
- } catch (Exception e) {
- log.error("BaiduOCR异常,bdOcrUrl={},bdAccessToken={},crAccurateBasicResult={}", javbusConstantMap.get("bd_ocr_url"), bdAccessToken, crAccurateBasicResult, e);
- if (i == 2) {
- throw new Exception("BaiduOCR异常!");
- }
- }
- }
- // 1.5 https://www.javbus.com/forum/member.php
- String sbParams = "?mod=logging&action=login&loginsubmit=yes&handlekey=login&loginhash=" + key1 + "&inajax=1";
- headerParams.clear();
- headerParams.put("Content-Type", "application/x-www-form-urlencoded");
- params.clear();
- params.put("formhash", key3);
- params.put("referer", javbusConstantMap.get("forum_url"));
- params.put("loginfield", "username");
- params.put("username", javbusConstantMap.get("username"));
- params.put("password", javbusConstantMap.get("password"));
- params.put("questionid", "0");
- params.put("answer", "");
- params.put("seccodehash", key2);
- params.put("seccodemodid", "member::logging");
- params.put("seccodeverify", seccodeverify);
- if (cookieKey4Seccode != "") {
- forumCookies.put("existmag", "mag");
- forumCookies.put(cookieKey4Seccode, imgCookies.get(cookieKey4Seccode));
- }
- String loginUrl = javbusConstantMap.get("member_url").concat(sbParams);
- Connection.Response loginResponse = JsoupUtil.requestBody(loginUrl, JsoupUtil.HTTP_POST, proxy, forumCookies, headerParams, params);
- String loginBody = loginResponse.body();
- Map<String, String> loginCookies = loginResponse.cookies();
- log.warn("generateJavbusCookies=>,loginUrl={},params={},forumCookies={},loginCookies={},loginResponseBody={}", loginUrl, params, forumCookies, loginCookies, loginResponse.body());
- for (Map.Entry<String, String> loginCookie : loginCookies.entrySet()) {
- if (loginCookie.getKey().contains("ulastactivity")) {
- forumCookies.put(loginCookie.getKey(), loginCookie.getValue());
- } else if (loginCookie.getKey().contains("auth")) {
- forumCookies.put(loginCookie.getKey(), loginCookie.getValue());
- } else if (loginCookie.getKey().contains("lastcheckfeed")) {
- forumCookies.put(loginCookie.getKey(), loginCookie.getValue());
- } else if (loginCookie.getKey().contains("lip")) {
- forumCookies.put(loginCookie.getKey(), loginCookie.getValue());
- }
- }
- log.warn("generateJavbusCookies=>,forumFinalCookies={}", forumCookies);
- javbusCookiesMap = forumCookies;
- return loginBody.contains("歡迎您回來");
- }
- public String getAuth(String ak, String sk) {
- // 获取token地址
- String authHost = javbusConstantMap.get("bd_authhost_url");
- String getAccessTokenUrl = authHost
- // 1. grant_type为固定参数
- + "grant_type=client_credentials"
- // 2. 官网获取的 API Key
- + "&client_id=" + ak
- // 3. 官网获取的 Secret Key
- + "&client_secret=" + sk;
- try {
- URL realUrl = new URL(getAccessTokenUrl);
- // 打开和URL之间的连接
- HttpURLConnection connection = (HttpURLConnection) realUrl.openConnection();
- connection.setRequestMethod("GET");
- connection.connect();
- // 获取所有响应头字段
- Map<String, List<String>> map = connection.getHeaderFields();
- // 遍历所有的响应头字段
- /*for (String key : map.keySet()) {
- System.err.println(key + "--->" + map.get(key));
- }*/
- // 定义 BufferedReader输入流来读取URL的响应
- BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
- String result = "";
- String line;
- while ((line = in.readLine()) != null) {
- result += line;
- }
- /**
- * 返回结果示例
- */
- // System.err.println("result:" + result);
- JSONObject jsonObject = JSONObject.parseObject(result);
- String access_token = jsonObject.getString("access_token");
- return access_token;
- } catch (Exception e) {
- System.err.print("获取token失败!");
- e.printStackTrace(System.err);
- }
- return null;
- }
- }
|