|
@@ -20,14 +20,12 @@ import top.lvzhiqiang.util.StringUtils;
|
|
|
|
|
|
|
|
import javax.annotation.Resource;
|
|
import javax.annotation.Resource;
|
|
|
import java.io.*;
|
|
import java.io.*;
|
|
|
|
|
+import java.net.Proxy;
|
|
|
import java.time.Instant;
|
|
import java.time.Instant;
|
|
|
import java.time.LocalDate;
|
|
import java.time.LocalDate;
|
|
|
import java.time.LocalDateTime;
|
|
import java.time.LocalDateTime;
|
|
|
import java.time.ZoneOffset;
|
|
import java.time.ZoneOffset;
|
|
|
-import java.util.ArrayList;
|
|
|
|
|
-import java.util.List;
|
|
|
|
|
-import java.util.Map;
|
|
|
|
|
-import java.util.Set;
|
|
|
|
|
|
|
+import java.util.*;
|
|
|
import java.util.stream.Collectors;
|
|
import java.util.stream.Collectors;
|
|
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -87,7 +85,11 @@ public class BgServiceImpl implements BgService {
|
|
|
@Override
|
|
@Override
|
|
|
@Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
|
|
@Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
|
|
|
@Async
|
|
@Async
|
|
|
- public void uploadFile4IdentificationCode(InputStream is) {
|
|
|
|
|
|
|
+ public void uploadFile4IdentificationCode(InputStream is, Integer isDel) {
|
|
|
|
|
+ if (isDel == 1) {
|
|
|
|
|
+ icodePoolMapper.deleteAll();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
List<String> uploadIcodeList = readFromIcodeStream(is);
|
|
List<String> uploadIcodeList = readFromIcodeStream(is);
|
|
|
List<String> icodePoolList = icodePoolMapper.findIcode();
|
|
List<String> icodePoolList = icodePoolMapper.findIcode();
|
|
|
|
|
|
|
@@ -110,10 +112,14 @@ public class BgServiceImpl implements BgService {
|
|
|
*
|
|
*
|
|
|
* @param status
|
|
* @param status
|
|
|
*/
|
|
*/
|
|
|
- @Transactional(propagation = Propagation.REQUIRED, rollbackFor = Exception.class)
|
|
|
|
|
@Async
|
|
@Async
|
|
|
@Override
|
|
@Override
|
|
|
- public void jsoupIcodePool(Integer status) {
|
|
|
|
|
|
|
+ public void jsoupIcodePool(Integer status, Integer isDel) {
|
|
|
|
|
+ if (isDel == 1) {
|
|
|
|
|
+ videoInfoPoolMapper.deleteAll();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
// 获取待抓取码列表
|
|
// 获取待抓取码列表
|
|
|
List<String> icodePoolList = icodePoolMapper.findIcodeByStatus(status);
|
|
List<String> icodePoolList = icodePoolMapper.findIcodeByStatus(status);
|
|
|
// 获取主表所有识别码
|
|
// 获取主表所有识别码
|
|
@@ -124,13 +130,19 @@ public class BgServiceImpl implements BgService {
|
|
|
if (dicCode == null) {
|
|
if (dicCode == null) {
|
|
|
return;
|
|
return;
|
|
|
}
|
|
}
|
|
|
- String javbusUrl = dicCode.getCodeValue();
|
|
|
|
|
- // 校验地址
|
|
|
|
|
- try {
|
|
|
|
|
- Jsoup.connect(javbusUrl.concat(javbusUrl));
|
|
|
|
|
- log.info("jsoupIcodePool:javbus防屏蔽地址有效!javbusUrl={}", javbusUrl);
|
|
|
|
|
- } catch (Exception e) {
|
|
|
|
|
- log.error("jsoupIcodePool:javbus防屏蔽地址失效!javbusUrl={}", javbusUrl, e);
|
|
|
|
|
|
|
+ String[] javbusUrlArr = dicCode.getCodeValue().split(",");
|
|
|
|
|
+ List<String> javbusUrlList = new ArrayList<>();
|
|
|
|
|
+ for (String javbusUrl : javbusUrlArr) {
|
|
|
|
|
+ // 校验地址
|
|
|
|
|
+ try {
|
|
|
|
|
+ Jsoup.connect(javbusUrl.concat(javbusUrl));
|
|
|
|
|
+ log.info("jsoupIcodePool:javbus防屏蔽地址有效!javbusUrl={}", javbusUrl);
|
|
|
|
|
+ javbusUrlList.add(javbusUrl);
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ log.error("jsoupIcodePool:javbus防屏蔽地址失效!javbusUrl={}", javbusUrl, e);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ if (javbusUrlList.size() == 0) {
|
|
|
return;
|
|
return;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -140,7 +152,10 @@ public class BgServiceImpl implements BgService {
|
|
|
// 遍历
|
|
// 遍历
|
|
|
Document document;
|
|
Document document;
|
|
|
VideoInfoPool videoInfoPool;
|
|
VideoInfoPool videoInfoPool;
|
|
|
|
|
+ String javbusUrl;
|
|
|
for (int i = 0; i < icodePoolList.size(); i++) {
|
|
for (int i = 0; i < icodePoolList.size(); i++) {
|
|
|
|
|
+
|
|
|
|
|
+ long start = System.currentTimeMillis();
|
|
|
String identificationCode = icodePoolList.get(i);
|
|
String identificationCode = icodePoolList.get(i);
|
|
|
|
|
|
|
|
IcodePool icodePool = new IcodePool();
|
|
IcodePool icodePool = new IcodePool();
|
|
@@ -149,34 +164,66 @@ public class BgServiceImpl implements BgService {
|
|
|
icodePool.setStatus(4);
|
|
icodePool.setStatus(4);
|
|
|
icodePool.setRetryCount(0);
|
|
icodePool.setRetryCount(0);
|
|
|
icodePoolMapper.updateStatus(icodePool);
|
|
icodePoolMapper.updateStatus(icodePool);
|
|
|
- log.warn("jsoupIcodePool exists:i={},identificationCode={}", i, identificationCode);
|
|
|
|
|
|
|
+ log.warn("jsoupIcodePool exists:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode);
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- try {
|
|
|
|
|
- document = Jsoup.connect(javbusUrl.concat(identificationCode)).timeout(50000).get();
|
|
|
|
|
-
|
|
|
|
|
- videoInfoPool = new VideoInfoPool();
|
|
|
|
|
- parseDocument(document, identificationCode, machiPath, videoInfoPool);
|
|
|
|
|
- if (videoInfoPool != null) {
|
|
|
|
|
- icodePool.setStatus(2);
|
|
|
|
|
- icodePool.setRetryCount(0);
|
|
|
|
|
- icodePoolMapper.updateStatus(icodePool);
|
|
|
|
|
- videoInfoPoolMapper.insert(videoInfoPool);
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ int retryCount = 0;
|
|
|
|
|
+ while (retryCount <= 3) {
|
|
|
|
|
+ javbusUrl = javbusUrlList.get((int) (0 + Math.random() * (javbusUrlList.size() - 0)));
|
|
|
|
|
+ try {
|
|
|
|
|
+ document = Jsoup.connect(javbusUrl.concat(identificationCode)).timeout(50000).userAgent(getUserAgent()).get();
|
|
|
|
|
+
|
|
|
|
|
+ videoInfoPool = new VideoInfoPool();
|
|
|
|
|
+ long picTime = parseDocument(document, identificationCode, machiPath, videoInfoPool);
|
|
|
|
|
+ if (videoInfoPool != null) {
|
|
|
|
|
+ icodePool.setStatus(2);
|
|
|
|
|
+ icodePool.setRetryCount(retryCount);
|
|
|
|
|
+ icodePoolMapper.updateStatus(icodePool);
|
|
|
|
|
+ videoInfoPoolMapper.insert(videoInfoPool);
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- log.info("jsoupIcodePool success:i={},identificationCode={}", i, identificationCode);
|
|
|
|
|
- } catch (Exception e) {
|
|
|
|
|
- icodePool.setStatus(3);
|
|
|
|
|
- icodePool.setRetryCount(0);
|
|
|
|
|
- icodePool.setFailureCause(e.getMessage().length() <= 200 ? e.getMessage() : e.getMessage().substring(0, 200));
|
|
|
|
|
- icodePoolMapper.updateStatus(icodePool);
|
|
|
|
|
- log.error("jsoupIcodePool error:i={},identificationCode={}", i, identificationCode, e);
|
|
|
|
|
|
|
+ log.info("jsoupIcodePool success:i={},picTime={},time={},identificationCode={}", i, picTime, System.currentTimeMillis() - start, identificationCode);
|
|
|
|
|
+ break;
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ ++retryCount;
|
|
|
|
|
+
|
|
|
|
|
+ if (retryCount < 4) {
|
|
|
|
|
+ log.error("jsoupIcodePool error重试:i={},retryCount={},time={},identificationCode={}", i, retryCount, System.currentTimeMillis() - start, identificationCode, e);
|
|
|
|
|
+ } else if (retryCount == 4) {
|
|
|
|
|
+ icodePool.setStatus(3);
|
|
|
|
|
+ icodePool.setRetryCount(retryCount);
|
|
|
|
|
+ icodePool.setFailureCause(e.getMessage().length() <= 200 ? e.getMessage() : e.getMessage().substring(0, 200));
|
|
|
|
|
+ icodePoolMapper.updateStatus(icodePool);
|
|
|
|
|
+ log.error("jsoupIcodePool error:i={},time={},identificationCode={}", i, System.currentTimeMillis() - start, identificationCode, e);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- private void parseDocument(Document document, String identificationCode, String machiPath, VideoInfoPool videoInfoPool) throws Exception {
|
|
|
|
|
|
|
+ private String getUserAgent() {
|
|
|
|
|
+ Random r = new Random();
|
|
|
|
|
+ String[] ua = {"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
|
|
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36 OPR/37.0.2178.32",
|
|
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
|
|
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
|
|
|
|
|
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586",
|
|
|
|
|
+ "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
|
|
|
|
|
+ "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
|
|
|
|
|
+ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
|
|
|
|
|
+ "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0)",
|
|
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 BIDUBrowser/8.3 Safari/537.36",
|
|
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36 Core/1.47.277.400 QQBrowser/9.4.7658.400",
|
|
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 UBrowser/5.6.12150.8 Safari/537.36",
|
|
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0",
|
|
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36 TheWorld 7",
|
|
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; W…) Gecko/20100101 Firefox/60.0"};
|
|
|
|
|
+ int i = r.nextInt(14);
|
|
|
|
|
+ return ua[i];
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private long parseDocument(Document document, String identificationCode, String machiPath, VideoInfoPool videoInfoPool) throws Exception {
|
|
|
Elements container = document.select("div.container");
|
|
Elements container = document.select("div.container");
|
|
|
if (container.size() == 0) {
|
|
if (container.size() == 0) {
|
|
|
throw new Exception("番号无效!");
|
|
throw new Exception("番号无效!");
|
|
@@ -257,13 +304,18 @@ public class BgServiceImpl implements BgService {
|
|
|
// 图片URL
|
|
// 图片URL
|
|
|
String href = container.select("a.bigImage").first().attr("abs:href");
|
|
String href = container.select("a.bigImage").first().attr("abs:href");
|
|
|
|
|
|
|
|
|
|
+ long start = System.currentTimeMillis();
|
|
|
Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
|
|
Connection.Response response = Jsoup.connect(href).method(Connection.Method.GET).ignoreContentType(true).timeout(50 * 1000).execute();
|
|
|
String fileName = issueDate.concat(" ").concat(h3).concat(".jpg");
|
|
String fileName = issueDate.concat(" ").concat(h3).concat(".jpg");
|
|
|
saveFile(response.bodyStream(), machiPath.concat(fileName));
|
|
saveFile(response.bodyStream(), machiPath.concat(fileName));
|
|
|
|
|
+ long end = System.currentTimeMillis();
|
|
|
|
|
+
|
|
|
videoInfoPool.setImgUrl(fileName);
|
|
videoInfoPool.setImgUrl(fileName);
|
|
|
|
|
|
|
|
videoInfoPool.setCreateTime(LocalDateTime.now());
|
|
videoInfoPool.setCreateTime(LocalDateTime.now());
|
|
|
videoInfoPool.setType(1);
|
|
videoInfoPool.setType(1);
|
|
|
|
|
+
|
|
|
|
|
+ return end - start;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
/**
|