|
|
@@ -0,0 +1,165 @@
|
|
|
+package top.lvzhiqiang.service.impl;
|
|
|
+
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.jsoup.Connection;
|
|
|
+import org.jsoup.Jsoup;
|
|
|
+import org.jsoup.nodes.Document;
|
|
|
+import org.springframework.beans.factory.annotation.Value;
|
|
|
+import org.springframework.stereotype.Service;
|
|
|
+import top.lvzhiqiang.mapper.DicCodeMapper;
|
|
|
+import top.lvzhiqiang.service.Crawler4FacebookService;
|
|
|
+
|
|
|
+import javax.annotation.Resource;
|
|
|
+import javax.net.ssl.*;
|
|
|
+import java.net.InetSocketAddress;
|
|
|
+import java.net.Proxy;
|
|
|
+import java.security.SecureRandom;
|
|
|
+import java.security.cert.CertificateException;
|
|
|
+import java.security.cert.X509Certificate;
|
|
|
+import java.util.HashMap;
|
|
|
+import java.util.Map;
|
|
|
+import java.util.Random;
|
|
|
+
|
|
|
+/**
|
|
|
+ * Crawler Facebook ServiceImpl
|
|
|
+ *
|
|
|
+ * @author lvzhiqiang
|
|
|
+ * 2022/10/11 16:11
|
|
|
+ */
|
|
|
+@Service
|
|
|
+@Slf4j
|
|
|
+public class Crawler4FacebookServiceImpl implements Crawler4FacebookService {
|
|
|
+
|
|
|
+ @Resource
|
|
|
+ private DicCodeMapper dicCodeMapper;
|
|
|
+
|
|
|
+ @Value("${spring.profiles.active}")
|
|
|
+ private String env;
|
|
|
+
|
|
|
+ protected final static int TIMEOUT_CONNECTION = 60000;
|
|
|
+ protected final static String HTTP_GET = "GET";
|
|
|
+ protected final static String HTTP_POST = "POST";
|
|
|
+ protected static Map<String, String> cookies = null;
|
|
|
+ protected static String userAgent = " Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11";
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public void jsoupFacebookGroupMemberInfo(String email, String password, String url, String limit) throws Exception {
|
|
|
+ String loginUrl = "https://www.facebook.com/login/device-based/regular/login/?login_attempt=1";
|
|
|
+ loginUrl = "https://www.facebook.com/login.php?login_attempt=1";
|
|
|
+ loginUrl = "https://www.facebook.com/login?privacy_mutation_token=eyJ0eXBlIjowLCJjcmVhdGlvbl90aW1lIjoxNjY1NTQwMzA5LCJjYWxsc2l0ZV9pZCI6MzgxMjI5MDc5NTc1OTQ2fQ%3D%3D";
|
|
|
+ loginUrl = "https://www.facebook.com/login/device-based/regular/login/?login_attempt=1&lwv=101";
|
|
|
+ loginUrl = "https://m.facebook.com/login/async/?refsrc=https%3A%2F%2Fm.facebook.com%2F&lwv=100";
|
|
|
+
|
|
|
+ // 代理及TOKEN设置
|
|
|
+ Proxy proxy;
|
|
|
+ if ("dev".equals(env)) {
|
|
|
+ proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 1080));
|
|
|
+ } else {
|
|
|
+ proxy = Proxy.NO_PROXY;
|
|
|
+ }
|
|
|
+ // 模拟登录
|
|
|
+ requestBody(loginUrl, HTTP_GET, proxy, null); //fetching cookie and saving
|
|
|
+
|
|
|
+ //trustEveryone();
|
|
|
+
|
|
|
+ Map<String, String> loginParams = new HashMap<>();
|
|
|
+ loginParams.put("email", email);
|
|
|
+ loginParams.put("pass", password);
|
|
|
+ Connection.Response loginResponse = requestBody(loginUrl, HTTP_POST, proxy, loginParams);
|
|
|
+ String userId = loginResponse.cookies().get("c_user"); // current login userId
|
|
|
+ }
|
|
|
+
|
|
|
+ private Connection getConnection(String url, Proxy proxy) {
|
|
|
+ return Jsoup.connect(url)
|
|
|
+ .timeout(TIMEOUT_CONNECTION)
|
|
|
+ .proxy(proxy)
|
|
|
+ .userAgent(getUserAgent())
|
|
|
+ .followRedirects(true)
|
|
|
+ .ignoreContentType(true);
|
|
|
+ }
|
|
|
+
|
|
|
+ protected Document requestDocument(String url, String httpMethod, Proxy proxy, Map<String, String> data) throws Exception {
|
|
|
+ Connection connection = getConnection(url, proxy);
|
|
|
+ if (data != null && data.size() > 0) {
|
|
|
+ connection.data(data);
|
|
|
+ }
|
|
|
+ if (cookies != null) {
|
|
|
+ connection.cookies(cookies);
|
|
|
+ }
|
|
|
+ Document resultDocument = HTTP_POST.equalsIgnoreCase(httpMethod) ? connection.post() : connection.get();
|
|
|
+ return resultDocument;
|
|
|
+ }
|
|
|
+
|
|
|
+ protected Connection.Response requestBody(String url, String httpMethod, Proxy proxy, Map<String, String> data) throws Exception {
|
|
|
+ Connection connection = getConnection(url, proxy);
|
|
|
+ if (data != null && data.size() > 0) {
|
|
|
+ connection.data(data);
|
|
|
+ }
|
|
|
+ if (cookies != null) {
|
|
|
+ connection.cookies(cookies);
|
|
|
+ }
|
|
|
+ connection.method(HTTP_POST.equalsIgnoreCase(httpMethod) ? Connection.Method.POST : Connection.Method.GET);
|
|
|
+ Connection.Response res = connection.execute();
|
|
|
+ if (res.cookies() != null && !res.cookies().isEmpty()) {
|
|
|
+ cookies = res.cookies();
|
|
|
+ }
|
|
|
+ return res;
|
|
|
+ }
|
|
|
+
|
|
|
+ private String getUserAgent() {
|
|
|
+ Random r = new Random();
|
|
|
+ String[] ua = {"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36 OPR/37.0.2178.32",
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
|
|
|
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586",
|
|
|
+ "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
|
|
|
+ "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
|
|
|
+ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
|
|
|
+ "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0)",
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 BIDUBrowser/8.3 Safari/537.36",
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36 Core/1.47.277.400 QQBrowser/9.4.7658.400",
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 UBrowser/5.6.12150.8 Safari/537.36",
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0",
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36 TheWorld 7",
|
|
|
+ "Mozilla/5.0 (Windows NT 6.1; W…) Gecko/20100101 Firefox/60.0",
|
|
|
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"};
|
|
|
+ int i = r.nextInt(15);
|
|
|
+ return ua[i];
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 信任任何站点
|
|
|
+ */
|
|
|
+ public void trustEveryone() {
|
|
|
+ try {
|
|
|
+ HttpsURLConnection.setDefaultHostnameVerifier(new HostnameVerifier() {
|
|
|
+ @Override
|
|
|
+ public boolean verify(String hostname, SSLSession session) {
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+ });
|
|
|
+
|
|
|
+ SSLContext context = SSLContext.getInstance("TLS");
|
|
|
+ context.init(null, new X509TrustManager[]{new X509TrustManager() {
|
|
|
+ @Override
|
|
|
+ public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public X509Certificate[] getAcceptedIssuers() {
|
|
|
+ return new X509Certificate[0];
|
|
|
+ }
|
|
|
+ }}, new SecureRandom());
|
|
|
+ HttpsURLConnection.setDefaultSSLSocketFactory(context.getSocketFactory());
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+
|