lvzhiqiang 3 éve
szülő
commit
49cd2353a3

+ 12 - 0
src/main/java/top/lvzhiqiang/service/Crawler4JavbusService.java

@@ -0,0 +1,12 @@
+package top.lvzhiqiang.service;
+
+/**
+ * Crawler Javbus Service
+ *
+ * @author lvzhiqiang
+ * 2022/10/17 14:47
+ */
+public interface Crawler4JavbusService {
+
+    /**
+     * Service entry point declared for the Javbus crawler.
+     * NOTE(review): presumably crawls Javbus member info via Jsoup (inferred from the name only) - confirm
+     * against the implementation once it is complete.
+     *
+     * @throws Exception declared by the contract; concrete failure modes depend on the implementation
+     */
+    void jsoupJavbusMemberInfo() throws Exception;
+}

+ 49 - 0
src/main/java/top/lvzhiqiang/service/impl/Crawler4JavbusServiceImpl.java

@@ -0,0 +1,49 @@
+package top.lvzhiqiang.service.impl;
+
+import lombok.extern.slf4j.Slf4j;
+import org.jsoup.nodes.Document;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+import top.lvzhiqiang.mapper.DicCodeMapper;
+import top.lvzhiqiang.service.Crawler4JavbusService;
+import top.lvzhiqiang.util.JsoupUtil;
+
+import javax.annotation.Resource;
+import java.net.InetSocketAddress;
+import java.net.Proxy;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Crawler Javbus ServiceImpl
+ *
+ * @author lvzhiqiang
+ * 2022/10/17 14:47
+ */
+@Service
+@Slf4j
+public class Crawler4JavbusServiceImpl implements Crawler4JavbusService {
+
+    // Injected mapper; no code visible in this class references it yet.
+    @Resource
+    private DicCodeMapper dicCodeMapper;
+    // Value of the spring.profiles.active property; compared against "dev" below.
+    @Value("${spring.profiles.active}")
+    private String env;
+
+    // Static cookie holder, initially null; nothing visible in this class assigns it.
+    // NOTE(review): presumably intended to cache the Javbus login session - confirm.
+    protected static Map<String, String> cookies = null;
+
+    /**
+     * Work-in-progress stub: it only chooses which proxy to use and then returns.
+     * With the "dev" profile an HTTP proxy at 127.0.0.1:1080 is selected, otherwise no proxy.
+     * The selected proxy is not used by anything yet.
+     */
+    @Override
+    public void jsoupJavbusMemberInfo() throws Exception {
+        // Proxy and token setup
+        Proxy proxy;
+        if ("dev".equals(env)) {
+            proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 1080));
+        } else {
+            proxy = Proxy.NO_PROXY;
+        }
+
+
+    }
+
+}
+
+

+ 65 - 0
src/main/java/top/lvzhiqiang/util/Base64Util.java

@@ -0,0 +1,65 @@
+package top.lvzhiqiang.util;
+
+/**
+ * Base64 工具类
+ */
+public class Base64Util {
+
+    /**
+     * Kept public so that any existing {@code new Base64Util()} call keeps compiling;
+     * the class itself only offers static behavior.
+     */
+    public Base64Util() {
+    }
+
+    /**
+     * Encodes the given bytes as Base64 using the standard alphabet
+     * ({@code A-Z a-z 0-9 + /}) with {@code '='} padding and no line breaks.
+     * <p>
+     * Delegates to the JDK encoder instead of the previous hand-rolled bit shuffling;
+     * the produced text is the same.
+     *
+     * @param from bytes to encode; an empty array yields an empty string
+     * @return the Base64 text
+     * @throws NullPointerException if {@code from} is null
+     */
+    public static String encode(byte[] from) {
+        // Fully qualified on purpose: this file has no import section to extend.
+        return java.util.Base64.getEncoder().encodeToString(from);
+    }
+}

+ 116 - 0
src/main/java/top/lvzhiqiang/util/FileUtil.java

@@ -0,0 +1,116 @@
+package top.lvzhiqiang.util;
+
+import java.io.*;
+
+/**
+ * 文件读取工具类
+ */
+public class FileUtil {
+
+    /** Largest file (1 GiB) that {@link #readFileAsString(String)} accepts. */
+    private static final long MAX_TEXT_FILE_BYTES = 1024L * 1024 * 1024;
+    /** Chunk size used while draining a stream. */
+    private static final int BUFFER_SIZE = 1024;
+
+    /**
+     * Reads a whole file and returns its content as a string, decoded with the platform default charset.
+     *
+     * @param filePath path of the file to read
+     * @return the decoded file content
+     * @throws FileNotFoundException if the path does not exist
+     * @throws IOException           if the file is larger than 1 GiB or cannot be read
+     */
+    public static String readFileAsString(String filePath) throws IOException {
+        File file = new File(filePath);
+        if (!file.exists()) {
+            throw new FileNotFoundException(filePath);
+        }
+
+        if (file.length() > MAX_TEXT_FILE_BYTES) {
+            throw new IOException("File is too large");
+        }
+
+        // Decode once over the complete content. Decoding chunk by chunk corrupts any
+        // multi-byte character that happens to straddle a chunk boundary.
+        byte[] content = readFileByBytes(filePath);
+        return new String(content, java.nio.charset.Charset.defaultCharset());
+    }
+
+    /**
+     * Reads a whole file into a byte array.
+     *
+     * @param filePath path of the file to read
+     * @return the file content
+     * @throws FileNotFoundException if the path does not exist
+     * @throws IOException           if reading fails
+     */
+    public static byte[] readFileByBytes(String filePath) throws IOException {
+        File file = new File(filePath);
+        if (!file.exists()) {
+            throw new FileNotFoundException(filePath);
+        }
+        // readStreamByBytes closes the stream for us, also when reading fails.
+        return readStreamByBytes(new BufferedInputStream(new FileInputStream(file)));
+    }
+
+    /**
+     * Best-effort variant of {@link #readFileByBytes(String)}: failures are reported on the
+     * console and signalled by a {@code null} result instead of an exception.
+     *
+     * @param filePath path of the file to read
+     * @return the file content, or {@code null} if it could not be read
+     */
+    private static byte[] readFileByBytes2(String filePath) {
+        try {
+            // Drain in a loop: available() is only an estimate and one read() may return fewer bytes.
+            return readStreamByBytes(new BufferedInputStream(new FileInputStream(filePath)));
+        } catch (IOException e) {
+            System.out.println(String.format("读取验证码图片异常,filePath=%s", filePath));
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+    /**
+     * Drains the stream into a byte array and closes it afterwards, whether or not reading succeeded.
+     *
+     * @param in stream to drain; closed by this method
+     * @return every byte read until end of stream
+     * @throws NullPointerException if {@code in} is null
+     * @throws IOException          if reading fails
+     */
+    public static byte[] readStreamByBytes(BufferedInputStream in) throws IOException {
+        java.util.Objects.requireNonNull(in, "in");
+        try {
+            // available() is inside the try so the stream is closed even if the size probe throws.
+            ByteArrayOutputStream bos = new ByteArrayOutputStream(in.available());
+            byte[] buffer = new byte[BUFFER_SIZE];
+            int len;
+            while ((len = in.read(buffer, 0, buffer.length)) != -1) {
+                bos.write(buffer, 0, len);
+            }
+            return bos.toByteArray();
+        } finally {
+            closeQuietly(in);
+        }
+    }
+
+    /** Closes the resource; a failure is printed but not propagated, because the data was already read. */
+    private static void closeQuietly(Closeable resource) {
+        try {
+            resource.close();
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+}

+ 77 - 0
src/main/java/top/lvzhiqiang/util/HttpUtil.java

@@ -0,0 +1,77 @@
+package top.lvzhiqiang.util;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * http 工具类
+ */
+public class HttpUtil {
+
+    /**
+     * POSTs {@code params} as {@code application/x-www-form-urlencoded}.
+     *
+     * @param requestUrl  endpoint without the access_token query parameter
+     * @param accessToken token appended as {@code ?access_token=...}
+     * @param params      request body
+     * @return the response body with line terminators removed
+     * @throws Exception if the request fails
+     */
+    public static String post(String requestUrl, String accessToken, String params)
+            throws Exception {
+        String contentType = "application/x-www-form-urlencoded";
+        return HttpUtil.post(requestUrl, accessToken, contentType, params);
+    }
+
+    /**
+     * POSTs {@code params} with the given content type. The body and response are handled
+     * as GBK when the URL contains "nlp", otherwise as UTF-8.
+     *
+     * @param requestUrl  endpoint without the access_token query parameter
+     * @param accessToken token appended as {@code ?access_token=...}
+     * @param contentType value of the Content-Type request header
+     * @param params      request body
+     * @return the response body with line terminators removed
+     * @throws Exception if the request fails
+     */
+    public static String post(String requestUrl, String accessToken, String contentType, String params)
+            throws Exception {
+        String encoding = "UTF-8";
+        if (requestUrl.contains("nlp")) {
+            encoding = "GBK";
+        }
+        return HttpUtil.post(requestUrl, accessToken, contentType, params, encoding);
+    }
+
+    /**
+     * POSTs {@code params} to {@code requestUrl + "?access_token=" + accessToken}.
+     *
+     * @param requestUrl  endpoint without the access_token query parameter
+     * @param accessToken token appended verbatim (not URL-encoded) to the URL
+     * @param contentType value of the Content-Type request header
+     * @param params      request body
+     * @param encoding    charset name used for the body and for decoding the response
+     * @return the response body with line terminators removed
+     * @throws Exception if the request fails
+     */
+    public static String post(String requestUrl, String accessToken, String contentType, String params, String encoding)
+            throws Exception {
+        String url = requestUrl + "?access_token=" + accessToken;
+        return HttpUtil.postGeneralUrl(url, contentType, params, encoding);
+    }
+
+    /**
+     * Sends a POST request and returns the response body. Response headers and the result
+     * are echoed to {@code System.err}.
+     *
+     * @param generalUrl  complete target URL
+     * @param contentType value of the Content-Type request header
+     * @param params      request body
+     * @param encoding    charset name used for the body and for decoding the response
+     * @return the response body, lines concatenated without separators
+     * @throws Exception if the URL is malformed, the charset is unknown or the exchange fails
+     */
+    public static String postGeneralUrl(String generalUrl, String contentType, String params, String encoding)
+            throws Exception {
+        URL url = new URL(generalUrl);
+        // Open the connection to the URL
+        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
+        connection.setRequestMethod("POST");
+        // Common request properties
+        connection.setRequestProperty("Content-Type", contentType);
+        connection.setRequestProperty("Connection", "Keep-Alive");
+        connection.setUseCaches(false);
+        connection.setDoOutput(true);
+        connection.setDoInput(true);
+
+        // Send the body; try-with-resources closes the stream even if writing fails.
+        // getOutputStream() already connects, so no separate connect() call is needed.
+        try (DataOutputStream out = new DataOutputStream(connection.getOutputStream())) {
+            out.write(params.getBytes(encoding));
+            out.flush();
+        }
+
+        // Echo all response header fields
+        for (Map.Entry<String, List<String>> header : connection.getHeaderFields().entrySet()) {
+            System.err.println(header.getKey() + "--->" + header.getValue());
+        }
+
+        // Read the response line by line; the reader is closed even if reading fails.
+        StringBuilder result = new StringBuilder();
+        try (BufferedReader in = new BufferedReader(
+                new InputStreamReader(connection.getInputStream(), encoding))) {
+            String line;
+            while ((line = in.readLine()) != null) {
+                result.append(line);
+            }
+        }
+        System.err.println("result:" + result);
+        return result.toString();
+    }
+}

+ 101 - 144
src/main/java/top/lvzhiqiang/util/JsoupUtil.java

@@ -1,173 +1,130 @@
 package top.lvzhiqiang.util;
 
+import org.jsoup.Connection;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
 
-import java.io.File;
-import java.util.ArrayList;
-import java.util.List;
+import javax.net.ssl.*;
+import java.net.Proxy;
+import java.security.SecureRandom;
+import java.security.cert.CertificateException;
+import java.security.cert.X509Certificate;
+import java.util.Map;
+import java.util.Random;
 
 public class JsoupUtil {
+    /** Connect/read timeout passed to Jsoup, in milliseconds. */
+    private static final int TIMEOUT_CONNECTION = 60000;
+    /** Method name selecting a GET request; final so callers cannot reassign it. */
+    public static final String HTTP_GET = "GET";
+    /** Method name selecting a POST request; final because the request methods compare against it. */
+    public static final String HTTP_POST = "POST";
+
+    /**
+     * Creates a Jsoup connection for {@code url} preconfigured with the shared timeout,
+     * the given proxy, a randomly picked User-Agent, redirect following and
+     * content-type checks disabled.
+     */
+    private static Connection getConnection(String url, Proxy proxy) {
+        Connection conn = Jsoup.connect(url);
+        conn.timeout(TIMEOUT_CONNECTION);
+        conn.proxy(proxy);
+        conn.userAgent(getUserAgent());
+        conn.followRedirects(true);
+        conn.ignoreContentType(true);
+        return conn;
+    }
 
-    private static Document doc = null;
-    private static Elements eles = null;
-
-
-    /**
-     * 获取xml文件(file格式)
-     *
-     * @param file
-     */
-    public static Document setXmlFile(File file) {
-        try {
-            doc = Jsoup.parse(file, "UTF-8");
-        } catch (Exception e) {
-            e.printStackTrace();
+    /**
+     * Executes a request and returns the parsed document.
+     * Form data is attached only when non-empty, cookies and headers only when non-null.
+     * A POST is issued when {@code httpMethod} equals "POST" ignoring case; anything else results in a GET.
+     *
+     * @throws Exception if the request fails
+     */
+    public static Document requestDocument(String url, String httpMethod, Map<String, String> cookies, Map<String, String> headers, Proxy proxy, Map<String, String> data) throws Exception {
+        Connection connection = getConnection(url, proxy);
+        if (data != null && data.size() > 0) {
+            connection.data(data);
+        }
+        if (cookies != null) {
+            connection.cookies(cookies);
         }
-        return doc;
+        if (headers != null) {
+            connection.headers(headers);
+        }
+        Document resultDocument = HTTP_POST.equalsIgnoreCase(httpMethod) ? connection.post() : connection.get();
+        return resultDocument;
     }
 
-
-    /**
-     * 获取xml文件(绝对路径)
-     *
-     * @param path
-     */
-    public static Document setXmlFile(String path) {
-        try {
-            File file = new File(path);
-            doc = Jsoup.parse(file, "UTF-8");
-        } catch (Exception e) {
-            e.printStackTrace();
-        }
-        return doc;
+    /** Convenience overload of the six-argument requestDocument that sends no cookies and no extra headers. */
+    public static Document requestDocument(String url, String httpMethod, Proxy proxy, Map<String, String> data) throws Exception {
+        return requestDocument(url, httpMethod, null, null, proxy, data);
     }
 
+    /**
+     * Convenience overload that sends extra headers but no cookies.
+     * <p>
+     * Delegates to the six-argument variant. Passing the five arguments straight through
+     * would resolve to this same overload and recurse until the stack overflows.
+     *
+     * @throws Exception if the request fails
+     */
+    public static Document requestDocument(String url, String httpMethod, Proxy proxy, Map<String, String> headers, Map<String, String> data) throws Exception {
+        return requestDocument(url, httpMethod, null, headers, proxy, data);
+    }
 
-    /**
-     * 根据拼接节点获取元素集合
-     *
-     * @param nodeQuery
-     * @return
-     */
-    public static Elements getEles(String nodeQuery) {
-        try {
-            eles = doc.select(nodeQuery);
-        } catch (Exception e) {
-            e.printStackTrace();
+    /**
+     * Executes a request and returns the raw response instead of a parsed document.
+     * Form data is attached only when non-empty, cookies and headers only when non-null.
+     * A POST is issued when {@code httpMethod} equals "POST" ignoring case; anything else results in a GET.
+     *
+     * @throws Exception if the request fails
+     */
+    public static Connection.Response requestBody(String url, String httpMethod, Map<String, String> cookies, Map<String, String> headers, Proxy proxy, Map<String, String> data) throws Exception {
+        Connection connection = getConnection(url, proxy);
+        if (data != null && data.size() > 0) {
+            connection.data(data);
         }
-
-        return eles;
+        if (cookies != null) {
+            connection.cookies(cookies);
+        }
+        if (headers != null) {
+            connection.headers(headers);
+        }
+        connection.method(HTTP_POST.equalsIgnoreCase(httpMethod) ? Connection.Method.POST : Connection.Method.GET);
+        Connection.Response res = connection.execute();
+        return res;
     }
 
+    /** Convenience overload of the six-argument requestBody that sends no cookies and no extra headers. */
+    public static Connection.Response requestBody(String url, String httpMethod, Proxy proxy, Map<String, String> data) throws Exception {
+        return requestBody(url, httpMethod, null, null, proxy, data);
+    }
 
-    public static void main(String[] args) {
-
-		/*String http="http://zizhan.mot.gov.cn/sj/kejs/kejifzh_kjs/";
-		List<String> newsLink =new ArrayList<String>();
-		try {
-			//获取所需要的所有页面链接
-			Document doc=Jsoup.connect(http).get();
-			Elements eles=doc.select("div.main_cont1 > ul > li > a");
-			for (Element element : eles) {
-				
-				String href=element.attr("href").toString();
-				href=href.substring(8);
-				newsLink.add("http://zizhan.mot.gov.cn"+href);
-			}
-			
-			//从每个页面中获取所需字段
-			for (String str : newsLink) {
-				try {
-					doc=Jsoup.connect(str).get();
-					String title=doc.select("div#cont_detail > div.docTitleCls").text();
-					String content=doc.select("div#cont_detail > div").get(1).html();
-					if(title==null || title.equals("") || content==null || content.equals(""))
-						continue;
-					//获取来源和时间
-					Elements ele=doc.select("div.continfo>table>tbody>tr").get(2).select("td");
-					String createDate=ele.get(0).text().replaceAll("发文日期:","");
-					
-					Elements elesource=doc.select("div.continfo>table>tbody>tr").get(1).select("td");
-					String source=elesource.get(1).text().replaceAll("发布机构:","");
-					System.out.println(createDate);
-					System.out.println(title);
-					System.out.println(source);
-					System.out.println(content);
-					System.out.println("=============================================");
-					
-				} catch (Exception e) {
-					continue;
-				}
-		
-				
-			}
-			
-			
-			
-		} catch (Exception e) {
-			e.printStackTrace();
-		}*/
+    /** Convenience overload of the six-argument requestBody that sends extra headers but no cookies. */
+    public static Connection.Response requestBody(String url, String httpMethod, Proxy proxy, Map<String, String> headers, Map<String, String> data) throws Exception {
+        return requestBody(url, httpMethod, null, headers, proxy, data);
+    }
 
+    /** Pool of browser User-Agent strings; one is picked at random per connection. */
+    private static final String[] USER_AGENTS = {
+            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
+            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36 OPR/37.0.2178.32",
+            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
+            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586",
+            "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
+            "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
+            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
+            "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0)",
+            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 BIDUBrowser/8.3 Safari/537.36",
+            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36 Core/1.47.277.400 QQBrowser/9.4.7658.400",
+            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 UBrowser/5.6.12150.8 Safari/537.36",
+            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0",
+            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36 TheWorld 7",
+            // This entry was truncated with a non-ASCII ellipsis ("W…"), which is not a valid header value.
+            // NOTE(review): restored to the usual Firefox 60 / Windows 7 form - confirm the intended string.
+            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0",
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"};
+
+    /**
+     * Returns a randomly chosen User-Agent string from the pool.
+     *
+     * @return one of the pool entries, each with equal probability
+     */
+    private static String getUserAgent() {
+        Random r = new Random();
+        // Bound by the array length so every entry, including the last one, can be selected.
+        return USER_AGENTS[r.nextInt(USER_AGENTS.length)];
+    }
 
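+    /**
+     * Trusts every site: installs a JVM-wide default hostname verifier that accepts any host
+     * and a default SSL socket factory whose trust manager accepts any certificate chain.
+     * WARNING: this disables TLS server authentication for all HttpsURLConnection traffic.
+     */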
+    private static void trustEveryone() {
         try {
+            HttpsURLConnection.setDefaultHostnameVerifier(new HostnameVerifier() {
+                @Override
+                public boolean verify(String hostname, SSLSession session) {
+                    return true;
+                }
+            });
 
-            List<String> newsLink = new ArrayList<String>();
-            Document doc = Jsoup.connect("http://www.cmzz100.com/cn/G100/toutiao.html").get();
-
-            //Elements eles=doc.select("table.border-style>tbody>tr>td>table>tbody>tr").get(1).select("table>tbody>tr>td>table>tbody>tr").get(1).select("td");
-            Elements eles = doc.select("div.title>a");
-
-
-            for (Element element : eles) {
-                newsLink.add("http://www.cmzz100.com" + element.attr("href"));
-            }
-
-
-            //从每个页面中获取所需字段
-            for (String str : newsLink) {
-                try {
-                    doc = Jsoup.connect(str).get();
-                    String title = doc.select("div.title").text();
-                    Elements ele1 = doc.select("div.info");
-                    for (Element el : ele1) {
-                        if (!doc.select("div.info").get(0).select("a>img").equals("")) {
-                            Elements ele2 = doc.select("div.info").get(0).select("a>img");
-                            ele2.attr("src", "http://www.cmzz100.com" + ele2.attr("src"));
-                        }
-                        Elements els3 = doc.select("div.info p>img[src]");
-                        els3.attr("src", "http://www.cmzz100.com" + els3.attr("src"));
-                        //	System.out.println(els3.toString());
-                        //	System.out.println("=============================================");
-                        //	el.attr("src","http://www.cmzz100.com"+el.attr("src"));
-                        //	System.out.print(el.toString());
-                    }
-                    String content = doc.select("div.info").html();
-
-                    System.out.print(ele1);
-                    System.out.println("=============================================");
-                    if (title == null || title.equals("") || content == null || content.equals(""))
-                        continue;
-                    //获取来源和时间
-                    Elements ele = doc.select("div.datetime");
-                    String createDate = ele.text().substring(0, ele.text().indexOf("|")).replaceAll("发表:", "");
-
-                    //System.out.println(createDate);
-
-
-                    //System.out.println(title);
-                    //System.out.println(source);
-                    //System.out.println(content);
+            SSLContext context = SSLContext.getInstance("TLS");
+            context.init(null, new X509TrustManager[]{new X509TrustManager() {
+                @Override
+                public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
+                }
 
+                @Override
+                public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
+                }
 
-                } catch (Exception e) {
-                    continue;
+                @Override
+                public X509Certificate[] getAcceptedIssuers() {
+                    return new X509Certificate[0];
                 }
-            }
+            }}, new SecureRandom());
+            HttpsURLConnection.setDefaultSSLSocketFactory(context.getSocketFactory());
         } catch (Exception e) {
-            // TODO: handle exception
+            e.printStackTrace();
         }
     }
 }

+ 136 - 0
src/test/java/Test4BaiduOCR.java

@@ -0,0 +1,136 @@
+import com.alibaba.fastjson.JSONObject;
+import org.jsoup.Connection;
+import org.jsoup.Jsoup;
+import top.lvzhiqiang.util.Base64Util;
+import top.lvzhiqiang.util.FileUtil;
+import top.lvzhiqiang.util.HttpUtil;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.util.Base64;
+import java.util.List;
+import java.util.Map;
+
+public class Test4BaiduOCR {
+    public static void main(String[] args) {
+        accurateBasic();
+    }
+
+    /**
+     * Sends a local image to Baidu's accurate_basic OCR endpoint and prints the JSON answer.
+     * <p>
+     * Important: the helper classes this sample was written against
+     * (FileUtil, Base64Util, HttpUtil, GsonUtils) can be downloaded from
+     * https://ai.baidu.com/file/658A35ABAB2D404FBF903F64D47C1F72
+     * https://ai.baidu.com/file/C8D81F3301E24D2892968F09AE1AD6E2
+     * https://ai.baidu.com/file/544D677F5D4E4F17B4122FBD60DB82B3
+     * https://ai.baidu.com/file/470B3ACCA3FE43788B5A963BF0B625F3
+     *
+     * @return the OCR response as a JSON string, or {@code null} if anything failed
+     */
+    public static String accurateBasic() {
+        // Request URL
+        String url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic";
+        try {
+            // Local file path
+            String filePath = "d:\\zhiqiang.lv\\Desktop\\misc.png";
+            byte[] imgData = FileUtil.readFileByBytes(filePath);
+            // Raw Base64 is enough: Jsoup form-encodes values passed through data(...).
+            String imgStr = Base64.getEncoder().encodeToString(imgData);
+
+            // For simplicity a fresh access_token is fetched on every call. Tokens expire,
+            // so production code should cache the token and renew it after expiry.
+            String accessToken = getAuth();
+            if (accessToken == null) {
+                // getAuth already reported the cause; avoid a misleading NullPointerException below.
+                return null;
+            }
+
+            Connection.Response response = Jsoup.connect(url.concat("?access_token=").concat(accessToken))
+                    .header("Content-Type", "application/x-www-form-urlencoded")
+                    .timeout(50000)
+                    .data("image", imgStr)
+                    .ignoreContentType(true)
+                    .method(Connection.Method.POST)
+                    .execute();
+            JSONObject jsonObject = JSONObject.parseObject(response.body());
+            String result = jsonObject.toJSONString();
+
+            System.out.println(result);
+            return result;
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return null;
+    }
+
+    /**
+     * Fetches an access token using the credentials configured below.
+     *
+     * @return the access_token value, e.g.
+     * "24.460da4889caad24cccdb1fea17221975.2592000.1491995545.282335-1234567",
+     * or {@code null} if the token could not be obtained
+     */
+    public static String getAuth() {
+        // API Key from the Baidu console - replace with your own
+        String clientId = "xxx";
+        // Secret Key from the Baidu console - replace with your own
+        String clientSecret = "xxx";
+        return getAuth(clientId, clientSecret);
+    }
+
+    /**
+     * Fetches an API access token.
+     * The token is valid for a limited time only; callers must manage it and request a new one after expiry.
+     *
+     * @param ak API Key from the Baidu cloud console
+     * @param sk Secret Key from the Baidu cloud console
+     * @return the access_token, e.g.
+     * "24.460da4889caad24cccdb1fea17221975.2592000.1491995545.282335-1234567",
+     * or {@code null} if the request failed
+     */
+    public static String getAuth(String ak, String sk) {
+        // Token endpoint
+        String authHost = "https://aip.baidubce.com/oauth/2.0/token?";
+        String getAccessTokenUrl = authHost
+                // 1. grant_type is a fixed parameter
+                + "grant_type=client_credentials"
+                // 2. API Key
+                + "&client_id=" + ak
+                // 3. Secret Key
+                + "&client_secret=" + sk;
+        try {
+            URL realUrl = new URL(getAccessTokenUrl);
+            // Open the connection to the URL
+            HttpURLConnection connection = (HttpURLConnection) realUrl.openConnection();
+            connection.setRequestMethod("GET");
+            connection.connect();
+            // Echo all response header fields
+            for (Map.Entry<String, List<String>> header : connection.getHeaderFields().entrySet()) {
+                System.err.println(header.getKey() + "--->" + header.getValue());
+            }
+            // Read the response; the reader is closed even if reading fails.
+            StringBuilder result = new StringBuilder();
+            try (BufferedReader in = new BufferedReader(new InputStreamReader(
+                    connection.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
+                String line;
+                while ((line = in.readLine()) != null) {
+                    result.append(line);
+                }
+            }
+            System.err.println("result:" + result);
+            JSONObject jsonObject = JSONObject.parseObject(result.toString());
+            return jsonObject.getString("access_token");
+        } catch (Exception e) {
+            System.err.printf("获取token失败!");
+            e.printStackTrace(System.err);
+        }
+        return null;
+    }
+}

+ 142 - 0
src/test/java/Test4Javbus.java

@@ -0,0 +1,142 @@
+import com.alibaba.fastjson.JSONObject;
+import org.jsoup.Connection;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import top.lvzhiqiang.util.FileUtil;
+import top.lvzhiqiang.util.JsoupUtil;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.InetSocketAddress;
+import java.net.Proxy;
+import java.net.URL;
+import java.util.Base64;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class Test4Javbus {
+
+    /**
+     * Manual experiment that walks through the Javbus forum login: fetch the login form,
+     * request a captcha image, send it to Baidu OCR, then post the login form and print
+     * the cookies of the response.
+     * <p>
+     * Credentials are read from the environment variables BAIDU_OCR_API_KEY,
+     * BAIDU_OCR_SECRET_KEY, JAVBUS_USERNAME and JAVBUS_PASSWORD so that no secret
+     * has to live in source control.
+     */
+    public static void main(String[] args) throws Exception {
+        // Resolve all secrets first so a missing variable fails before any network traffic.
+        String baiduApiKey = requireEnv("BAIDU_OCR_API_KEY");
+        String baiduSecretKey = requireEnv("BAIDU_OCR_SECRET_KEY");
+        String username = requireEnv("JAVBUS_USERNAME");
+        String password = requireEnv("JAVBUS_PASSWORD");
+
+        // Proxy and token setup
+        Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 1080));
+
+        // 1 Log in to obtain cookies
+        // 1.1 https://www.javbus.com/forum/member.php
+        String memberUrl = "https://www.javbus.com/forum/member.php";
+        Map<String, String> params = new HashMap<>();
+        params.put("mod", "logging");
+        params.put("action", "login");
+        params.put("referer", "");
+        params.put("infloat", "yes");
+        params.put("handlekey", "login");
+        params.put("inajax", "1");
+        params.put("ajaxtarget", "fwin_content_login");
+        // The form markup arrives wrapped in a CDATA section; strip the wrapper before parsing it as HTML.
+        String memberHtmlStr = JsoupUtil.requestDocument(memberUrl, JsoupUtil.HTTP_GET, proxy, params).html().replace("<![CDATA[", "").replace("]]>", "");
+        Document memberDocument = Jsoup.parse(memberHtmlStr);
+        // key1: suffix of the password input id, key2: suffix of the seccode span id, key3: formhash value
+        String key1 = memberDocument.select("input[type='password']").first().attr("id").split("_")[1];
+        String key2 = memberDocument.select("span[id^='seccode']").first().attr("id").split("_")[1];
+        String key3 = memberDocument.select("input[name='formhash']").first().val();
+        // 1.2 https://www.javbus.com/forum/misc.php
+        String miscUrl = "https://www.javbus.com/forum/misc.php";
+        params.clear();
+        params.put("mod", "seccode");
+        params.put("action", "update");
+        params.put("idhash", key2);
+        params.put("modid", "member::logging");
+        Document miscDocument = JsoupUtil.requestDocument(miscUrl, JsoupUtil.HTTP_GET, proxy, params);
+        String imgVerifyUrl = "https://www.javbus.com/forum/" + miscDocument.select("img[onclick]").first().attr("src");
+        // 1.3 Download the captcha image
+        Map<String, String> headerParams = new HashMap<>();
+        headerParams.put("referer", "https://www.javbus.com/forum/forum.php");
+        Connection.Response imgResponse = JsoupUtil.requestBody(imgVerifyUrl, JsoupUtil.HTTP_GET, proxy, headerParams, null);
+        BufferedInputStream bufferedInputStream = imgResponse.bodyStream();
+        // 1.4 Recognize the captcha text with Baidu OCR
+        String ocrAccurateBasicUrl = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic";
+        String accessToken = getAuth(baiduApiKey, baiduSecretKey);
+        if (accessToken == null) {
+            // getAuth already printed the cause; fail clearly instead of with a NullPointerException.
+            throw new IllegalStateException("Could not obtain a Baidu OCR access token");
+        }
+        headerParams.clear();
+        headerParams.put("Content-Type", "application/x-www-form-urlencoded");
+        params.clear();
+        byte[] imgBytes = FileUtil.readStreamByBytes(bufferedInputStream);
+        params.put("image", Base64.getEncoder().encodeToString(imgBytes));
+        Connection.Response ocrResponse = JsoupUtil.requestBody(ocrAccurateBasicUrl.concat("?access_token=").concat(accessToken),
+                JsoupUtil.HTTP_POST, Proxy.NO_PROXY, headerParams, params);
+        // NOTE(review): the OCR result is parsed but not used yet; seccodeverify below is still sent empty.
+        JSONObject jsonObject = JSONObject.parseObject(ocrResponse.body());
+
+        // 1.5 https://www.javbus.com/forum/member.php
+        String loginQuery = "?mod=logging&action=login&loginsubmit=yes&handlekey=login&inajax=1&loginhash=" + key1;
+        headerParams.clear();
+        headerParams.put("Content-Type", "application/x-www-form-urlencoded");
+        params.clear();
+        params.put("formhash", key3);
+        // NOTE(review): the referer value starts with a space, kept as in the original - confirm it is intended.
+        params.put("referer", " https://www.javbus.com/forum/forum.php");
+        params.put("loginfield", "username");
+        params.put("username", username);
+        params.put("password", password);
+        params.put("questionid", "0");
+        params.put("answer", "");
+        // NOTE(review): seccodehash is sent as key1 (the login hash) while the captcha was requested
+        // with idhash=key2; presumably this should be key2 - verify against a real browser request.
+        params.put("seccodehash", key1);
+        params.put("seccodemodid", "member::logging");
+        params.put("seccodeverify", "");
+
+        Connection.Response response = JsoupUtil.requestBody(memberUrl.concat(loginQuery), JsoupUtil.HTTP_POST, proxy, headerParams, params);
+        Map<String, String> cookies = response.cookies();
+        System.out.println(cookies);
+    }
+
+    /** Returns the value of a required environment variable or fails with a descriptive error. */
+    private static String requireEnv(String name) {
+        String value = System.getenv(name);
+        if (value == null || value.isEmpty()) {
+            throw new IllegalStateException("Missing required environment variable: " + name);
+        }
+        return value;
+    }
+
+    /**
+     * Fetches a Baidu API access token.
+     *
+     * @param ak API Key from the Baidu cloud console
+     * @param sk Secret Key from the Baidu cloud console
+     * @return the access_token, or {@code null} if the request failed
+     */
+    public static String getAuth(String ak, String sk) {
+        // Token endpoint
+        String authHost = "https://aip.baidubce.com/oauth/2.0/token?";
+        String getAccessTokenUrl = authHost
+                // 1. grant_type is a fixed parameter
+                + "grant_type=client_credentials"
+                // 2. API Key
+                + "&client_id=" + ak
+                // 3. Secret Key
+                + "&client_secret=" + sk;
+        try {
+            URL realUrl = new URL(getAccessTokenUrl);
+            // Open the connection to the URL
+            HttpURLConnection connection = (HttpURLConnection) realUrl.openConnection();
+            connection.setRequestMethod("GET");
+            connection.connect();
+            // Echo all response header fields
+            for (Map.Entry<String, List<String>> header : connection.getHeaderFields().entrySet()) {
+                System.err.println(header.getKey() + "--->" + header.getValue());
+            }
+            // Read the response; the reader is closed even if reading fails.
+            StringBuilder result = new StringBuilder();
+            try (BufferedReader in = new BufferedReader(new InputStreamReader(
+                    connection.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
+                String line;
+                while ((line = in.readLine()) != null) {
+                    result.append(line);
+                }
+            }
+            System.err.println("result:" + result);
+            JSONObject jsonObject = JSONObject.parseObject(result.toString());
+            return jsonObject.getString("access_token");
+        } catch (Exception e) {
+            System.err.printf("获取token失败!");
+            e.printStackTrace(System.err);
+        }
+        return null;
+    }
+}