1. 测试代码
public class TesseractExample { public static void main(String[] args) throws Exception { //1.本地 //File imageFile = new File("C:\\wangl\\eurotext.tif"); //InputStream is = new FileInputStream(imageFile);; //2.url http //InputStream is = downLoadFromUrl("http://s1.sinaimg.cn/mw690/003bsgbmgy6R6ekxkl2e0"); //3.url https InputStream is = downLoadFromUrl("https://login.10333.com/dotoyo//captchacode"); BufferedImage bi = ImageIO.read(is); ITesseract instance = new Tesseract(); // JNA Interface Mapping //设置datapath instance.setDatapath("C:\\wangl\\soft\\tess4j\\tessdata"); //设置语言包 instance.setLanguage("eng+chi_sim"); try { String result = instance.doOCR(bi); System.out.println(result); } catch (TesseractException e) { System.err.println(e.getMessage()); } if(is != null){ is.close(); } } public static InputStream downLoadFromUrl(String urlStr) throws Exception{ //指定证书文件 System.setProperty("javax.net.ssl.trustStore","C:\\wangl\\soft\\Java\\jdk1.7.0_79\\jre\\lib\\security\\jssecacerts"); URL url = new URL(urlStr); HttpURLConnection conn = (HttpURLConnection)url.openConnection(); //信任所有的https证书 trustAllHttpsCertificates(); HttpsURLConnection.setDefaultHostnameVerifier(hv); //设置超时间为3秒 conn.setConnectTimeout(3*1000); //防止屏蔽程序抓取而返回403错误 conn.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)"); //得到输入流 InputStream inputStream = conn.getInputStream(); return inputStream; } public static HostnameVerifier hv = new HostnameVerifier() { public boolean verify(String urlHostName, SSLSession session) { System.out.println("Warning: URL Host: " + urlHostName + " vs. 
" + session.getPeerHost()); return true; } }; private static void trustAllHttpsCertificates() throws Exception { javax.net.ssl.TrustManager[] trustAllCerts = new javax.net.ssl.TrustManager[1]; javax.net.ssl.TrustManager tm = new miTM(); trustAllCerts[0] = tm; javax.net.ssl.SSLContext sc = javax.net.ssl.SSLContext .getInstance("SSL"); sc.init(null, trustAllCerts, null); javax.net.ssl.HttpsURLConnection.setDefaultSSLSocketFactory(sc .getSocketFactory()); } static class miTM implements javax.net.ssl.TrustManager, javax.net.ssl.X509TrustManager { public java.security.cert.X509Certificate[] getAcceptedIssuers() { return null; } public boolean isServerTrusted( java.security.cert.X509Certificate[] certs) { return true; } public boolean isClientTrusted( java.security.cert.X509Certificate[] certs) { return true; } public void checkServerTrusted( java.security.cert.X509Certificate[] certs, String authType) throws java.security.cert.CertificateException { return; } public void checkClientTrusted( java.security.cert.X509Certificate[] certs, String authType) throws java.security.cert.CertificateException { return; } } }
2. 设置 datapath(tessdata 目录),并设置中英文语言包(eng+chi_sim)。
3. 注意:如果 https 证书不被信任,则需要先导出证书(在 InstallCert 中填写 host、port 和输出路径,然后运行 InstallCert 导出证书)。
/**
 * Connects to a TLS server, prints the certificate chain it presents and adds
 * the first certificate of that chain to a copy of the JRE trust store.
 *
 * <p>Usage: {@code java InstallCert [<host>[:port] [passphrase]]}. When no
 * arguments are given the built-in default host, port and passphrase are used.
 */
public class InstallCert {

    /** Host used when none is given on the command line. */
    private static final String DEFAULT_HOST = "14.17.69.168";

    /** Standard HTTPS port, used when none is given on the command line. */
    private static final int DEFAULT_PORT = 443;

    /** Default password of the JRE trust store. */
    private static final String DEFAULT_PASSPHRASE = "changeit";

    /** File the updated key store is written to. */
    private static final String OUTPUT_KEYSTORE = "c://wangl//https//jssecacerts";

    /** Index (zero-based) of the certificate in the chain that gets stored. */
    private static final int CERT_INDEX = 0;

    private static final char[] HEXDIGITS = "0123456789abcdef".toCharArray();

    /**
     * Runs the tool.
     *
     * @param args optional {@code <host>[:port]} followed by an optional
     *             key store passphrase
     * @throws Exception if the key store cannot be read or written, or the
     *                   connection to the server fails
     */
    public static void main(String[] args) throws Exception {
        String host = DEFAULT_HOST;
        int port = DEFAULT_PORT;
        String p = DEFAULT_PASSPHRASE;
        if ((args.length == 1) || (args.length == 2)) {
            String[] c = args[0].split(":");
            host = c[0];
            port = (c.length == 1) ? DEFAULT_PORT : Integer.parseInt(c[1]);
            if (args.length == 2) {
                p = args[1];
            }
        }
        char[] passphrase = p.toCharArray();

        KeyStore ks = loadKeyStore(passphrase);

        SSLContext context = SSLContext.getInstance("TLS");
        TrustManagerFactory tmf = TrustManagerFactory
                .getInstance(TrustManagerFactory.getDefaultAlgorithm());
        tmf.init(ks);
        X509TrustManager defaultTrustManager = (X509TrustManager) tmf.getTrustManagers()[0];
        // Wrap the default trust manager so the server chain is captured even
        // when validation rejects it.
        SavingTrustManager tm = new SavingTrustManager(defaultTrustManager);
        context.init(null, new TrustManager[] { tm }, null);
        SSLSocketFactory factory = context.getSocketFactory();

        System.out.println("Opening connection to " + host + ":" + port + "...");
        // try-with-resources closes the socket on the failure path as well.
        try (SSLSocket socket = (SSLSocket) factory.createSocket(host, port)) {
            socket.setSoTimeout(10000);
            try {
                System.out.println("Starting SSL handshake...");
                socket.startHandshake();
                System.out.println();
                System.out.println("No errors, certificate is already trusted");
            } catch (SSLException e) {
                // Expected when the certificate is not trusted yet; the chain
                // has still been recorded by the SavingTrustManager.
                System.out.println();
                e.printStackTrace(System.out);
            }
        }

        X509Certificate[] chain = tm.chain;
        if (chain == null) {
            System.out.println("Could not obtain server certificate chain");
            return;
        }

        System.out.println();
        System.out.println("Server sent " + chain.length + " certificate(s):");
        System.out.println();
        MessageDigest sha1 = MessageDigest.getInstance("SHA1");
        MessageDigest md5 = MessageDigest.getInstance("MD5");
        for (int i = 0; i < chain.length; i++) {
            X509Certificate cert = chain[i];
            byte[] encoded = cert.getEncoded();
            System.out.println(" " + (i + 1) + " Subject " + cert.getSubjectDN());
            System.out.println(" Issuer " + cert.getIssuerDN());
            System.out.println(" sha1 " + toHexString(sha1.digest(encoded)));
            System.out.println(" md5 " + toHexString(md5.digest(encoded)));
            System.out.println();
        }

        System.out.println("Enter certificate to add to trusted keystore or 'q' to quit: [1]");
        // The tool runs non-interactively and always picks the first certificate.
        int k = CERT_INDEX;
        if (k >= chain.length) {
            System.out.println("KeyStore not changed");
            return;
        }

        X509Certificate cert = chain[k];
        String alias = host + "-" + (k + 1);
        ks.setCertificateEntry(alias, cert);

        try (OutputStream out = new FileOutputStream(OUTPUT_KEYSTORE)) {
            ks.store(out, passphrase);
        }

        System.out.println();
        System.out.println(cert);
        System.out.println();
        System.out.println("Added certificate to keystore 'jssecacerts' using alias '"
                + alias + "'");
    }

    /**
     * Loads the trust store to extend: {@code jssecacerts} in the working
     * directory if present, otherwise {@code jssecacerts} or {@code cacerts}
     * from the JRE's {@code lib/security} directory.
     */
    private static KeyStore loadKeyStore(char[] passphrase) throws Exception {
        File file = new File("jssecacerts");
        if (!file.isFile()) {
            char sep = File.separatorChar;
            File dir = new File(System.getProperty("java.home") + sep + "lib" + sep + "security");
            file = new File(dir, "jssecacerts");
            if (!file.isFile()) {
                file = new File(dir, "cacerts");
            }
        }
        System.out.println("Loading KeyStore " + file + "...");
        KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
        try (InputStream in = new FileInputStream(file)) {
            ks.load(in, passphrase);
        }
        return ks;
    }

    /**
     * Formats bytes as lowercase two-digit hex values, each followed by a
     * space (e.g. {@code "0f ff "}).
     */
    private static String toHexString(byte[] bytes) {
        StringBuilder sb = new StringBuilder(bytes.length * 3);
        for (byte raw : bytes) {
            int b = raw & 0xff; // treat the byte as unsigned
            sb.append(HEXDIGITS[b >> 4]);
            sb.append(HEXDIGITS[b & 15]);
            sb.append(' ');
        }
        return sb.toString();
    }

    /**
     * Trust manager that remembers the server's certificate chain and then
     * delegates validation to the wrapped trust manager.
     */
    private static class SavingTrustManager implements X509TrustManager {

        private final X509TrustManager tm;
        private X509Certificate[] chain;

        SavingTrustManager(X509TrustManager tm) {
            this.tm = tm;
        }

        /**
         * Returns an empty array. The {@code X509TrustManager} contract
         * requires a non-null result, and throwing here is a known cause of
         * handshake failures on Java 7 and later.
         */
        @Override
        public X509Certificate[] getAcceptedIssuers() {
            return new X509Certificate[0];
        }

        /** Not supported: this tool only acts as a TLS client. */
        @Override
        public void checkClientTrusted(X509Certificate[] chain, String authType)
                throws CertificateException {
            throw new UnsupportedOperationException();
        }

        /** Records the chain, then lets the wrapped trust manager validate it. */
        @Override
        public void checkServerTrusted(X509Certificate[] chain, String authType)
                throws CertificateException {
            this.chain = chain;
            tm.checkServerTrusted(chain, authType);
        }
    }
}
4.https需要在代码1中指定证书文件.
5.图片信息为: 汉字验证码--demo分享
最终结果显示: 汉字验证码汴demo分享
OK,识别率还可以,但存在提升的空间!
6. 如果追求更高的识别率,可以考虑付费的 OCR 产品,例如国外的泰比(ABBYY)和国内的汉王。