分享web开发知识

注册/登录|最近发布|今日推荐

主页 IT知识网页技术软件开发前端开发代码编程运营维护技术分享教程案例
当前位置:首页 > 运营维护

httpclient:Ip 代理

发布时间:2023-09-06 01:24责任编辑:沈小雨关键词:http

参考:http://blog.csdn.net/sdfiiiiii/article/details/70432060  http://blog.csdn.net/qy20115549/article/details/54945974

第一篇博客可以获取http://www.xicidaili.com/网站上所有的代理ip,并测试可不可以用(貌似不是很准),可用的代理ip放到一个list中

第二篇博客是直接将代理ip设置进代码内,可以用作测试ip可不可用

第一篇博客

<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.28</version>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>
import com.alibaba.fastjson.JSONObject;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import java.util.ArrayList;import java.util.HashMap;import java.util.List;import java.util.Map;import java.util.regex.Matcher;import java.util.regex.Pattern;/** * 获取代理IP,需要 * com.alibaba.fastjson.JSONObject以及Jsoup */public class ProxyCralwerUnusedVPN { ???ThreadLocal<Integer> localWantedNumber = new ThreadLocal<Integer>(); ???ThreadLocal<List<ProxyInfo>> localProxyInfos = new ThreadLocal<List<ProxyInfo>>(); ???public static void main(String[] args) { ???????ProxyCralwerUnusedVPN proxyCrawler = new ProxyCralwerUnusedVPN(); ???????/** ????????* 想要获取的代理IP个数,由需求方自行指定。(如果个数太多,将导致返回变慢) ????????*/ ???????proxyCrawler.startCrawler(1); ???} ???/** ????* 暴露给外部模块调用的入口 ????* @param wantedNumber 调用方期望获取到的代理IP个数 ????*/ ???public String startCrawler(int wantedNumber) { ???????localWantedNumber.set(wantedNumber); ???????kuaidailiCom("http://www.xicidaili.com/nn/", 15); ???????kuaidailiCom("http://www.xicidaili.com/nt/", 15); ???????kuaidailiCom("http://www.xicidaili.com/wt/", 15); ???????kuaidailiCom("http://www.kuaidaili.com/free/inha/", 15); ???????kuaidailiCom("http://www.kuaidaili.com/free/intr/", 15); ???????kuaidailiCom("http://www.kuaidaili.com/free/outtr/", 15); ???????/** ????????* 构造返回数据 ????????*/ ???????ProxyResponse response = new ProxyResponse(); ???????response.setSuccess("true"); ???????Map<String, Object> dataInfoMap = new HashMap<String, Object>(); ???????dataInfoMap.put("numFound", localProxyInfos.get().size()); ???????dataInfoMap.put("pageNum", 1); ???????dataInfoMap.put("proxy", localProxyInfos.get()); ???????response.setData(dataInfoMap); ???????String responseString = JSONObject.toJSON(response).toString(); ???????System.out.println(responseString); ???????return responseString; ???} ???private void kuaidailiCom(String baseUrl, int totalPage) { ???????String ipReg = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3} \\d{1,6}"; ???????Pattern ipPtn = Pattern.compile(ipReg); 
???????for (int i = 1; i < totalPage; i++) { ???????????if (getCurrentProxyNumber() >= localWantedNumber.get()) { ???????????????return; ???????????} ???????????try { ???????????????Document doc = Jsoup.connect(baseUrl + i + "/") ???????????????????????.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8") ???????????????????????.header("Accept-Encoding", "gzip, deflate, sdch") ???????????????????????.header("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6") ???????????????????????.header("Cache-Control", "max-age=0") ???????????????????????.header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36") ???????????????????????.header("Cookie", "Hm_lvt_7ed65b1cc4b810e9fd37959c9bb51b31=1462812244; _gat=1; _ga=GA1.2.1061361785.1462812244") ???????????????????????.header("Host", "www.kuaidaili.com") ???????????????????????.header("Referer", "http://www.kuaidaili.com/free/outha/") ???????????????????????.timeout(30 * 1000) ???????????????????????.get(); ???????????????Matcher m = ipPtn.matcher(doc.text()); ???????????????while (m.find()) { ???????????????????if (getCurrentProxyNumber() >= localWantedNumber.get()) { ???????????????????????break; ???????????????????} ???????????????????String[] strs = m.group().split(" "); ???????????????????if (checkProxy(strs[0], Integer.parseInt(strs[1]))) { ???????????????????????System.out.println("获取到可用代理IP\t" + strs[0] + "\t" + strs[1]); ???????????????????????addProxy(strs[0], strs[1], "http"); ???????????????????} ???????????????} ???????????} catch (Exception e) { ???????????????e.printStackTrace(); ???????????} ???????} ???} ???private static boolean checkProxy(String ip, Integer port) { ???????try { ???????????//http://1212.ip138.com/ic.asp 可以换成任何比较快的网页 ???????????Jsoup.connect("http://1212.ip138.com/ic.asp") ???????????????????.timeout(2 * 1000) ???????????????????.proxy(ip, port) ???????????????????.get(); 
???????????return true; ???????} catch (Exception e) { ???????????return false; ???????} ???} ???private int getCurrentProxyNumber() { ???????List<ProxyInfo> proxyInfos = localProxyInfos.get(); ???????if (proxyInfos == null) { ???????????proxyInfos = new ArrayList<ProxyInfo>(); ???????????localProxyInfos.set(proxyInfos); ???????????return 0; ???????} ???????else { ???????????return proxyInfos.size(); ???????} ???} ???private void addProxy(String ip, String port, String protocol){ ???????List<ProxyInfo> proxyInfos = localProxyInfos.get(); ???????if (proxyInfos == null) { ???????????proxyInfos = new ArrayList<ProxyInfo>(); ???????????proxyInfos.add(new ProxyInfo(ip, port, protocol)); ???????} ???????else { ???????????proxyInfos.add(new ProxyInfo(ip, port, protocol)); ???????} ???}}class ProxyInfo { ???private String userName = ""; ???private String ip; ???private String password = ""; ???private String type; ???private String port; ???private int is_internet = 1; ???public ProxyInfo(String ip, String port, String type) { ???????this.ip = ip; ???????this.type = type; ???????this.port = port; ???} ???public String getUserName() { ???????return userName; ???} ???public void setUserName(String userName) { ???????this.userName = userName; ???} ???public String getIp() { ???????return ip; ???} ???public void setIp(String ip) { ???????this.ip = ip; ???} ???public String getPassword() { ???????return password; ???} ???public void setPassword(String password) { ???????this.password = password; ???} ???public String getType() { ???????return type; ???} ???public void setType(String type) { ???????this.type = type; ???} ???public String getPort() { ???????return port; ???} ???public void setPort(String port) { ???????this.port = port; ???} ???public int getIs_internet() { ???????return is_internet; ???} ???public void setIs_internet(int is_internet) { ???????this.is_internet = is_internet; ???}}class ProxyResponse { ???private String success; ???private Map<String, Object> 
data; ???public String getSuccess() { ???????return success; ???} ???public void setSuccess(String success) { ???????this.success = success; ???} ???public Map<String, Object> getData() { ???????return data; ???} ???public void setData(Map<String, Object> data) { ???????this.data = data; ???}}

 第二篇博客

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.URL;
import java.net.URLConnection;

/**
 * Fetches a page through a given HTTP proxy; useful for checking by hand
 * whether a proxy address works.
 */
public class GetHtml {

    /** Timeout, in milliseconds, applied to both connecting and reading. */
    private static final int TIMEOUT_MILLIS = 3000;

    public static void main(String[] args) throws UnsupportedEncodingException {
        // Supply the proxy ip, its port, and the url to fetch.
        gethtml("121.61.101.222", 808, "http://club.autohome.com.cn/bbs/forum-c-2533-1.html?orderby=dateline&qaType=-1");
    }

    /**
     * Downloads {@code url} through the HTTP proxy at {@code ip:port}, prints the
     * body and returns it.
     *
     * @param ip   proxy host
     * @param port proxy port
     * @param url  address of the page to fetch
     * @return the page body decoded as gb2312 with each line terminated by
     *         {@code '\n'}, or an empty string if the URL is malformed or the
     *         request through the proxy fails
     * @throws UnsupportedEncodingException if the gb2312 charset is not supported
     */
    public static String gethtml(String ip, int port, String url) throws UnsupportedEncodingException {
        URL target;
        try {
            target = new URL(url);
        } catch (MalformedURLException e1) {
            // A bad URL says nothing about the proxy: report it and stop here
            // instead of falling through to a NullPointerException that would
            // be misreported as an unavailable proxy.
            e1.printStackTrace();
            return "";
        }

        // Address of the proxy server.
        InetSocketAddress addr = new InetSocketAddress(ip, port);
        Proxy proxy = new Proxy(Proxy.Type.HTTP, addr); // http proxy

        InputStream in = null;
        try {
            URLConnection conn = target.openConnection(proxy);
            conn.setConnectTimeout(TIMEOUT_MILLIS);
            // Without a read timeout a proxy that accepts the connection but
            // never answers would block this call forever.
            conn.setReadTimeout(TIMEOUT_MILLIS);
            in = conn.getInputStream();
        } catch (Exception e) {
            // The proxy could not be used; name it in the message.
            System.out.println("ip " + ip + ":" + port + " is not available");
        }

        String s = convertStreamToString(in);
        System.out.println(s);
        return s;
    }

    /**
     * Reads the whole stream as gb2312 text and closes it.
     *
     * @param is stream to read; may be {@code null}
     * @return the content with every line terminated by {@code '\n'}; an empty
     *         string when {@code is} is {@code null}. If reading fails part-way,
     *         the text read so far is returned.
     * @throws UnsupportedEncodingException if the gb2312 charset is not supported
     */
    public static String convertStreamToString(InputStream is) throws UnsupportedEncodingException {
        if (is == null) {
            return "";
        }
        BufferedReader reader = new BufferedReader(new InputStreamReader(is, "gb2312"));
        StringBuilder sb = new StringBuilder();
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                // Real newline; the original appended the two characters "/n".
                sb.append(line).append('\n');
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                // Closing the reader also closes the underlying stream.
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return sb.toString();
    }
}

httpclient:Ip 代理

原文地址:http://www.cnblogs.com/Michael2397/p/7821930.html

知识推荐

我的编程学习网——分享web前端后端开发技术知识。 垃圾信息处理邮箱 tousu563@163.com 网站地图
icp备案号 闽ICP备2023006418号-8 不良信息举报平台 互联网安全管理备案 Copyright 2023 www.wodecom.cn All Rights Reserved