我是靠谱客的博主 体贴长颈鹿,最近开发中收集的这篇文章主要介绍Java爬虫之学习篇 HttpClient学习,觉得挺不错的,现在分享给大家,希望可以做个参考。

概述

HttpClient

简介:网络爬虫就是用程序帮助我们访问网络上的资源,我们一直以来都是使用HTTP协议访问互联网的网页,网络爬虫需要编写程序,在这里使用同样的HTTP协议访问网页。
这里我们使用Java 的 HTTP协议客户端 HttpClient 这个技术,来实现抓取网页数据。

准备工作:HttpClient的jar包的maven坐标
	<dependency>
		  <groupId>org.apache.httpcomponents</groupId>
		  <artifactId>httpclient</artifactId>
		  <version>4.5.2</version>
	</dependency>
HttpClient的Get请求:
import java.io.IOException;

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Minimal HttpClient example: issues a plain GET request and prints the
 * length of the response body.
 */
public class HttpClientGet {

	/**
	 * Sends a GET request to a fixed URL and, when the server answers with
	 * status 200, prints the number of characters in the decoded body.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		// Request object carrying the target URL.
		HttpGet httpGet = new HttpGet("http://www.itcast.com");

		// try-with-resources closes the response first and the client second,
		// and skips the response entirely when execute() fails, so a failed
		// request can no longer trigger a NullPointerException on close.
		try (CloseableHttpClient httpClient = HttpClients.createDefault();
				CloseableHttpResponse response = httpClient.execute(httpGet)) {

			// Only parse the body of a successful response.
			if (response.getStatusLine().getStatusCode() == 200) {
				String content = EntityUtils.toString(response.getEntity(), "utf8");
				System.out.println(content.length());
			}

		} catch (IOException | RuntimeException e) {
			// Demo program: report the failure and exit normally.
			e.printStackTrace();
		}
	}
}
HttpClient的Get带参数请求:
/**
 * HttpClient example: issues a GET request whose query string is assembled
 * with {@code URIBuilder}, then prints the length of the response body.
 */
public class HttpClientParamGet {

	/**
	 * Requests {@code http://yun.itheima.com/search?keys=Java} and, when the
	 * server answers with status 200, prints the number of characters in the
	 * decoded body.
	 *
	 * @param args unused
	 * @throws Exception if the request URI cannot be built
	 */
	public static void main(String[] args) throws Exception {

		// Build the URI before any client is created so that a malformed
		// address cannot leave an open client behind.
		URIBuilder uriBuilder = new URIBuilder("http://yun.itheima.com/search");

		// Query parameter: ?keys=Java
		uriBuilder.setParameter("keys", "Java");
		// Several parameters can be chained, each with its own name:
		//   uriBuilder.setParameter("name1", "value1")
		//             .setParameter("name2", "value2");
		// Calling setParameter twice with the same name overrides the earlier value.

		// Request object carrying the assembled URI.
		HttpGet httpGet = new HttpGet(uriBuilder.build());

		// try-with-resources closes the response first and the client second,
		// and skips the response entirely when execute() fails, so a failed
		// request can no longer trigger a NullPointerException on close.
		try (CloseableHttpClient httpClient = HttpClients.createDefault();
				CloseableHttpResponse response = httpClient.execute(httpGet)) {

			// Only parse the body of a successful response.
			if (response.getStatusLine().getStatusCode() == 200) {
				String content = EntityUtils.toString(response.getEntity(), "utf8");
				System.out.println(content.length());
			}

		} catch (IOException | RuntimeException e) {
			// Demo program: report the failure and exit normally.
			e.printStackTrace();
		}
	}
}
HttpClient的Post请求:
/**
 * Minimal HttpClient example: issues a POST request without a body and
 * prints the length of the response body.
 */
public class HttpClientPost {

	/**
	 * Sends a POST request to a fixed URL and, when the server answers with
	 * status 200, prints the number of characters in the decoded body.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		// Request object carrying the target URL (POST, no entity attached).
		HttpPost httpPost = new HttpPost("http://www.itcast.com");

		// try-with-resources closes the response first and the client second,
		// and skips the response entirely when execute() fails, so a failed
		// request can no longer trigger a NullPointerException on close.
		try (CloseableHttpClient httpClient = HttpClients.createDefault();
				CloseableHttpResponse response = httpClient.execute(httpPost)) {

			// Only parse the body of a successful response.
			if (response.getStatusLine().getStatusCode() == 200) {
				String content = EntityUtils.toString(response.getEntity(), "utf8");
				System.out.println(content.length());
			}

		} catch (IOException | RuntimeException e) {
			// Demo program: report the failure and exit normally.
			e.printStackTrace();
		}
	}
}
HttpClient的post带参数请求:
/**
 * HttpClient example: issues a POST request that carries URL-encoded form
 * data, then prints the length of the response body.
 */
public class HttpClientPost {

	/**
	 * Posts the form field {@code keys=Java} to
	 * {@code http://yun.itheima.com/search} and, when the server answers with
	 * status 200, prints the number of characters in the decoded body.
	 *
	 * @param args unused
	 * @throws Exception if the form entity cannot be encoded
	 */
	public static void main(String[] args) throws Exception {

		// The parameters travel in the request body, not in the query string.
		HttpPost httpPost = new HttpPost("http://yun.itheima.com/search");

		// Collect the form fields to submit.
		List<NameValuePair> params = new ArrayList<NameValuePair>();
		params.add(new BasicNameValuePair("keys", "Java"));

		// Wrap the fields in a form entity; the second argument is the charset
		// used to encode them. Built before any client is created so that an
		// encoding failure cannot leave an open client behind.
		UrlEncodedFormEntity urlencode = new UrlEncodedFormEntity(params, "utf8");

		// Attach the form entity to the POST request.
		httpPost.setEntity(urlencode);

		// try-with-resources closes the response first and the client second,
		// and skips the response entirely when execute() fails, so a failed
		// request can no longer trigger a NullPointerException on close.
		try (CloseableHttpClient httpClient = HttpClients.createDefault();
				CloseableHttpResponse response = httpClient.execute(httpPost)) {

			// Only parse the body of a successful response.
			if (response.getStatusLine().getStatusCode() == 200) {
				String content = EntityUtils.toString(response.getEntity(), "utf8");
				System.out.println(content.length());
			}

		} catch (IOException | RuntimeException e) {
			// Demo program: report the failure and exit normally.
			e.printStackTrace();
		}
	}
}
HttpClient连接池

如果每次请求都要创建HttpClient,会有频繁创建和销毁的问题,可以使用连接池来解决这个问题。


/**
 * HttpClient example: performs GET requests through clients that share one
 * pooling connection manager.
 */
public class HttpClientPoolGet {

	/**
	 * Configures a connection pool, runs two GET requests against it and
	 * shuts the pool down afterwards.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		// Connection pool manager shared by every request below.
		PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
		// Upper bound on connections across all routes.
		cm.setMaxTotal(100);
		// Default upper bound on connections per route (target host).
		cm.setDefaultMaxPerRoute(10);

		try {
			doGet(cm);
			doGet(cm);
		} finally {
			// Release the pooled connections once all requests are done.
			cm.shutdown();
		}

	}

	/**
	 * Sends one GET request using a client backed by the given connection
	 * manager and, when the server answers with status 200, prints the number
	 * of characters in the decoded body.
	 *
	 * @param cm connection manager that supplies the connection
	 */
	private static void doGet(PoolingHttpClientConnectionManager cm) {

		// Client bound to the shared pool. It is deliberately NOT closed here:
		// closing it is expected to shut down the shared connection manager as
		// well (HttpClient 4.x behaviour when the manager is not marked shared).
		CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();

		// Request object carrying the target URL.
		HttpGet httpGet = new HttpGet("http://www.itcast.com");

		// try-with-resources closes the response (handing its connection back)
		// only when execute() actually produced one, so a failed request can no
		// longer trigger a NullPointerException on close.
		try (CloseableHttpResponse response = httpClient.execute(httpGet)) {

			// Only parse the body of a successful response.
			if (response.getStatusLine().getStatusCode() == 200) {
				String content = EntityUtils.toString(response.getEntity(), "utf8");
				System.out.println(content.length());
			}

		} catch (IOException | RuntimeException e) {
			// Demo program: report the failure and continue.
			e.printStackTrace();
		}

	}
}
HttpClient的请求配置(超时参数):
/**
 * HttpClient example: issues a GET request with explicit timeout settings
 * and prints the length of the response body.
 */
public class HttpClientGet {

	/**
	 * Sends a GET request configured with connect, connection-request and
	 * socket timeouts and, when the server answers with status 200, prints
	 * the number of characters in the decoded body.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		// Request object carrying the target URL.
		HttpGet httpGet = new HttpGet("http://www.itcast.com");

		// Per-request timeouts, all in milliseconds.
		RequestConfig config = RequestConfig.custom()
				.setConnectTimeout(500)             // max time to establish the connection
				.setConnectionRequestTimeout(1000)  // max time to obtain a connection from the connection manager
				.setSocketTimeout(1000 * 10)        // max wait for data while transferring
				.build();

		httpGet.setConfig(config);

		// try-with-resources closes the response first and the client second,
		// and skips the response entirely when execute() fails (for example on
		// a timeout), so a failed request can no longer trigger a
		// NullPointerException on close.
		try (CloseableHttpClient httpClient = HttpClients.createDefault();
				CloseableHttpResponse response = httpClient.execute(httpGet)) {

			// Only parse the body of a successful response.
			if (response.getStatusLine().getStatusCode() == 200) {
				String content = EntityUtils.toString(response.getEntity(), "utf8");
				System.out.println(content.length());
			}

		} catch (IOException | RuntimeException e) {
			// Demo program: report the failure and exit normally.
			e.printStackTrace();
		}
	}
}

最后

以上就是体贴长颈鹿为你收集整理的Java爬虫之学习篇 HttpClient学习的全部内容,希望文章能够帮你解决Java爬虫之学习篇 HttpClient学习所遇到的程序开发问题。

如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(44)

评论列表共有 0 条评论

立即
投稿
返回
顶部