概述
HttpClient
简介:网络爬虫就是用程序代替人工去访问网络上的资源。我们平时通过浏览器使用HTTP协议访问互联网上的网页,编写爬虫程序时,同样是使用HTTP协议来访问网页。
这里我们使用Java 的 HTTP协议客户端 HttpClient 这个技术,来实现抓取网页数据。
准备工作:HttpClient的jar包的maven坐标
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.2</version>
</dependency>
HttpClient的Get请求:
import java.io.IOException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
/**
 * Minimal HttpClient GET example: requests a page and prints the length of its body.
 */
public class HttpClientGet {

    /**
     * Sends a GET request to http://www.itcast.com and, when the status code is 200,
     * prints the number of characters in the response body (read via
     * {@code EntityUtils.toString} with "utf8" as the charset argument).
     * Any exception raised while requesting or reading is printed, not rethrown.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Create the HttpClient instance.
        CloseableHttpClient httpClient = HttpClients.createDefault();
        // Create the GET request for the target URL.
        HttpGet httpGet = new HttpGet("http://www.itcast.com");
        CloseableHttpResponse response = null;
        try {
            // Execute the request and obtain the response.
            response = httpClient.execute(httpGet);
            // Only read the body on a 200 status.
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content.length());
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // response is still null when execute() threw, so guard before closing;
            // close the response before the client that produced it.
            if (response != null) {
                try {
                    response.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            try {
                httpClient.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
HttpClient的Get带参数请求:
/**
 * HttpClient GET example with query parameters built through {@code URIBuilder}.
 */
public class HttpClientParamGet {

    /**
     * Sends a GET request to http://yun.itheima.com/search with the query parameter
     * {@code keys=Java} and, when the status code is 200, prints the number of
     * characters in the response body.
     * Exceptions raised while requesting or reading are printed, not rethrown.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the request URI cannot be built
     */
    public static void main(String[] args) throws Exception {
        // Target address: http://yun.itheima.com/search?keys=Java
        // Build the request before creating the client, so that a failure while
        // building the URI cannot leave an unclosed client behind.
        URIBuilder uriBuilder = new URIBuilder("http://yun.itheima.com/search");
        // Set the query parameter. Several parameters can be chained, e.g.
        //   uriBuilder.setParameter("k1", "v1").setParameter("k2", "v2");
        uriBuilder.setParameter("keys", "Java");
        // Create the GET request from the assembled URI.
        HttpGet httpGet = new HttpGet(uriBuilder.build());

        // Create the HttpClient instance.
        CloseableHttpClient httpClient = HttpClients.createDefault();
        CloseableHttpResponse response = null;
        try {
            // Execute the request and obtain the response.
            response = httpClient.execute(httpGet);
            // Only read the body on a 200 status.
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content.length());
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // response is still null when execute() threw, so guard before closing;
            // close the response before the client that produced it.
            if (response != null) {
                try {
                    response.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            try {
                httpClient.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
HttpClient的Post请求:
/**
 * Minimal HttpClient POST example (no request body): posts to a URL and prints the
 * length of the response body.
 */
public class HttpClientPost {

    /**
     * Sends a POST request to http://www.itcast.com and, when the status code is 200,
     * prints the number of characters in the response body.
     * Any exception raised while requesting or reading is printed, not rethrown.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Create the HttpClient instance.
        CloseableHttpClient httpClient = HttpClients.createDefault();
        // Create the POST request for the target URL.
        HttpPost httpPost = new HttpPost("http://www.itcast.com");
        CloseableHttpResponse response = null;
        try {
            // Execute the request and obtain the response.
            response = httpClient.execute(httpPost);
            // Only read the body on a 200 status.
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content.length());
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // response is still null when execute() threw, so guard before closing;
            // close the response before the client that produced it.
            if (response != null) {
                try {
                    response.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            try {
                httpClient.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
HttpClient的post带参数请求:
/**
 * HttpClient POST example that submits form parameters in the request body.
 */
public class HttpClientPost {

    /**
     * Posts the form field {@code keys=Java} to http://yun.itheima.com/search and,
     * when the status code is 200, prints the number of characters in the response body.
     * Exceptions raised while requesting or reading are printed, not rethrown.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the form entity cannot be created
     */
    public static void main(String[] args) throws Exception {
        // Build the complete request before creating the client, so that a failure
        // while encoding the form cannot leave an unclosed client behind.
        // Create the POST request for the target URL.
        HttpPost httpPost = new HttpPost("http://yun.itheima.com/search");
        // Collect the form fields in a list.
        List<NameValuePair> params = new ArrayList<NameValuePair>();
        params.add(new BasicNameValuePair("keys", "Java"));
        // Wrap the fields in a form entity: first argument is the form data,
        // second argument is the encoding.
        UrlEncodedFormEntity urlencode = new UrlEncodedFormEntity(params, "utf8");
        // Attach the form entity to the POST request.
        httpPost.setEntity(urlencode);

        // Create the HttpClient instance.
        CloseableHttpClient httpClient = HttpClients.createDefault();
        CloseableHttpResponse response = null;
        try {
            // Execute the request and obtain the response.
            response = httpClient.execute(httpPost);
            // Only read the body on a 200 status.
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content.length());
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // response is still null when execute() threw, so guard before closing;
            // close the response before the client that produced it.
            if (response != null) {
                try {
                    response.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            try {
                httpClient.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
HttpClient连接池
如果每次请求都要创建HttpClient,会有频繁创建和销毁的问题,可以使用连接池来解决这个问题。
/**
 * HttpClient example that issues GET requests through a shared
 * {@code PoolingHttpClientConnectionManager}.
 */
public class HttpClientPoolGet {

    /**
     * Creates a connection pool (100 connections in total, 10 per route), performs two
     * GET requests through it, and shuts the pool down afterwards.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Create the pooling connection manager.
        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
        // Maximum number of connections in the whole pool.
        cm.setMaxTotal(100);
        // Default maximum number of connections per route (host).
        cm.setDefaultMaxPerRoute(10);
        try {
            doGet(cm);
            doGet(cm);
        } finally {
            // Release the pooled connections once no more requests will be made.
            cm.shutdown();
        }
    }

    /**
     * Sends a GET request to http://www.itcast.com using a client backed by the given
     * connection manager and, when the status code is 200, prints the number of
     * characters in the response body. Exceptions are printed, not rethrown.
     *
     * @param cm the shared connection manager the client is built on
     */
    private static void doGet(PoolingHttpClientConnectionManager cm) {
        // Build a client that takes its connections from the shared pool.
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();
        // Create the GET request for the target URL.
        HttpGet httpGet = new HttpGet("http://www.itcast.com");
        CloseableHttpResponse response = null;
        try {
            // Execute the request and obtain the response.
            response = httpClient.execute(httpGet);
            // Only read the body on a 200 status.
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content.length());
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Deliberately do NOT close httpClient here: it is built on the shared
            // connection manager, which must stay usable for the next doGet() call.
            // Only the response is closed.
            // response is still null when execute() threw, so guard before closing.
            if (response != null) {
                try {
                    response.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
请求参数配置(超时设置)
/**
 * HttpClient GET example that applies per-request timeout settings through
 * {@code RequestConfig}.
 */
public class HttpClientGet {

    /**
     * Sends a GET request to http://www.itcast.com with explicit timeouts and, when the
     * status code is 200, prints the number of characters in the response body.
     * Any exception raised while requesting or reading is printed, not rethrown.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Create the HttpClient instance.
        CloseableHttpClient httpClient = HttpClients.createDefault();
        // Create the GET request for the target URL.
        HttpGet httpGet = new HttpGet("http://www.itcast.com");
        RequestConfig config = RequestConfig.custom()
                .setConnectTimeout(500)            // max time to establish the connection, in ms
                .setConnectionRequestTimeout(1000) // max time to obtain a connection, in ms
                .setSocketTimeout(1000 * 10)       // max time for data transfer, in ms
                .build();
        // Apply the timeout settings to this request.
        httpGet.setConfig(config);
        CloseableHttpResponse response = null;
        try {
            // Execute the request and obtain the response.
            response = httpClient.execute(httpGet);
            // Only read the body on a 200 status.
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content.length());
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // response is still null when execute() threw (e.g. on a timeout), so guard
            // before closing; close the response before the client that produced it.
            if (response != null) {
                try {
                    response.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            try {
                httpClient.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
最后
以上就是体贴长颈鹿为你收集整理的Java爬虫之学习篇 HttpClient学习的全部内容,希望文章能够帮你解决Java爬虫之学习篇 HttpClient学习所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复