通过 HttpClientUtil 工具类爬取网页,解析返回的 JSON 数据,再通过 IO 流将解析好的数据保存到本地文件。
代码如下:
package com.ly.spider.http;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.apache.http.impl.client.CloseableHttpClient;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Crawls the paginated platform listing served by www.wdzj.com and saves selected
 * fields of every listed entry to a local tab-separated text file.
 *
 * Created by Eric on 2017/7/5.
 */
public class SpiderImpl {
    /** URL that every page request is POSTed to. */
    private static final String LIST_URL = "http://www.wdzj.com/front_select-plat";
    /** First page number requested (sent as the {@code currPage} parameter). */
    private static final int FIRST_PAGE = 1;
    /** Last page number requested, inclusive. */
    private static final int LAST_PAGE = 190;
    /** JSON fields copied, in this order, into each output row. */
    private static final String[] COLUMN_KEYS = {"platName", "cityName", "term", "serviceAttitude"};
    /** Destination file of the extracted rows. */
    private static final String OUTPUT_PATH = "F:\\Test\\tests.txt";
    /** Charset name used when writing the output file. */
    private static final String OUTPUT_ENCODING = "utf-8";

    /**
     * Command-line entry point: runs one full crawl.
     *
     * @param args unused
     * @throws Exception if the crawl fails
     */
    public static void main(String[] args) throws Exception {
        SpiderImpl spider = new SpiderImpl();
        spider.checkStatus();
    }

    /**
     * Downloads every listing page, extracts the configured columns from each entry,
     * writes the result to {@link #OUTPUT_PATH} and prints {@code success}.
     *
     * @return always {@code null}; the result is delivered through the output file
     * @throws Exception if a request fails or a response cannot be parsed as JSON
     */
    public String checkStatus() throws Exception {
        List<String> pages = fetchPages();
        String value = toTabSeparated(pages);
        IOUtil.writeFile(OUTPUT_PATH, value, OUTPUT_ENCODING);
        System.out.println("success");
        return null;
    }

    /** Builds the browser-like request headers sent with every page request. */
    private static Map<String, String> buildHeaders() {
        Map<String, String> headerMap = new HashMap<String, String>();
        headerMap.put("Host", "www.wdzj.com");
        headerMap.put("Connection", "keep-alive");
        headerMap.put("Upgrade-Insecure-Requests", "1");
        headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36");
        headerMap.put("Accept-Encoding", "gzip, deflate, sdch");
        headerMap.put("Accept-Language", "zh-CN,zh;q=0.8");
        headerMap.put("Accept", "application/json, text/javascript, */*; q=0.01");
        return headerMap;
    }

    /** Requests pages FIRST_PAGE..LAST_PAGE in order and returns their raw response bodies. */
    private static List<String> fetchPages() throws Exception {
        Map<String, String> headerMap = buildHeaders();
        // NOTE(review): the client is not closed here because getDefaultHttpClient() may
        // hand out a shared instance - confirm against HttpClientUtil.
        CloseableHttpClient httpClient = HttpClientUtil.getDefaultHttpClient();

        Map<String, Object> paramMap = new HashMap<String, Object>();
        // These two parameters are the same for every page, so set them once.
        paramMap.put("params", "");
        paramMap.put("sort", "0");

        List<String> pages = new ArrayList<>();
        for (int page = FIRST_PAGE; page <= LAST_PAGE; page++) {
            paramMap.put("currPage", page);
            HttpContext context = HttpClientUtil.doPost(httpClient, LIST_URL, paramMap, headerMap);
            pages.add(HttpClientUtil.getResponseContent(context));
        }
        return pages;
    }

    /** Converts the raw JSON pages into one tab-separated line per entry of each page's "list" array. */
    private static String toTabSeparated(List<String> pages) {
        StringBuilder out = new StringBuilder();
        for (String pageText : pages) {
            // Parse each response on its own instead of gluing all responses into one
            // pseudo-JSON array via List.toString().
            JSONObject page = JSONObject.parseObject(pageText);
            if (page == null) {
                continue; // empty response: nothing to extract
            }
            JSONArray rows = page.getJSONArray("list");
            if (rows == null) {
                continue; // page carries no "list" field
            }
            // Emit every entry exactly once (a redundant outer loop used to repeat
            // each page's rows rows.size() times).
            for (int r = 0; r < rows.size(); r++) {
                JSONObject item = rows.getJSONObject(r);
                for (String column : COLUMN_KEYS) {
                    out.append(item.get(column)).append('\t');
                }
                out.append('\n');
            }
        }
        return out.toString();
    }
}
IOUtil 工具类,用于将解析好的数据保存到本地文件:
package com.ly.spider.http;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
/**
 * File I/O helpers.
 *
 * @author zel
 */
public class IOUtil {
    /**
     * Writes {@code value} to the file at {@code filePath}, replacing any existing content.
     *
     * <p>This is a best-effort operation: failures are reported by printing the stack
     * trace and are never propagated to the caller. If the text cannot be encoded
     * (for example because {@code encoding} names an unsupported charset), the target
     * file is left untouched.
     *
     * @param filePath path of the file to write
     * @param value    text to write
     * @param encoding name of the charset used to encode {@code value}
     */
    public static void writeFile(String filePath, String value, String encoding) {
        try {
            // Encode BEFORE opening the stream: opening a FileOutputStream truncates the
            // target, so a bad charset name must fail first or it would wipe the file.
            byte[] payload = value.getBytes(encoding);
            // try-with-resources closes the stream exactly once on every path.
            try (FileOutputStream fos = new FileOutputStream(new File(filePath))) {
                fos.write(payload);
            }
        } catch (IOException | RuntimeException e) {
            // Keep the original never-throws contract; only report the failure.
            e.printStackTrace();
        }
    }

    /**
     * Small manual demo: writes a sample string to {@code test.txt} in the working directory.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String filePath = "test.txt";
        String value = "hello world,123";
        String encoding = "utf-8";
        IOUtil.writeFile(filePath, value, encoding);
        System.out.println("done!");
    }
}