Jsoup获取http代理列表,并存放在properties文件,并随机取出使用

这是一个练习,写得一坨屎一样的练习!!!
jsoup 的 jar 包下载地址:http://jsoup.org/download
这是用于java web利用代理抓取页面再显示到前台的练习,由于太久没写代码了,里面的逻辑简直和屎一样!!!
由于涉及重复请求的问题,我加了一个标识位(flag)来防止重复请求。目前我只想到这种做法,如果有更好的方法,欢迎指教。

package com.hxgsn.entities;

public class ipPort {

	private String ip;
	private int port;

	public String getIp() {
		return ip;
	}

	public void setIp(String ip) {
		this.ip = ip;
	}

	public int getPort() {
		return port;
	}

	public void setPort(int port) {
		this.port = port;
	}

}
package com.hxgsn.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.hxgsn.entities.ipPort;

/**
 * Scrapes HTTP proxy addresses from http://cn-proxy.com/, caches them in a
 * {@code bin/ip.properties} file (key = IP, value = port) and hands them out
 * as a shuffled list of {@link ipPort} entries.
 *
 * <p>This class performs no synchronization; the {@link #flag} field is a
 * plain re-entrancy marker, not a lock.
 */
public class httpProxy {

	/** Page the proxy list is scraped from; also stored as the cache file comment. */
	private static final String SOURCE_URL = "http://cn-proxy.com/";

	/**
	 * Matches an IPv4-looking address followed by a port, separated by one or
	 * more HTML tags and/or a colon (e.g. {@code 1.2.3.4</td><td>8080} or
	 * {@code 1.2.3.4:8080}). Requiring a separator prevents the port from
	 * being carved out of the last octet of the address by backtracking.
	 */
	private static final Pattern PROXY_PATTERN = Pattern.compile(
			"(?<![\\d.])(\\d{1,3}(?:\\.\\d{1,3}){3})(?:<[^>]*>|:)+(\\d{1,5})(?!\\d)");

	/** Upper bound on fetch attempts per load, so a dead site cannot cause an endless loop. */
	private static final int MAX_FETCH_ATTEMPTS = 3;

	/** In-memory copy of the cache file contents. */
	private Properties properties = new Properties();

	/** Proxies that have been loaded and not yet removed through {@link #delIp(ipPort)}. */
	private List<ipPort> list = new ArrayList<ipPort>();

	/** {@code true} when no load is in progress; set to {@code false} while loading. */
	public boolean flag = true;

	/**
	 * @return the properties object the cache file is loaded into
	 */
	public Properties getProperties() {
		return properties;
	}

	/**
	 * @param properties the properties object to load the cache file into
	 */
	public void setProperties(Properties properties) {
		this.properties = properties;
	}

	/**
	 * Returns the proxy list, loading it first when it is empty and no load
	 * is already in progress. The list is shuffled in place on every call.
	 *
	 * @return the live internal list (not a copy), in random order
	 */
	public List<ipPort> getList() {
		if (list.size() == 0 && flag) {
			flag = false; // mark a load as in progress
			propToList(); // always restores the flag, even on failure
		}
		Collections.shuffle(list);
		return list;
	}

	/**
	 * @param list the list to use as the internal proxy list
	 */
	public void setList(List<ipPort> list) {
		this.list = list;
	}

	/**
	 * Downloads the proxy page, extracts every ip/port pair and overwrites the
	 * cache file with the result. I/O failures are reported on stderr and
	 * otherwise ignored, leaving any previous cache file untouched when the
	 * download itself fails.
	 */
	public void getIpResult() {
		try {
			Document doc = Jsoup.connect(SOURCE_URL).get();
			// Strip all whitespace so that markup between address and port is contiguous.
			Matcher matcher = PROXY_PATTERN.matcher(doc.toString().replaceAll("\\s+", ""));

			File target = new File(getRootPath() + "/bin/ip.properties");
			File parent = target.getParentFile();
			if (parent != null) {
				parent.mkdirs(); // the bin directory may be missing, e.g. in a web app layout
			}

			Properties fetched = new Properties();
			try (OutputStream outputStream = new FileOutputStream(target)) {
				System.out.println("开始写入数据");
				while (matcher.find()) {
					fetched.setProperty(matcher.group(1), matcher.group(2));
					System.out.println(matcher.group(1) + ":" + matcher.group(2));
				}
				fetched.store(outputStream, SOURCE_URL);
			}
			System.out.println("写入数据结束");
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Derives the application root directory from the location of this
	 * class file: everything before {@code WEB-INF}, or, failing that, before
	 * the last {@code /bin/} path segment.
	 *
	 * @return the root directory path without a trailing slash
	 * @throws IllegalStateException if the class location contains neither marker
	 */
	public String getRootPath() {
		String result = httpProxy.class.getResource("httpProxy.class")
				.toString().replaceAll("%20", " ");
		int index = result.indexOf("WEB-INF");
		if (index == -1) {
			// Look for a whole path segment so names such as "robin" do not match.
			int binSegment = result.lastIndexOf("/bin/");
			if (binSegment != -1) {
				index = binSegment + 1;
			}
		}
		if (index == -1) {
			throw new IllegalStateException(
					"Cannot locate WEB-INF or bin in class location: " + result);
		}
		result = result.substring(0, index);
		if (result.startsWith("jar")) {
			result = result.substring(10); // drops "jar:file:/"
		} else if (result.startsWith("file")) {
			result = result.substring(6); // drops "file:/"
		}
		// No drive letter ("C:") at the front: restore the leading slash of an absolute path.
		if (result.length() < 2 || result.charAt(1) != ':') {
			result = "/" + result;
		}
		if (result.endsWith("/")) {
			result = result.substring(0, result.length() - 1);
		}
		return result;
	}

	/**
	 * Loads the cache file into {@link #getProperties()} and appends one
	 * {@link ipPort} per valid entry to the internal list. When the file is
	 * missing or yields no entries the proxies are fetched again, at most
	 * {@value #MAX_FETCH_ATTEMPTS} times. Entries whose port is not a number
	 * in 1..65535 are skipped. {@link #flag} is set back to {@code true} when
	 * this method returns, whether or not loading succeeded.
	 */
	public void propToList() {
		try {
			File cache = new File(getRootPath() + "/bin/ip.properties");
			boolean refresh = !cache.exists();
			for (int attempt = 0; attempt < MAX_FETCH_ATTEMPTS; attempt++) {
				if (refresh) {
					getIpResult();
				}
				if (cache.exists()) {
					try (InputStream inputStream = new FileInputStream(cache)) {
						properties.load(inputStream);
					}
				}
				if (!properties.isEmpty()) {
					break;
				}
				refresh = true; // cache missing or empty: fetch before the next attempt
			}

			System.out.println("开始读取文件内容");
			for (Map.Entry<Object, Object> entry : properties.entrySet()) {
				int port;
				try {
					port = Integer.parseInt(entry.getValue().toString().trim());
				} catch (NumberFormatException e) {
					continue; // corrupt cache entry; one bad line must not abort the load
				}
				if (port < 1 || port > 65535) {
					continue;
				}
				ipPort txt = new ipPort();
				txt.setIp(entry.getKey().toString());
				txt.setPort(port);
				list.add(txt);
			}
			System.out.println("读取文件内容结束");
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Without this a single failed load would leave the flag false forever,
			// blocking every later getList() reload and every delIp() call.
			flag = true;
		}
	}

	/**
	 * Removes a proxy from the list unless a load is in progress. When the
	 * last entry is removed the cache file is refreshed from the web; the list
	 * itself is repopulated by the next {@link #getList()} call.
	 *
	 * @param ip the entry to remove
	 */
	public void delIp(ipPort ip) {
		if (flag) {
			list.remove(ip);
			if (list.size() == 0) {
				getIpResult();
			}
		}
	}

}
package com.hxgsn.dome;

import com.hxgsn.entities.ipPort;
import com.hxgsn.util.httpProxy;

/**
 * Console demo: takes proxies from {@link httpProxy} one at a time, prints
 * each one, removes it, and prints the remaining list size.
 */
public class dome {

	/**
	 * Runs the demo. Performs network and file I/O through {@link httpProxy}.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		httpProxy hp = new httpProxy();

		// Size is captured once: the loop consumes exactly the entries present at start.
		int listSize = hp.getList().size();
		for (int i = 0; i < listSize; i++) {
			// The explicit cast keeps this compiling even when getList() returns a raw List,
			// whose get(0) is typed as Object.
			ipPort ip = (ipPort) hp.getList().get(0);
			System.out.println("ip" + (i + 1) + ":" + ip.getIp() + ":"
					+ ip.getPort());
			hp.delIp(ip);
			System.out.println(hp.getList().size());
		}

		System.out.println(hp.getList().size());

	}

}

分享到: