使用python爬取免费IP并验证
前言：人生苦短，我用 python。本文实现：爬取目标地址 https://www.kuaidaili.com/free/inha/1 上的免费代理 IP，并逐一验证其可用性，全部代码见下文。
·
前言
人生苦短,我用python
实现
爬取目标地址:
https://www.kuaidaili.com/free/inha/1
全部代码
import requests
import re

# Target page: first page of kuaidaili's free proxy list.
url = "https://www.kuaidaili.com/free/inha/1"
# Browser-like User-Agent so the site does not reject the request outright.
headers = {
"User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36"
}
# Fetch the proxy-list page (timeout added so a hung connection cannot block forever).
response = requests.get(url, headers=headers, timeout=10)
html = response.text
# Extract IPs and ports from the table cells.
# Raw strings avoid invalid-escape warnings for \d; re.S lets . span newlines
# (the `.` around IP/PORT tolerates either quote style in the markup).
ips = re.findall(r"<td data-title=.IP.>(\d+\.\d+\.\d+\.\d+)</td>", html, re.S)
ports = re.findall(r"<td data-title=.PORT.>(\d+)</td>", html, re.S)

# Try each (ip, port) pair as a proxy and record the ones that work.
for ip, port in zip(ips, ports):
    proxy_addr = "http://" + ip + ":" + port
    proxies = {
        "http": proxy_addr,
        # Free proxies here are plain HTTP; route https traffic through the same endpoint.
        "https": proxy_addr,
    }
    try:
        # Probe via baidu with a short timeout; a slow proxy counts as unusable.
        res = requests.get("https://www.baidu.com/", proxies=proxies, timeout=2)
        # Only a real 200 response proves the proxy actually works —
        # an error page from the proxy itself must not count as success.
        if res.status_code != 200:
            raise requests.RequestException("bad status %s" % res.status_code)
        print((ip, port), "能使用")
        # Append the working proxy as "ip:port" to ip.txt in the current directory.
        with open("ip.txt", mode="a+", encoding="utf-8") as f:
            f.write(":".join((ip, port)))
            f.write("\n")
    except requests.RequestException:
        # Connection refused/timed out/HTTP failure: proxy is unusable.
        print((ip, port), "不能使用")
更多推荐
已为社区贡献1条内容
所有评论(0)