爬虫--破解验证码的几种方式
摘要:破解验证码的三种方式:1. 使用 selenium 手动输入;2. 使用打码平台(推荐,例如超级鹰:http://www.chaojiying.com/price.html );3. 机器学习。本文以超级鹰为例:先去第三方打码平台注册账号,拿到 Python 的接口压缩包(其中的 Chaojiying_Client 类见下文),再在爬虫中调用。
·
1.使用selenium 手动输入
2.使用打码平台(推荐),例如超级鹰:http://www.chaojiying.com/price.html
3.机器学习
去第三方打码平台注册账号(超级鹰),拿到Python的接口压缩包
#!/usr/bin/env python
# coding:utf-8
import requests
from hashlib import md5
class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-recognition service.

    The constructor stores the account data that every request carries;
    ``PostPic`` uploads an image for recognition and ``ReportError``
    reports a wrongly recognised image by its ID.
    """

    def __init__(self, username, password, soft_id):
        """Remember the account data sent with every request.

        username: account name, sent as the ``user`` form field.
        password: plain-text password; only its MD5 hex digest is kept
            and sent (as the ``pass2`` form field).
        soft_id: software ID, sent as the ``softid`` form field.
        """
        self.username = username
        # Hash the UTF-8 bytes so non-ASCII passwords are handled too.
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        # Form fields shared by all API calls.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype, timeout=60):
        """Upload a captcha image and return the decoded JSON reply.

        im: image bytes.
        codetype: captcha type code, see http://www.chaojiying.com/price.html
        timeout: seconds to wait for the server before requests raises a
            timeout error; without it a stalled server would block the
            caller forever. Pass ``None`` to wait indefinitely.
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(
            'http://upload.chaojiying.net/Upload/Processing.php',
            data=params,
            files=files,
            headers=self.headers,
            timeout=timeout,
        )
        return r.json()

    def ReportError(self, im_id, timeout=60):
        """Report a wrongly recognised image and return the JSON reply.

        im_id: ID of the image whose recognition result was wrong.
        timeout: seconds to wait for the server (``None`` waits forever).
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post(
            'http://upload.chaojiying.net/Upload/ReportError.php',
            data=params,
            headers=self.headers,
            timeout=timeout,
        )
        return r.json()
if __name__ == '__main__':
    # Demo: recognise one local captcha image and print the service reply.
    # Replace the placeholders with a real account name and password, and
    # replace 96001 with a software ID generated in the user centre.
    chaojiying = Chaojiying_Client('超级鹰用户名', '超级鹰用户名的密码', '96001')
    # Replace a.jpg with the path of a local image (on Windows the path
    # separator may need to be written as //). The with-block closes the
    # file handle as soon as the bytes are read.
    with open('a.jpg', 'rb') as image_file:
        im = image_file.read()
    # 1902 is the captcha type code (see the price list on the official
    # site). print() is called as a function so this also runs on Python 3.
    print(chaojiying.PostPic(im, 1902))
在爬虫中使用超级鹰
from chaojiying_Python.chaojiying import Chaojiying_Client
import random
import requests
# Request headers sent with every call below: a desktop Chrome User-Agent.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/85.0.4183.83 Safari/537.36'
    ),
}
# Captcha image URL, for example:
# http://icode.renren.com/getcode.do?t=web_login&rnd=0.33034738219753246
def get_code():
    """Download a login captcha, have Chaojiying recognise it, return the text.

    Uses the module-level ``session`` so the captcha is tied to the same
    cookies as the later login request. The image is also saved to
    ``code.jpg`` in the current directory. The recognised text is printed
    and returned.
    """
    url = 'http://icode.renren.com/getcode.do?t=web_login&rnd=' + str(random.random())
    response = session.get(url, headers=headers)
    image_bytes = response.content

    # Keep a local copy of the captcha image.
    with open('code.jpg', 'wb') as fp:
        fp.write(image_bytes)

    # Recognise the captcha with Chaojiying. The downloaded bytes are
    # passed on directly instead of re-opening code.jpg, which used to
    # leave an unclosed file handle behind.
    # NOTE(review): account credentials are hard-coded here; consider
    # loading them from configuration or the environment.
    chaojiying = Chaojiying_Client('niejeff', 'abcdef123456', '898304')
    code = chaojiying.PostPic(image_bytes, 1902)['pic_str']
    print(code)
    return code
# Log in
def login(code):
    """Post the login form with the recognised captcha text.

    code: captcha text, sent as the ``icode`` form field.

    The request goes through the module-level ``session``. The captcha
    text and the decoded response body are printed; nothing is returned.
    """
    print(code)
    # NOTE(review): the account and the password/rkey values are
    # hard-coded; presumably they were captured from a real browser
    # login - confirm before reuse.
    form = {
        "email": "18566218480",
        "icode": code,
        "origURL": "http://www.renren.com/home",
        "domain": "renren.com",
        "key_id": '1',
        "captcha_type": "web_login",
        "password": "7ed07397ab7e42f74bad38c9834a208089cfb3d17fd5a356a0fa02f6a760fbdd",
        "rkey": "ef5b7e827f32a9a466aa5259f890f4a6",
        "f":"",
    }
    login_url = 'http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2020732118628'
    # Send the login request.
    reply = session.post(login_url, headers=headers, data=form)
    print(reply.content.decode())
# After logging in
def get_profile():
    """Request the profile page through the shared session and print its text."""
    page = session.get('http://www.renren.com/548819077/profile', headers=headers)
    print(page.text)
if __name__ == '__main__':
    # One shared session keeps the cookies, so the captcha request, the
    # login and the profile request all belong to the same conversation.
    session = requests.session()

    # Step 1: fetch and recognise the captcha.
    code = get_code()

    # Step 2: log in with the recognised text.
    login(code)

    # Step 3: once logged in, fetch the profile page.
    get_profile()
更多推荐
已为社区贡献2条内容
所有评论(0)