“知道”题库脚本——升级版
# 摘要:@author: JHC;@license: None;@contact: JHC000abc@gmail.com;@file: ttt.py;@time: 2022/4/22 23:27;@desc: 替换 self.uuid 就能把题目和答案写入 MySQL 数据库,uuid 在 cookies 里。
·
#!/usr/bin/env python
# encoding: utf-8
'''
@author: JHC
@license: None
@contact: JHC000abc@gmail.com
@file: ttt.py
@time: 2022/4/22 23:27
@desc:替换self.uuid就能把题和答案写入mysql数据库
uuid在cookies里
'''
import json
import requests
import pymysql.cursors
import sys
from tqdm import tqdm
class Spider():
    """Download the exercises of one zhihuishu course and store them in MySQL.

    For every exercise the spider fetches the question, finds the accepted
    answer by submitting candidates to the ``submitAnswer`` endpoint until the
    server reports success, and inserts one row into the ``xingce`` table.

    Attributes:
        con: Open PyMySQL connection used by ``insert_data``.
        uuid: Account token sent with every request (per the file header it
            is taken from the site's cookies).
        courseId: Course whose exercises are scraped.
    """

    # Letters used to label options; the ``xingce`` table has six option
    # columns (key1..key6), so only the first six options get a letter.
    OPTION_LETTERS = "ABCDEF"
    # Seconds before an HTTP call is abandoned; without a timeout ``requests``
    # would wait forever on a stalled connection.
    REQUEST_TIMEOUT = 10
    URL_SUBMIT_ANSWER = "https://hike-examstu.zhihuishu.com/zhsathome/randomExercise/submitAnswer?uuid={}"
    # Fragments removed from question titles / option texts, in this order.
    # NOTE(review): the first entry strips ordinary spaces; the original
    # script may have intended a non-breaking space or "&nbsp;" -- confirm.
    TITLE_NOISE = (" ", "<p>", "</p>", "<br>")
    OPTION_NOISE = (" ", "<p>", "</p>")

    def __init__(self, uuid="Vj1vy1A7", courseId="10464858"):
        """Open the database connection and prepare the request parameters.

        Args:
            uuid: Account token; defaults to the value previously hard-coded.
            courseId: Course id; defaults to the value previously hard-coded.
        """
        # NOTE(review): database credentials are hard-coded; consider moving
        # them to configuration before sharing this script.
        self.con = pymysql.connect(host='localhost', port=3306, user='root', password='123456', db='questionsbank',
                                   charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        self.uuid = uuid
        self.courseId = courseId
        self.randomExerciseStyle = "0"
        self.isFirst = True
        self.params = (
            ('courseId', self.courseId),
            ('randomExerciseStyle', self.randomExerciseStyle),
            ('isFirst', self.isFirst),
            ('uuid', self.uuid)
        )
        self.url_queryAnswerSheet = 'https://hike-examstu.zhihuishu.com/zhsathome/randomExercise/queryAnswerSheet'
        self.url_queryRandomExerciseDetail = 'https://hike-examstu.zhihuishu.com/zhsathome/randomExercise/queryRandomExerciseDetail'

    def get_exerciseId_list(self):
        """Return the ``exerciseId`` of every entry on the answer sheet."""
        res = requests.get(url=self.url_queryAnswerSheet, params=self.params,
                           timeout=self.REQUEST_TIMEOUT)
        return [entry["exerciseId"] for entry in res.json()["rt"]["lists"]]

    def get_content(self):
        """Fetch every exercise, resolve its answer and store it.

        Rows are numbered from 1 in answer-sheet order. An exercise whose
        detail response has ``rt`` set to ``None`` is reported and skipped,
        but still consumes its number.
        """
        exerciseId_list = self.get_exerciseId_list()
        for INDEX, questionId in enumerate(tqdm(exerciseId_list), start=1):
            params = (
                ('courseId', self.courseId),
                ('questionId', questionId),
                ('times', '2'),
                ('randomExerciseStyle', self.randomExerciseStyle),
                ('uuid', self.uuid),
            )
            response = requests.get(url=self.url_queryRandomExerciseDetail,
                                    params=params, timeout=self.REQUEST_TIMEOUT)
            # Parse the body once instead of re-decoding it for every field.
            detail = response.json()["rt"]
            if detail is None:
                print("异常题号为{}".format(INDEX))
                continue
            randomExerciseSortId = detail["randomAnswerDetailDto"]["randomExerciseId"]
            answerList = [option["id"] for option in detail["optionList"]]
            questionType = detail["questionName"]
            if questionType == '单选题':
                answer = self.get_answer_radio(randomExerciseSortId, answerList)
                self.radio(response, INDEX, questionType, answer)
            elif questionType == '多选题':
                answer = self.get_answer_multi(randomExerciseSortId, answerList)
                self.multi(response, INDEX, questionType, answer)
            elif questionType == '判断题':
                answer = self.get_answer_judge(randomExerciseSortId)
                self.judge(response, INDEX, questionType, answer)
            else:
                print('异常')

    def judge(self, response, INDEX, questionType, answer):
        """Store a true/false question."""
        self._save_question(response, INDEX, questionType, answer)

    def multi(self, response, INDEX, questionType, answer):
        """Store a multiple-choice question."""
        self._save_question(response, INDEX, questionType, answer)

    def radio(self, response, INDEX, questionType, answer):
        """Store a single-choice question."""
        self._save_question(response, INDEX, questionType, answer)

    def _save_question(self, response, INDEX, questionType, answer):
        """Format one question from ``response`` and insert it as a row."""
        key1, key2, key3, key4, key5, key6 = self.set_opt(response)
        row_id, title, row_type = self.process_data(INDEX, questionType, response)
        self.insert_data(id=row_id, title=title, type=row_type, key1=key1, key2=key2, key3=key3,
                         key4=key4, key5=key5, key6=key6, answer=answer)

    @staticmethod
    def _strip_fragments(text, fragments):
        """Return ``text`` with every fragment removed, applied in order."""
        for fragment in fragments:
            text = text.replace(fragment, "")
        return text

    def process_data(self, INDEX, questionType, response):
        """Return ``(id, title, type)`` for a question.

        The title is the response's ``rt.content`` converted to ``str`` with
        the ``TITLE_NOISE`` fragments removed; ``INDEX`` and ``questionType``
        are passed through unchanged.
        """
        content = response.json()["rt"]["content"]
        title = self._strip_fragments(str(content), self.TITLE_NOISE)
        return INDEX, title, questionType

    def set_opt(self, response):
        """Return the option texts as a 6-tuple ``(key1, ..., key6)``.

        Each present option becomes ``"<letter>. <cleaned text>"``; slots
        without an option are ``None`` and options beyond the sixth are
        ignored.
        """
        optionList = response.json()["rt"]["optionList"]
        keys = [None] * len(self.OPTION_LETTERS)
        for slot, (letter, option) in enumerate(zip(self.OPTION_LETTERS, optionList)):
            text = self._strip_fragments(option["content"], self.OPTION_NOISE)
            keys[slot] = letter + '. ' + text
        return tuple(keys)

    def insert_data(self, id, title, type, key1=None, key2=None, key3=None, key4=None, key5=None, key6=None, answer=None):
        """Insert one question row into ``xingce``.

        Stays best-effort: a failed insert is rolled back and reported, and
        scraping continues. ``KeyboardInterrupt``/``SystemExit`` are no longer
        swallowed, so the run can still be aborted.
        """
        sql = 'INSERT INTO xingce (id, title,key1,key2,key3,key4,key5,key6,type,answer) VALUES (%s,%s, %s, %s, %s, %s,%s,%s,%s,%s)'
        try:
            with self.con.cursor() as cur:
                cur.execute(sql, (id, title, key1, key2, key3, key4, key5, key6, type, answer))
            self.con.commit()
        except Exception as exc:
            self.con.rollback()
            print("题号{}写入失败: {}".format(id, exc))

    def _submit_answer(self, randomExerciseSortId, answerContent):
        """Submit one candidate answer; return whether the server's ``rt`` equals True."""
        payload = {"data": {"answerContent": answerContent, "fileList": [], "isReview": True,
                            "randomExerciseSortId": randomExerciseSortId,
                            # Use the configured course instead of a second hard-coded copy.
                            "courseId": self.courseId}}
        res = requests.post(self.URL_SUBMIT_ANSWER.format(self.uuid),
                            headers={"Content-Type": "application/json"},
                            data=json.dumps(payload), timeout=self.REQUEST_TIMEOUT)
        # Equality (not identity) is kept on purpose: the exact JSON type of
        # ``rt`` is not visible here, and ``1 == True`` must keep matching.
        return res.json()["rt"] == True  # noqa: E712

    def _option_letter(self, position):
        """Return the letter for a 0-based option position, or ``None`` past 'F'."""
        if position < len(self.OPTION_LETTERS):
            return self.OPTION_LETTERS[position]
        return None

    def _letters_for(self, answerList, chosen):
        """Map the chosen option ids to their letters, skipping ids past 'F'."""
        letters = (self._option_letter(answerList.index(option_id)) for option_id in chosen)
        return [letter for letter in letters if letter is not None]

    def get_answer_judge(self, randomExerciseSortId):
        """Find the answer of a true/false question.

        Tries the values 0..9 as ``answerContent``. Returns ``"B"`` when 0 is
        accepted, ``"A"`` when any other value is accepted, and ``None`` when
        none is.
        """
        for key in range(0, 10):
            if self._submit_answer(randomExerciseSortId, key):
                return "B" if key == 0 else "A"
        return None

    def get_answer_radio(self, randomExerciseSortId, answerList):
        """Find the answer of a single-choice question.

        Submits each option id in order and returns the letter ('A'..'F') of
        the first accepted one, or ``None`` when nothing is accepted.
        """
        for position, key in enumerate(answerList):
            if not self._submit_answer(randomExerciseSortId, key):
                continue
            letter = self._option_letter(position)
            if letter is not None:
                return letter
        return None

    def get_answer_multi(self, randomExerciseSortId, answerList):
        """Find the answer of a multiple-choice question.

        Submits every non-empty combination of option ids (comma-joined) and
        returns the comma-joined letters of the first accepted combination,
        or ``None`` when nothing is accepted.
        """
        for key in self.get_key(answerList):
            if not key:
                # The empty combination is not a valid submission.
                continue
            key_str = ",".join(str(option_id) for option_id in key)
            if self._submit_answer(randomExerciseSortId, key_str):
                return ",".join(self._letters_for(answerList, key))
        return None

    def get_key(self, answerList):
        """Return every subset of ``answerList``, the empty one included.

        Subsets are enumerated by bitmask: bit ``j`` of the running counter
        selects ``answerList[j]``, so the result has ``2 ** len(answerList)``
        entries and starts with ``[]``.
        """
        count = len(answerList)
        return [
            [answerList[j] for j in range(count) if (mask >> j) % 2 == 1]
            for mask in range(2 ** count)
        ]
if __name__ == '__main__':
    # Script entry point: scrape the configured course into MySQL.
    sp = Spider()
    try:
        sp.get_content()
    finally:
        # Release the MySQL connection opened by Spider.__init__ even when
        # scraping stops part-way through.
        sp.con.close()
更多推荐
已为社区贡献7条内容
所有评论(0)