#!/usr/bin/env python
# encoding: utf-8
'''
@author: JHC
@license: None
@contact: JHC000abc@gmail.com
@file: ttt.py
@time: 2022/4/22 23:27
@desc: Replace self.uuid to write the questions and answers into the MySQL database.
       The uuid can be found in the cookies.
'''
import json
import requests
import pymysql.cursors
import sys
from tqdm import tqdm


class Spider():
    '''Download random-exercise questions and store them with answers in MySQL.

    For every exercise id listed by the answer-sheet endpoint the spider
    fetches the question detail, finds the answer by submitting candidate
    answers until the response's ``rt`` field is true, and inserts the
    question, its options and the answer letter(s) into the ``xingce`` table.
    '''

    # Letters used to label options; the table only has key1..key6 columns,
    # so options beyond the sixth are never labelled or stored.
    OPTION_LETTERS = 'ABCDEF'
    # Submit endpoint; the ``{}`` placeholder receives the uuid.
    URL_SUBMIT_ANSWER = 'https://hike-examstu.zhihuishu.com/zhsathome/randomExercise/submitAnswer?uuid={}'

    def __init__(self, uuid="Vj1vy1A7", courseId="10464858"):
        '''Open the MySQL connection and prepare the request parameters.

        :param uuid: value sent as the ``uuid`` query parameter of every
            request (the file header says it comes from the cookies).
        :param courseId: course whose random exercises are downloaded.
        '''
        # NOTE(review): connection settings (including the password) are
        # hard-coded; consider moving them to configuration.
        self.con = pymysql.connect(host='localhost', port=3306, user='root', password='123456', db='questionsbank',
                                   charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        self.uuid = uuid
        self.courseId = courseId
        self.randomExerciseStyle = "0"
        self.isFirst = True

        self.params = (
            ('courseId', self.courseId),
            ('randomExerciseStyle', self.randomExerciseStyle),
            ('isFirst', self.isFirst),
            ('uuid', self.uuid),
        )
        self.url_queryAnswerSheet = 'https://hike-examstu.zhihuishu.com/zhsathome/randomExercise/queryAnswerSheet'
        self.url_queryRandomExerciseDetail = 'https://hike-examstu.zhihuishu.com/zhsathome/randomExercise/queryRandomExerciseDetail'

    def get_exerciseId_list(self):
        '''Return the ``exerciseId`` of every entry in the answer sheet.'''
        res = requests.get(url=self.url_queryAnswerSheet, params=self.params)
        return [entry["exerciseId"] for entry in res.json()["rt"]["lists"]]

    def get_content(self):
        '''Fetch each question, resolve its answer and insert it into MySQL.

        Questions are numbered from 1 in answer-sheet order; the number is
        used as the row id. A question whose detail payload is null, or whose
        type is not single-choice / multi-choice / true-false, is reported on
        stdout and skipped (its number is still consumed).
        '''
        exerciseId_list = self.get_exerciseId_list()
        for INDEX, questionId in enumerate(tqdm(exerciseId_list), start=1):
            params = (
                ('courseId', self.courseId),
                ('questionId', questionId),
                ('times', '2'),
                ('randomExerciseStyle', self.randomExerciseStyle),
                ('uuid', self.uuid),
            )
            response = requests.get(url=self.url_queryRandomExerciseDetail,
                                    params=params)
            # Parse the body once instead of once per field.
            rt = response.json()["rt"]
            if rt is None:
                print("异常题号为{}".format(INDEX))
                continue

            randomExerciseSortId = rt["randomAnswerDetailDto"]["randomExerciseId"]
            # Tolerate a missing/null option list instead of crashing.
            answerList = [option["id"] for option in rt.get("optionList") or []]
            questionType = rt["questionName"]

            if questionType == '单选题':
                answer = self.get_answer_radio(randomExerciseSortId, answerList)
                self.radio(response, INDEX, questionType, answer)
            elif questionType == '多选题':
                answer = self.get_answer_multi(randomExerciseSortId, answerList)
                self.multi(response, INDEX, questionType, answer)
            elif questionType == '判断题':
                answer = self.get_answer_judge(randomExerciseSortId)
                self.judge(response, INDEX, questionType, answer)
            else:
                print('异常')

    def _store_question(self, response, INDEX, questionType, answer):
        '''Extract title and options from ``response`` and insert one row.'''
        key1, key2, key3, key4, key5, key6 = self.set_opt(response)
        row_id, title, kind = self.process_data(INDEX, questionType, response)
        self.insert_data(id=row_id, title=title, type=kind,
                         key1=key1, key2=key2, key3=key3,
                         key4=key4, key5=key5, key6=key6, answer=answer)

    def judge(self, response, INDEX, questionType, answer):
        '''Store a true/false question.'''
        self._store_question(response, INDEX, questionType, answer)

    def multi(self, response, INDEX, questionType, answer):
        '''Store a multiple-choice question.'''
        self._store_question(response, INDEX, questionType, answer)

    def radio(self, response, INDEX, questionType, answer):
        '''Store a single-choice question.'''
        self._store_question(response, INDEX, questionType, answer)

    def process_data(self, INDEX, questionType, response):
        '''Return ``(id, title, type)`` with markup stripped from the title.

        The title is the question ``content`` with every ``&nbsp;``, ``<p>``,
        ``</p>`` and ``<br>`` removed; ``INDEX`` and ``questionType`` are
        passed through unchanged.
        '''
        content = response.json()["rt"]["content"]
        title = str(content).replace("&nbsp;", "").replace("<p>", "").replace("</p>", "").replace("<br>", "")
        return INDEX, title, questionType

    def set_opt(self, response):
        '''Return six option strings labelled ``"A. "`` .. ``"F. "``.

        Each option's ``content`` has ``&nbsp;``, ``<p>`` and ``</p>``
        removed. Unused slots are ``None``; options past the sixth are
        ignored.
        '''
        options = response.json()["rt"].get("optionList") or []
        keys = [None] * len(self.OPTION_LETTERS)
        # zip() stops at the shorter sequence, so at most six slots are filled.
        for slot, (letter, option) in enumerate(zip(self.OPTION_LETTERS, options)):
            text = option["content"].replace("&nbsp;", "").replace("<p>", "").replace("</p>", "")
            keys[slot] = letter + '. ' + text
        return tuple(keys)

    def insert_data(self, id, title, type, key1=None, key2=None, key3=None, key4=None, key5=None, key6=None, answer=None):
        '''Insert one question row into ``xingce`` and commit.

        A database error rolls the transaction back and is reported on
        stderr; the spider then carries on with the next question. Errors
        that are not MySQL errors propagate to the caller.
        '''
        sql = 'INSERT INTO xingce (id, title,key1,key2,key3,key4,key5,key6,type,answer) VALUES (%s,%s, %s, %s, %s, %s,%s,%s,%s,%s)'
        try:
            with self.con.cursor() as cur:
                cur.execute(sql, (id, title, key1, key2, key3, key4, key5, key6, type, answer))
            self.con.commit()
        except pymysql.MySQLError as exc:
            self.con.rollback()
            sys.stderr.write("insert failed for question {}: {}\n".format(id, exc))

    def _option_letter(self, position):
        '''Return the letter for a 0-based option position, or ``None`` past F.'''
        if 0 <= position < len(self.OPTION_LETTERS):
            return self.OPTION_LETTERS[position]
        return None

    def _submit_answer(self, answerContent, randomExerciseSortId):
        '''POST one candidate answer; return whether the reply's ``rt`` is true.'''
        payload = {"data": {"answerContent": answerContent, "fileList": [], "isReview": True,
                            "randomExerciseSortId": randomExerciseSortId,
                            "courseId": self.courseId}}
        res = requests.post(self.URL_SUBMIT_ANSWER.format(self.uuid),
                            headers={"Content-Type": "application/json"},
                            data=json.dumps(payload))
        # Equality (not identity) so a numeric 1 is accepted like JSON true.
        return res.json()["rt"] == True  # noqa: E712

    def get_answer_judge(self, randomExerciseSortId):
        '''Find the answer of a true/false question.

        Submits the values 0..9 in turn. Returns ``"B"`` when 0 is the value
        accepted, ``"A"`` when any other value is accepted, and ``None`` when
        none is.
        '''
        for key in range(10):
            if self._submit_answer(key, randomExerciseSortId):
                return "B" if key == 0 else "A"
        return None

    def get_answer_radio(self, randomExerciseSortId, answerList):
        '''Find the answer of a single-choice question.

        Submits each option id in ``answerList`` order and returns the letter
        (A-F) of the first accepted one, or ``None`` when no option within
        the first six is accepted.
        '''
        for position, key in enumerate(answerList):
            if self._submit_answer(key, randomExerciseSortId):
                letter = self._option_letter(position)
                if letter is not None:
                    return letter
        return None

    def get_answer_multi(self, randomExerciseSortId, answerList):
        '''Find the answer of a multiple-choice question.

        Tries every non-empty combination from :meth:`get_key`, submitting
        the option ids joined by commas. Returns the letters of the first
        accepted combination joined by commas (e.g. ``"A,C"``), or ``None``
        when no combination is accepted.
        '''
        for combo in self.get_key(answerList):
            if not combo:
                continue
            answerContent = ",".join(str(option_id) for option_id in combo)
            if self._submit_answer(answerContent, randomExerciseSortId):
                letters = (self._option_letter(answerList.index(option_id)) for option_id in combo)
                return ",".join(letter for letter in letters if letter is not None)
        return None

    def get_key(self, answerList):
        '''Return every subset of ``answerList``, the empty one included.

        Subsets are ordered by bitmask: bit ``j`` of the mask selects
        ``answerList[j]``, so the result has ``2 ** len(answerList)`` entries
        and starts with ``[]``.
        '''
        size = len(answerList)
        return [
            [answerList[bit] for bit in range(size) if (mask >> bit) % 2 == 1]
            for mask in range(2 ** size)
        ]




def main():
    '''Run the spider once, always closing its MySQL connection afterwards.'''
    sp = Spider()
    try:
        sp.get_content()
    finally:
        # Spider opens the connection in __init__ and never closes it itself.
        sp.con.close()


if __name__ == '__main__':
    main()



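# NOTE: the lines below appear to be web-page residue from the site this
# script was copied from; they are not part of the program.
# Logo
#
# CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!
#
# 更多推荐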