#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2018-08-28 10:33:19
# Project: HM

from pyspider.libs.base_handler import *
import json
from time import time
def get_headers():
    """Build the HTTP headers sent with the API request.

    Returns a dict with two entries:

    * ``cookie`` -- a fixed set of captured cookie values, except for the
      last one, whose value is the current Unix time in whole seconds.
    * ``content-type`` -- declares the request body as UTF-8 JSON.

    A fresh dict is built on every call so the timestamp is current.
    """
    now = int(time())
    cookie_parts = [
        'JSESSIONID=66667F8BCFC324DBCA1BFE9637215CD7',
        'Hm_lvt_fa008fd70d63f84cf4de09ddf81382e5=1535422445',
        'Hm_lvt_0f88184729be2e2f69bdb90a5394f472=1535422480',
        'Hm_lpvt_0f88184729be2e2f69bdb90a5394f472=1535423181',
        'SERVERID=3483c5b2c5245f493d16b039e72201ea|1535423203|1535422444',
        # Only this entry is dynamic: it carries the time of the call.
        'Hm_lpvt_fa008fd70d63f84cf4de09ddf81382e5=%d' % now,
    ]
    headers = {}
    headers['cookie'] = '; '.join(cookie_parts)
    headers['content-type'] = 'application/json;charset=UTF-8'
    return headers
# Default request payload: the single serial number sent to the API.
def_data = dict(serialNumber="2018082810194169445266050")


class Handler(BaseHandler):
    """pyspider handler that queries the ihaima mall car-series API.

    Flow: ``on_start`` posts the default payload to the API endpoint,
    ``index_page`` prints the raw response and queues every absolute link
    found in it, and ``detail_page`` returns the URL and title of each
    followed page as the result record.
    """

    # Settings applied to every request issued by this handler.
    crawl_config = {
        'retries': 10
    }

    @every(minutes=24 * 60)
    def on_start(self):
        """Seed the crawl with one POST to the car-series detail endpoint."""
        # The headers declare a JSON content type (see get_headers), so the
        # payload is sent as a serialized JSON string rather than a dict.
        data = json.dumps(def_data)
        self.crawl(
            'https://mall.ihaima.com/api//product/car/carSeriesDetail',
            callback=self.index_page,
            headers=get_headers(),
            validate_cert=False,
            method='POST',
            data=data,
        )

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Print the response body and queue every absolute link in it.

        Each ``<a>`` element whose ``href`` starts with ``http`` is scheduled
        for ``detail_page``.
        """
        # Call form instead of the Python 2 print statement: identical output
        # for a single argument on Python 2, and valid syntax on Python 3.
        print(response.text)
        # NOTE(review): the request asks for JSON, so the response is
        # presumably JSON too and this anchor selector may match nothing.
        # Confirm against a real response.
        for each in response.doc('a[href^="http"]').items():
            self.crawl(each.attr.href, callback=self.detail_page)

    @config(priority=2)
    def detail_page(self, response):
        """Return the result record for a page: its URL and ``<title>`` text."""
        return {
            "url": response.url,
            "title": response.doc('title').text(),
        }

# 网站 (target site): http://mall.ihaima.com/carlist

Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐