Cookie带时间戳
#!/usr/bin/env python# -*- encoding: utf-8 -*-# Created on 2018-08-28 10:33:19# Project: HMfrom pyspider.libs.base_handler import *import jsonfrom time import timedef get_headers():retur...
·
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2018-08-28 10:33:19
# Project: HM
from pyspider.libs.base_handler import *
import json
from time import time
def get_headers():
    """Build the HTTP headers sent with the crawl request.

    The cookie is a fixed, previously captured session string whose final
    ``Hm_lpvt_fa008fd70d63f84cf4de09ddf81382e5`` field is filled with the
    current Unix time (whole seconds) on every call, so each call produces a
    freshly time-stamped cookie.

    Returns:
        dict: A mapping with two keys, ``'cookie'`` and ``'content-type'``.
    """
    # Adjacent string literals are concatenated by the parser, so this is the
    # same single cookie string as before, just split into one field per line.
    # NOTE(review): the session identifiers below are hard-coded captured
    # values; presumably they expire server-side - confirm before relying on
    # this for long-running schedules.
    cookie_template = (
        'JSESSIONID=66667F8BCFC324DBCA1BFE9637215CD7; '
        'Hm_lvt_fa008fd70d63f84cf4de09ddf81382e5=1535422445; '
        'Hm_lvt_0f88184729be2e2f69bdb90a5394f472=1535422480; '
        'Hm_lpvt_0f88184729be2e2f69bdb90a5394f472=1535423181; '
        'SERVERID=3483c5b2c5245f493d16b039e72201ea|1535423203|1535422444; '
        'Hm_lpvt_fa008fd70d63f84cf4de09ddf81382e5={}'
    )
    return {
        # Only the trailing placeholder is dynamic: the current epoch second.
        'cookie': cookie_template.format(int(time())),
        'content-type': 'application/json;charset=UTF-8',
    }
# Default request payload: a single "serialNumber" field. It is defined at
# module level so the handler can serialize it when building the request.
def_data = {
    "serialNumber": "2018082810194169445266050",
}
class Handler(BaseHandler):
    """pyspider handler that POSTs one JSON query and follows returned links.

    Flow, as written in this class:
      1. ``on_start`` sends a POST to the ``carSeriesDetail`` endpoint.
      2. ``index_page`` prints the response body and queues every absolute
         ``http...`` link found in it.
      3. ``detail_page`` returns the URL and ``<title>`` text of each page.
    """

    # Settings applied to every request made by this handler.
    crawl_config = {
        'retries': 10,
    }

    @every(minutes=24 * 60)
    def on_start(self):
        """Queue the initial POST request (decorated to run every 24 hours)."""
        # The headers declare a JSON content type, so the payload is sent as a
        # serialized JSON string rather than as a dict.
        payload = json.dumps(def_data)
        self.crawl(
            'https://mall.ihaima.com/api//product/car/carSeriesDetail',
            callback=self.index_page,
            # Headers are rebuilt per call so the cookie carries a fresh
            # timestamp.
            headers=get_headers(),
            # NOTE(review): TLS certificate verification is disabled here;
            # confirm this is still required for the target host.
            validate_cert=False,
            method='POST',
            data=payload,
        )

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Print the raw response and queue each absolute link for crawling.

        Args:
            response: The pyspider response for the request from ``on_start``.
        """
        # Function-call form works on both Python 2 and Python 3; the former
        # ``print x`` statement is a syntax error on Python 3.
        print(response.text)
        # NOTE(review): the endpoint is requested with a JSON content type, so
        # the body may contain no <a> elements at all - confirm that link
        # extraction is actually wanted here.
        for each in response.doc('a[href^="http"]').items():
            self.crawl(each.attr.href, callback=self.detail_page)

    @config(priority=2)
    def detail_page(self, response):
        """Return the result record for a followed page.

        Args:
            response: The pyspider response for a link queued by
                ``index_page``.

        Returns:
            dict: ``{"url": <page URL>, "title": <text of the title element>}``.
        """
        return {
            "url": response.url,
            "title": response.doc('title').text(),
        }
更多推荐
已为社区贡献1条内容
所有评论(0)