#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2018-09-10 13:39:22
# Project: qunaer

from pyspider.libs.base_handler import *


class Handler(BaseHandler):
    """pyspider handler that walks Qunar travel-book list pages and scrapes each travelogue."""

    crawl_config = {
    }

    @every(minutes=24 * 60)
    def on_start(self):
        """Seed the crawl with the first travel-book list page."""
        self.crawl('http://travel.qunar.com/travelbook/list.htm', callback=self.index_page)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue every travelogue linked from a list page, then the next list page.

        Args:
            response: pyspider response for a list page; ``response.doc`` is
                queried with CSS selectors.
        """
        for each in response.doc('li > .tit > a').items():
            # Detail pages are fetched with fetch_type="js" (rendered fetch).
            self.crawl(each.attr.href, callback=self.detail_page, fetch_type="js")

        # On the last list page ".next" matches nothing and the href is None;
        # only follow the pagination link when one actually exists.
        next_url = response.doc(".next").attr.href
        if next_url:
            self.crawl(next_url, callback=self.index_page)

    @config(priority=2)
    def detail_page(self, response):
        """Extract the fields of a single travelogue page.

        Args:
            response: pyspider response for a travelogue detail page.

        Returns:
            dict with the page URL plus title, per-person cost, trip length,
            departure date, cover image URL and body text (keys are Chinese
            labels, except ``"url"``).
        """
        doc = response.doc
        costing = doc("li.f_item.howmuch > p > span.data").text() + "元"
        title = doc("#booktitle").text()
        days = doc(" li.f_item.howlong > p > span.data").text() + "天"
        departure_date = doc(" li.f_item.when > p > span.data").text()
        cover_img = doc(".cover_img").attr.src
        text = doc(".imglst").text()
        # NOTE: a stray "cover_img" literal used to precede "url" here; adjacent
        # string literals concatenate, so the key was silently "cover_imgurl".
        return {
            "url": response.url,
            "标题": title,
            "人均费用": costing,
            "天数": days,
            "出发日期": departure_date,
            "封面": cover_img,
            "正文": text,
        }

 

Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐