用 BeautifulSoup 处理 HTML 文件

#!/usr/bin/env python
# coding=utf-8
import urllib2
import urllib
from bs4 import BeautifulSoup 
import re
def getContent(url):
    """Download the images referenced on one listing page.

    Fetches ``url``, parses it with BeautifulSoup, collects every
    ``<li class="span3">`` element and, for each ``<img>`` inside those
    elements, retrieves the address stored in its ``data-src`` attribute
    into the hard-coded photo directory, using the ``data-id`` attribute
    as the file name (with a ``.jpg`` extension).

    Side effect: the matched ``<li>`` elements are stored in the
    module-level global ``siteUrls``; each call overwrites it.

    Args:
        url: Address of the page to fetch.

    Raises:
        Whatever ``urllib2.urlopen`` or ``urllib.urlretrieve`` raise on
        network or file-system failures; these are not caught here.
    """
    response = urllib2.urlopen(url)
    try:
        content = response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()

    soup = BeautifulSoup(content)

    # Kept as a global because code outside this function may read it.
    global siteUrls
    siteUrls = soup.findAll('li', attrs={'class': 'span3'})

    for item in siteUrls:
        for img in item.findAll('img'):
            img_id = img.get('data-id')
            img_src = img.get('data-src')
            # Tag.get returns None for a missing attribute. Without a
            # source there is nothing to download, and without an id the
            # file would be saved as "None.jpg" and overwritten repeatedly.
            if not img_id or not img_src:
                continue
            # Raw string so the backslashes can never be read as escapes.
            imgpath = r'H:\python_learn\photo/%s.jpg' % img_id
            urllib.urlretrieve(img_src, imgpath)
# Visit listing pages 1 through 6 and download the images found on each.
page_template = 'http://www.dbmeizi.com/?p=%s'
for i in xrange(1, 7):
    url = page_template % i
    getContent(url)
Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐