代码如下:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#2018/05/29

import re
import shutil
import xml.dom.minidom as xmldom
import os
import zipfile
import cabfile
import requests
import xlwt


def gzip_file(zip_path=r'D:\python\UpdateInfo.zip', dest_dir=r'D:\python'):
    """Extract every member of a zip archive into a directory.

    Despite its name, this function unpacks a zip archive (via ``zipfile``),
    not a gzip stream.

    Args:
        zip_path: Archive to unpack. Defaults to the UpdateInfo.zip location
            the script has always used.
        dest_dir: Directory the archive members are extracted into.
    """
    # The default paths are raw strings so the Windows backslashes stay
    # literal on every Python version (a plain "\U" is a unicode escape on
    # Python 3 and fails to compile).
    # The context manager closes the archive even if extraction raises.
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(path=dest_dir)

def resolve_xml():
    """Read the base URL and the last delta's URL from UpdateInfo.xml.

    UpdateInfo.xml is resolved against the current working directory and
    parsed with minidom.

    Returns:
        A ``(base_url, delta_url)`` tuple: the text content of the first
        ``<URL>`` element and the ``URL`` attribute of the last ``<Delta>``
        element.
    """
    root = xmldom.parse(os.path.abspath("UpdateInfo.xml")).documentElement

    # Text found between the first <URL> ... </URL> tag pair.
    first_url_node = root.getElementsByTagName("URL")[0]
    base_url = first_url_node.firstChild.data

    # "URL" attribute carried by the last <Delta> element.
    last_delta_node = root.getElementsByTagName("Delta")[-1]
    delta_url = last_delta_node.getAttribute('URL')

    return base_url, delta_url

def download_file(URL0, URL1):
    """Download the delta cabinet and stage its index.xml.

    The archive at ``URL0 + URL1`` is saved in the current directory under
    the name ``URL1`` and unpacked into a directory named after the archive
    (extension stripped). The contained index.xml is then moved into the
    script's fixed target directory, and both the downloaded archive and the
    scratch directory are deleted.

    Args:
        URL0: Leading part of the download URL.
        URL1: Trailing part of the download URL; also used as the local
            archive file name.

    Returns:
        Path of the staged index.xml inside the target directory (assumes
        the target directory already exists).

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    target_dir = r'D:\python'

    response = requests.get(URL0 + URL1)
    # Stop here on 4xx/5xx instead of handing an error page to the cab parser.
    response.raise_for_status()
    with open(URL1, "wb") as archive_file:
        archive_file.write(response.content)

    # Archive name without its extension doubles as the scratch directory.
    extract_dir = os.path.splitext(URL1)[0]
    cab = cabfile.CabFile(URL1)
    try:
        os.mkdir(extract_dir)
        cab.extract(extract_dir)
    finally:
        # Always release the handle so the archive can be deleted afterwards.
        cab.close()
    os.remove(URL1)

    shutil.move(os.path.join(extract_dir, 'index.xml'), target_dir)
    shutil.rmtree(extract_dir)
    return os.path.join(target_dir, 'index.xml')

def get_QMExpVul_text(path="QMExpVul.xml"):
    """Collect the path-like fragments found in the QMExpVul file.

    Each line is searched for text shaped like ``xxxx/xx/...``: any four
    characters, a slash, any two characters, a slash, then the remainder of
    the line.

    Args:
        path: File to scan. Defaults to QMExpVul.xml in the current working
            directory.

    Returns:
        A list of all matched fragments, in file order.
    """
    pattern = re.compile('..../../.*')
    fragments = []
    # ``with`` closes the handle even if reading or matching raises.
    with open(path) as source:
        for line in source:
            fragments.extend(pattern.findall(line))
    return fragments

def get_index_text():
    """Return the ``Url`` attribute of every ``<FileLocation>`` in index.xml.

    index.xml is resolved against the current working directory and parsed
    with minidom; the attributes are returned in document order.
    """
    document = xmldom.parse(os.path.abspath("index.xml"))
    locations = document.documentElement.getElementsByTagName("FileLocation")
    return [node.getAttribute('Url') for node in locations]

def export_data(QMExpVul, index):
    """Build one report row per Windows 10 patch URL that is not yet known.

    ``index`` is filtered in place: a URL is kept only when it contains
    ``windows10`` and contains none of the fragments listed in ``QMExpVul``.
    The filter is applied in a single pass; the previous implementation
    popped items while iterating by position, which skipped entries and hid
    the resulting IndexError, and it applied no filter at all when
    ``QMExpVul`` was empty.

    Args:
        QMExpVul: Iterable of URL fragments that are already covered.
        index: List of patch URLs; it is reduced to the surviving URLs.

    Returns:
        A list of dicts, one per surviving URL, holding the KB number,
        category, system, date, URL, MD5 and description link. The
        regex-derived fields are the lists returned by ``re.findall``; the
        URL and the description link are plain strings.
    """
    print('文件对比开始')

    # Materialise once so a one-shot iterable can be checked against every URL.
    known_fragments = list(QMExpVul)
    remaining = [
        url for url in index
        if 'windows10' in url
        and not any(fragment in url for fragment in known_fragments)
    ]
    # Slice-assign so the caller's list reflects the filtering, as before.
    index[:] = remaining

    detail_url = 'https://support.microsoft.com/en-us/help/'
    result = []
    for line in remaining:
        KB_number = re.findall(r'kb(\d*)', line)
        sort = re.findall('software/(.*)/2.*', line)
        system = re.findall(r'\d{4}/\d{2}/(.*)-kb', line)
        release_date = re.findall(r'\d{4}/..', line)
        MD5 = re.findall('_(.*).cab', line)
        # A URL without a "kb" token has no article id; link to the help root
        # rather than crashing on an empty match list.
        kb_id = KB_number[0] if KB_number else ''
        result.append({
            'KB_number': KB_number,
            '分类': sort,
            '系统': system,
            '时间': release_date,
            'URL': line,
            'MD5': MD5,
            '描述链接': detail_url + kb_id,
        })
    return result

def create_excel(data, filename='result.xls'):
    """Write the report rows to a spreadsheet with a single 'result' sheet.

    Row 0 holds the column titles; each entry of ``data`` fills one following
    row, with columns in the order of ``columns`` below.

    Args:
        data: Iterable of dicts containing every key listed in ``columns``.
        filename: Output path. xlwt produces the legacy binary ``.xls``
            format, so the default uses that extension instead of ``.xlsx``.
    """
    # Column order of the sheet; the titles double as the keys of each row dict.
    columns = ['KB_number', '分类', '系统', '时间', 'URL', 'MD5', '描述链接']

    # A Workbook object corresponds to one Excel file.
    book = xlwt.Workbook(encoding='utf-8', style_compression=0)
    # cell_overwrite_ok lets a cell be written more than once without an error.
    sheet = book.add_sheet('result', cell_overwrite_ok=True)

    # sheet.write(row, column, value): header first, then one row per record.
    for col, title in enumerate(columns):
        sheet.write(0, col, title)
    for row, record in enumerate(data, 1):
        for col, key in enumerate(columns):
            sheet.write(row, col, record[key])

    book.save(filename)
    print('文件生成成功')

if __name__ == '__main__':
    # Remove artefacts of a previous run: the staged index and any old report.
    for stale_file in ('index.xml', 'result.xls', 'result.xlsx'):
        if os.path.exists(stale_file):
            os.remove(stale_file)

    gzip_file()
    URL0, URL1 = resolve_xml()

    # download_file() unpacks into a directory named after the archive (its
    # extension stripped) and os.mkdir fails if that directory is left over
    # from an aborted run. Derive the name from URL1 instead of hard-coding
    # the hash of one particular delta.
    stale_dir = os.path.splitext(URL1)[0]
    if os.path.isdir(stale_dir):
        shutil.rmtree(stale_dir)

    print(URL0 + URL1)
    download_file(URL0, URL1)
    print('获取增量库文件成功')

    QMExpVul = get_QMExpVul_text()
    index = get_index_text()
    data = export_data(QMExpVul, index)
    create_excel(data)

有需要借鉴的小伙伴可以看看

Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐