python实现txt文件和XML文件内容对比
本文给出一个 Python 脚本:先解压 UpdateInfo.zip 并解析 UpdateInfo.xml 得到增量库的下载地址,下载并解包 cab 文件取出 index.xml,然后把 index.xml 中的补丁 URL 与 QMExpVul.xml 中逐行匹配到的记录进行对比,最后将对比结果导出为 Excel 表格。
·
代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#2018/05/29
import re
import shutil
import xml.dom.minidom as xmldom
import os
import zipfile
import cabfile
import requests
import xlwt
def gzip_file(zip_path=r'D:\python\UpdateInfo.zip', dest_dir=r"D:\python"):
    """Extract every member of a zip archive into a directory.

    Despite its name, this function unpacks a zip archive through the
    ``zipfile`` module; no gzip handling is involved.

    Args:
        zip_path: Archive to unpack. Defaults to the location the script
            originally hard-coded.
        dest_dir: Directory that receives the extracted members. Defaults to
            the directory the script originally hard-coded.
    """
    # Raw-string defaults keep the Windows backslashes literal on every
    # Python version; the context manager closes the archive even when
    # extraction raises.
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(path=dest_dir)
def resolve_xml(xml_path="UpdateInfo.xml"):
    """Read the base URL and the last delta entry from an update-info XML file.

    Args:
        xml_path: XML file to parse with minidom. Defaults to
            ``UpdateInfo.xml`` resolved against the current directory.

    Returns:
        A ``(URL0, URL1)`` tuple where ``URL0`` is the text content of the
        first ``<URL>`` element and ``URL1`` is the ``URL`` attribute of the
        last ``<Delta>`` element (an empty string when the attribute is
        absent).

    Raises:
        ValueError: If the document has no ``<URL>`` element with text
            content, or no ``<Delta>`` element.
    """
    root = xmldom.parse(os.path.abspath(xml_path)).documentElement

    url_nodes = root.getElementsByTagName("URL")
    if not url_nodes or url_nodes[0].firstChild is None:
        raise ValueError("no <URL> element with text content in %s" % xml_path)

    delta_nodes = root.getElementsByTagName("Delta")
    if not delta_nodes:
        raise ValueError("no <Delta> element in %s" % xml_path)

    # Text between the <URL> tags, then the URL attribute of the last <Delta>.
    return url_nodes[0].firstChild.data, delta_nodes[-1].getAttribute('URL')
def download_file(URL0, URL1, dest_dir=r'D:\python'):
    """Download a cab archive and move its ``index.xml`` into ``dest_dir``.

    The archive is fetched from ``URL0 + URL1``, saved in the current
    directory under the name ``URL1``, unpacked into a sibling directory named
    after the archive (extension stripped), and then both the archive and the
    temporary directory are removed again.

    Args:
        URL0: Base part of the download URL.
        URL1: Archive name appended to ``URL0``; also used as the local file
            name while downloading.
        dest_dir: Directory that receives ``index.xml``. It is passed straight
            to ``shutil.move``, so it is expected to exist already. Defaults
            to the directory the script originally hard-coded.

    Returns:
        The path of ``index.xml`` inside ``dest_dir``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(URL0 + URL1)
    # Fail fast instead of saving an HTTP error page as if it were a cab file.
    response.raise_for_status()
    with open(URL1, "wb") as archive_file:
        archive_file.write(response.content)

    extract_dir = os.path.splitext(URL1)[0]  # archive name without extension
    os.mkdir(extract_dir)
    cab = cabfile.CabFile(URL1)
    try:
        cab.extract(extract_dir)
    finally:
        # Release the archive handle even when extraction fails.
        cab.close()
    os.remove(URL1)

    # os.path.join avoids the backslash-escape pitfalls of hand-built paths.
    shutil.move(os.path.join(extract_dir, 'index.xml'), dest_dir)
    shutil.rmtree(extract_dir)
    return os.path.join(dest_dir, 'index.xml')
def get_QMExpVul_text(path="QMExpVul.xml"):
    """Collect the path-like fragments found line by line in a text file.

    Each line is searched with the pattern ``..../../.*``: four arbitrary
    characters, a slash, two arbitrary characters, a slash, and then the rest
    of the line (without the trailing newline).

    Args:
        path: File to scan. Defaults to ``QMExpVul.xml`` in the current
            directory.

    Returns:
        A list with every match, in file order.
    """
    pattern = re.compile('..../../.*')
    matches = []
    # The with-block guarantees the handle is closed even if reading fails.
    with open(path) as source:
        for line in source:
            matches.extend(pattern.findall(line))
    return matches
def get_index_text(xml_path="index.xml"):
    """Return the ``Url`` attribute of every ``<FileLocation>`` element.

    Args:
        xml_path: XML file to parse with minidom. Defaults to ``index.xml``
            resolved against the current directory.

    Returns:
        A list of attribute values in document order; an element without a
        ``Url`` attribute contributes an empty string.
    """
    root = xmldom.parse(os.path.abspath(xml_path)).documentElement
    return [
        node.getAttribute('Url')
        for node in root.getElementsByTagName("FileLocation")
    ]
def export_data(QMExpVul, index):
    """Build report rows for index URLs that are not already known.

    A URL from ``index`` is kept only when it contains ``windows10`` and does
    not contain any entry of ``QMExpVul`` as a substring. The input list is
    left untouched; filtering builds a new list instead of popping from
    ``index`` while iterating over it, which used to skip entries.

    Args:
        QMExpVul: Iterable of URL fragments that are already known.
        index: Iterable of candidate URLs.

    Returns:
        A list of dicts, one per kept URL. Apart from ``URL`` (the URL itself)
        and the description link (a string), every value is the list returned
        by ``re.findall`` for the corresponding pattern.
    """
    print('文件对比开始')
    known = list(QMExpVul)  # materialise once; it is scanned for every URL
    pending = [
        url for url in index
        if 'windows10' in url and not any(sub_url in url for sub_url in known)
    ]

    detail_url = 'https://support.microsoft.com/en-us/help/'
    result = []
    for line in pending:
        KB_number = re.findall(r'kb(\d*)', line)
        sort = re.findall('software/(.*)/2.*', line)
        system = re.findall(r'\d{4}/\d{2}/(.*)-kb', line)
        period = re.findall(r'\d{4}/..', line)
        MD5 = re.findall('_(.*).cab', line)
        # A URL without a KB number falls back to the bare help URL instead of
        # raising IndexError.
        kb_suffix = KB_number[0] if KB_number else ''
        result.append({
            'KB_number': KB_number,
            '分类': sort,
            '系统': system,
            '时间': period,
            'URL': line,
            'MD5': MD5,
            '描述链接': detail_url + kb_suffix,
        })
    return result
def create_excel(data, filename='result.xls'):
    """Write the report rows to a spreadsheet with one sheet named ``result``.

    Args:
        data: Iterable of dicts that each provide every column key listed in
            ``columns`` below.
        filename: Output file. xlwt produces the legacy ``.xls`` format, so
            the default carries that extension rather than ``.xlsx``.
    """
    # A Workbook corresponds to one Excel file.
    book = xlwt.Workbook(encoding='utf-8', style_compression=0)
    # cell_overwrite_ok=True allows writing the same cell more than once.
    sheet = book.add_sheet('result', cell_overwrite_ok=True)

    # Column order of the sheet; each name is both the header text and the
    # key looked up in every row dict.
    columns = ('KB_number', '分类', '系统', '时间', 'URL', 'MD5', '描述链接')

    # Row 0 holds the headers; sheet.write takes (row, column, value).
    for col, title in enumerate(columns):
        sheet.write(0, col, title)

    # Data rows start directly below the header row.
    for row, record in enumerate(data, 1):
        for col, key in enumerate(columns):
            sheet.write(row, col, record[key])

    book.save(filename)
    print('文件生成成功')
if __name__ == '__main__':
    # Remove leftovers of a previous run from the current directory.
    if os.path.exists('index.xml'):
        os.remove('index.xml')
    if os.path.exists('wsusscn2_0eaa61f64d0e0ac33c3986b848ade4e1'):
        shutil.rmtree('wsusscn2_0eaa61f64d0e0ac33c3986b848ade4e1')
    if os.path.exists('result.xls'):
        os.remove('result.xls')

    # Unpack the update info, work out the archive URL and fetch it.
    gzip_file()
    URL0, URL1 = resolve_xml()
    print(URL0 + URL1)
    download_file(URL0, URL1)
    print('获取增量库文件成功')

    # Compare both sources and export the difference to a spreadsheet.
    QMExpVul = get_QMExpVul_text()
    index = get_index_text()
    data = export_data(QMExpVul, index)
    create_excel(data)
有需要借鉴的小伙伴可以看看
更多推荐
已为社区贡献6条内容
所有评论(0)