一个对账的小脚本
前一段时间一直在做各种核对数据的事情。因为我们的业务涉及到银联钱包,并且我们的客户认为其给的某些数据有问题,所以我们写了这样一个小脚本来验证。经验证数据没有问题,不过给过来的数据需要进行一些差错处理,处理时需要仔细一点。废话不多说,直接上程序。
·
前一段时间一直在做各种核对数据的事情。因为我们的业务涉及到银联钱包,并且我们的客户认为其给的某些数据有问题,所以我们写了这样一个小脚本来验证。经验证数据没有问题,不过给过来的数据需要进行一些差错处理,处理时需要仔细一点。
废话不多说,直接上程序
#!/usr/bin/env python
# encoding: utf-8
import logging
import pymysql
import os
class compare(object):
    """Reconcile granted points ("get" files) against spent points ("pay" files).

    Per-cid totals are accumulated in memory from the text files found in
    ``file_path`` and are then inserted into the ``bm_cid_info`` table.

    The code is written so that it is valid on both Python 2.6+ and Python 3
    (``except ... as``, ``!=`` and ``in`` replace the Python-2-only forms).
    """

    # Status text that marks a record as successful in the data files.
    _STATUS_OK = '成功'
    # The newest file format ignores records dated on or after this value.
    _CUTOFF_DATE = "2015/07/01"

    def __init__(self):
        self.file_path = "G:/codes/python/total"
        self.db_conn = None
        # cid -> {'addpoint': total granted, 'decpoint': total spent}
        self.__cid_info = dict()
        self.get_file_list = list()
        self.pay_file_list = list()

    def get_conn(self):
        """Open the MySQL connection and store it in ``self.db_conn``.

        Returns:
            True on success, False if connecting raised an exception.
        """
        # NOTE(review): host and credentials are hard-coded here; consider
        # moving them to configuration.
        try:
            self.db_conn = pymysql.connect(host="127.0.0.1", port=3306,
                                           user="root", passwd="205339",
                                           db="dbvip", charset='utf8')
        except Exception as e:
            logging.error("mysql get connection error [%s]" % e)
            return False
        return True

    def get_filelist(self):
        """Split the directory listing into "get" files and "pay" files.

        A name whose characters 9..11 are ``get`` goes to ``get_file_list``;
        every other name goes to ``pay_file_list``. Both lists are sorted.

        Returns:
            0 on success, -1 if ``file_path`` is empty.
        """
        if not self.file_path:
            return -1
        for file_name in sorted(os.listdir(self.file_path)):
            if file_name[9:12] == 'get':
                self.get_file_list.append(file_name)
            else:
                self.pay_file_list.append(file_name)
        self.get_file_list.sort()
        self.pay_file_list.sort()
        logging.info("get_file_list %s" % self.get_file_list)
        logging.info("pay_file_list %s" % self.pay_file_list)
        return 0

    def _read_lines(self, fname):
        """Return all lines of ``fname``, or None if it is missing/unreadable."""
        if not os.path.exists(fname):
            logging.error("Fname %s not exists." % (fname))
            return None
        if not os.access(fname, os.R_OK):
            logging.error("Fname %s not readable." % (fname))
            return None
        logging.info("Open file %s..." % fname)
        # The context manager closes the handle even if reading fails.
        with open(fname) as fhandle:
            return fhandle.readlines()

    def _log_progress(self, count):
        """Log a progress message every 1000 lines."""
        if count % 1000 == 0:
            logging.info("processing line %d" % count)

    def _add_point(self, cid, amount):
        """Add ``amount`` to the granted total of ``cid``, creating the entry if needed."""
        if cid not in self.__cid_info:
            self.__cid_info[cid] = {'addpoint': 0, 'decpoint': 0}
        self.__cid_info[cid]['addpoint'] += float(amount)

    def _dec_point(self, cid, amount, count):
        """Add ``amount`` to the spent total of ``cid``.

        Returns:
            False (after logging) when ``cid`` never received points,
            True when the amount was recorded.
        """
        if cid not in self.__cid_info:
            logging.info("the cid has not been send point %s,line %d" % (cid, count))
            return False
        self.__cid_info[cid]['decpoint'] += float(amount)
        return True

    def load_get_file(self):
        """Accumulate granted points from every file in ``get_file_list``.

        Three file layouts are handled, selected by file name:
        ``20140324-get.txt`` (``cid,points``), files before 2014-11
        (space separated, first line skipped) and later files (comma
        separated, quoted fields).

        Returns:
            0 on success, -1 if a file is missing or unreadable.
        """
        for filename in self.get_file_list:
            flist = self._read_lines(self.file_path + '/' + filename)
            if flist is None:
                return -1
            if filename == "20140324-get.txt":
                for count, line in enumerate(flist, 1):
                    # Lines from readlines() keep their newline, so test the
                    # stripped text; a blank line would otherwise raise
                    # IndexError on rec[1].
                    if not line.strip():
                        continue
                    rec = line.split(',')
                    self._add_point(rec[0], rec[1])
                    self._log_progress(count)
            elif filename[:6] < "201411":
                for count, line in enumerate(flist, 1):
                    # The first line is skipped (presumably a header).
                    if count == 1 or not line.strip():
                        continue
                    rec = line.split(' ')
                    cid, account = rec[1], rec[2]
                    addpoint, status = rec[13], rec[14]
                    # Grant records are only counted when the account field
                    # is empty and the status is successful.
                    if account != '' or status != self._STATUS_OK:
                        logging.info("account %s,status %s line %d" % (account, status, count))
                        continue
                    self._add_point(cid, addpoint)
                    self._log_progress(count)
            else:
                for count, line in enumerate(flist, 1):
                    if not line.strip():
                        continue
                    rec = line.split(',')
                    cid = rec[0].strip('"')
                    account = rec[1].strip('"')
                    date = rec[3].strip('"')
                    if date >= self._CUTOFF_DATE:
                        continue
                    # NOTE(review): mantissa is taken from [1:8] here but from
                    # [1:5] in load_pay_file, and the exponent from the last
                    # three characters; confirm against the real file layout.
                    addpoint = float(rec[10][1:8]) * (10 ** int(rec[10][-3:]))
                    # NOTE(review): [:4] slices bytes on Python 2 and
                    # characters on Python 3; confirm against file encoding.
                    status = rec[11].strip('"')[:4]
                    if account != '' or status != self._STATUS_OK:
                        logging.info("account %s,status %s line %d" % (account, status, count))
                        continue
                    self._add_point(cid, addpoint)
                    self._log_progress(count)
        return 0

    def load_pay_file(self):
        """Accumulate spent points from every file in ``pay_file_list``.

        Uses the same three layouts as ``load_get_file``. Records whose cid
        never received points are logged and skipped.

        Returns:
            0 on success, -1 if a file is missing or unreadable.
        """
        for filename in self.pay_file_list:
            flist = self._read_lines(self.file_path + '/' + filename)
            if flist is None:
                return -1
            if filename == "20140324-pay.txt":
                for count, line in enumerate(flist, 1):
                    if not line.strip():
                        continue
                    rec = line.split(',')
                    if self._dec_point(rec[0], rec[1], count):
                        self._log_progress(count)
            elif filename[:6] < "201411":
                for count, line in enumerate(flist, 1):
                    # The first line is skipped (presumably a header).
                    if count == 1 or not line.strip():
                        continue
                    rec = line.split(' ')
                    cid, account = rec[1], rec[2]
                    decpoint, status = rec[13], rec[14]
                    # Spend records need a non-empty account and a
                    # successful status.
                    if account == '' or status != self._STATUS_OK:
                        logging.info("account %s,status %s,line %d" % (account, status, count))
                        continue
                    if self._dec_point(cid, decpoint, count):
                        self._log_progress(count)
            else:
                for count, line in enumerate(flist, 1):
                    if not line.strip():
                        continue
                    rec = line.split(',')
                    cid = rec[0].strip('"')
                    account = rec[1].strip('"')
                    date = rec[3].strip('"')
                    if date >= self._CUTOFF_DATE:
                        continue
                    # NOTE(review): mantissa slice is [1:5] here but [1:8] in
                    # load_get_file; confirm against the real file layout.
                    decpoint = float(rec[10][1:5]) * (10 ** int(rec[10][-3:]))
                    status = rec[11].strip('"')[:4]
                    if account == '' or status != self._STATUS_OK:
                        logging.info("account %s,status %s line %d" % (account, status, count))
                        continue
                    if self._dec_point(cid, decpoint, count):
                        self._log_progress(count)
        return 0

    def commit_data(self):
        """Insert one ``bm_cid_info`` row per cid, committing every 10000 rows.

        Returns:
            True when every row was inserted, False on the first failed
            insert (rows committed earlier stay committed).
        """
        # Parameterized statement: cid values come from data files and must
        # not be spliced into the SQL text.
        opt_sql = "insert into bm_cid_info(cid,add_point,dec_point) values(%s,%s,%s)"
        count = 0
        cur = self.db_conn.cursor()
        try:
            for cid, points in self.__cid_info.items():
                count += 1
                params = (cid, round(points['addpoint'], 2), round(points['decpoint'], 2))
                try:
                    cur.execute(opt_sql, params)
                except Exception as e:
                    logging.error("mysql insert bm_cid_info error [%s]" % e)
                    return False
                if count % 10000 == 0:
                    self.db_conn.commit()
                    logging.info("handled user %d" % count)
            logging.info("handled total user %d" % count)
            self.db_conn.commit()
        finally:
            cur.close()
        return True

    def run(self):
        """Run the whole reconciliation.

        Returns:
            0 on success, -1 as soon as any step fails.
        """
        # get_conn() and commit_data() report failure with False, not with a
        # negative number, so they must be tested for truthiness.
        if not self.get_conn():
            return -1
        if self.get_filelist() < 0:
            return -1
        if self.load_get_file() < 0:
            return -1
        if self.load_pay_file() < 0:
            return -1
        if not self.commit_data():
            return -1
        return 0
if __name__ == '__main__':
    # Script entry point: run the reconciliation once and log the outcome.
    comp = compare()
    # Raw string: "\c" and "\l" are not valid escape sequences, so the
    # non-raw literal only worked by accident. The path value is unchanged.
    logging.basicConfig(filename=r"G:\codes\logs\compare.log",
                        format="%(asctime)s %(levelname)s[%(process)d]:%(message)s",
                        level="INFO")
    # run() returns a negative value on failure and 0 on success.
    if comp.run() < 0:
        logging.info("compare failed")
    else:
        logging.info("compare succeed")
一个简单的小程序,说一下流程。我们有两种文件需要读取,一种是get.txt,一种是pay.txt。这两种文件格式基本相同但是有些差别,所以写了两个函数来分别处理;中间因为文件格式有过变动,所以需要好多个分支进行处理。处理完之后会用一个字典把得到的所有信息汇总起来,最后提交到数据库里去。只实现了这么一个小功能,不过现在有了个新的想法:因为上一次看过一个讲python闭包的小视频,等一会试一下看看能不能改写一下这个程序,让它变得更简单一点,要是可以的话再来给大家分享。
更多推荐
已为社区贡献1条内容
所有评论(0)