前一段时间一直在做各种核对数据的事情。因为我们的业务涉及到银联钱包,并且我们的客户认为其给的某些数据有问题,所以我们写了这样一个小脚本来验证。经验证,数据没有问题,不过给过来的数据需要进行一些差错处理,处理时需要仔细一点。
废话不多说,直接上程序

#!/usr/bin/env python
# encoding: utf-8
import logging
import pymysql
import os


class compare(object):
    """Sum per-cid 'addpoint' / 'decpoint' values from text files and store them.

    Workflow (see ``run``): connect to MySQL, list the data directory, read
    every "get" file into ``addpoint`` totals, read every "pay" file into
    ``decpoint`` totals, then insert one row per cid into ``bm_cid_info``.

    Three line layouts are recognised, chosen by file name:

    * the file named ``20140324-get.txt`` / ``20140324-pay.txt``:
      comma separated, ``cid,points``;
    * files whose first six characters sort before ``"201411"``:
      space separated with one header line;
    * everything else: comma separated, quoted fields, points stored as
      mantissa + exponent text in field 10.

    The code only uses syntax that is valid on both Python 2.6+ and Python 3.
    """

    def __init__(self, file_path="G:/codes/python/total"):
        """Create an empty accumulator.

        :param file_path: directory that holds the data files.
        """
        self.file_path = file_path
        self.db_conn = None
        # cid -> {'addpoint': number, 'decpoint': number}
        self.__cid_info = dict()
        self.get_file_list = list()
        self.pay_file_list = list()

    def get_conn(self):
        """Open the MySQL connection and keep it in ``self.db_conn``.

        :return: True on success, False (after logging) on any failure.
        """
        # NOTE(review): host and credentials are hard-coded here; consider
        # moving them to configuration.
        try:
            self.db_conn = pymysql.connect(host="127.0.0.1", port=3306,
                                           user="root", passwd="205339",
                                           db="dbvip", charset='utf8')
        except Exception as e:
            logging.error("mysql get connection error [%s]" % e)
            return False
        return True

    def get_filelist(self):
        """Split the directory listing into ``get_file_list`` / ``pay_file_list``.

        A name whose characters 9..11 are ``get`` is a get file; every other
        directory entry is treated as a pay file.

        :return: 0 on success, -1 if no directory is set or it cannot be listed.
        """
        if not self.file_path:
            return -1
        try:
            names = sorted(os.listdir(self.file_path))
        except OSError as e:
            logging.error("list directory %s error [%s]" % (self.file_path, e))
            return -1
        for file_name in names:
            if file_name[9:12] == 'get':
                self.get_file_list.append(file_name)
            else:
                self.pay_file_list.append(file_name)
        self.get_file_list.sort()
        self.pay_file_list.sort()
        logging.info("get_file_list %s" % self.get_file_list)
        logging.info("pay_file_list %s" % self.pay_file_list)
        return 0

    def _read_lines(self, filename):
        """Return every line of ``filename`` in ``file_path``, or None if unusable."""
        fname = self.file_path + '/' + filename
        if not os.path.exists(fname):
            logging.error("Fname %s not exists." % (fname))
            return None
        if not os.access(fname, os.R_OK):
            logging.error("Fname %s not readable." % (fname))
            return None
        logging.info("Open file %s..." % fname)
        # The context manager closes the handle even if reading raises.
        with open(fname) as fhandle:
            return fhandle.readlines()

    def _add_point(self, cid, points):
        """Add ``points`` to the cid's 'addpoint' total, creating the entry if new."""
        if cid not in self.__cid_info:
            self.__cid_info[cid] = {'addpoint': 0, 'decpoint': 0}
        self.__cid_info[cid]['addpoint'] += points

    def _dec_point(self, cid, points, count):
        """Add ``points`` to the cid's 'decpoint' total.

        A cid that has no entry yet is logged and ignored.

        :return: True if the value was recorded, False if the cid was unknown.
        """
        if cid not in self.__cid_info:
            logging.info("the cid has not been send point %s,line %d" % (cid, count))
            return False
        self.__cid_info[cid]['decpoint'] += points
        return True

    def load_get_file(self):
        """Accumulate 'addpoint' from every file in ``get_file_list``.

        Records are kept only when the account field is empty and the status
        field equals '成功' (not checked for the ``cid,points`` layout).
        Blank lines are skipped.

        :return: 0 on success, -1 if a file is missing or unreadable.
        """
        for filename in self.get_file_list:
            flist = self._read_lines(filename)
            if flist is None:
                return -1
            is_summary = filename == "20140324-get.txt"
            is_legacy = filename[:6] < "201411"
            for count, line in enumerate(flist, 1):
                # readlines() keeps the newline, so test the stripped text.
                if not line.strip():
                    continue
                if is_summary:
                    rec = line.split(',')
                    cid = rec[0]
                    addpoint = rec[1]
                elif is_legacy:
                    if count == 1:
                        # First line of this layout is a header.
                        continue
                    rec = line.split(' ')
                    cid = rec[1]
                    account = rec[2]
                    addpoint = rec[13]
                    status = rec[14]
                    if account != '' or status != '成功':
                        logging.info("account %s,status %s line %d" % (account, status, count))
                        continue
                else:
                    rec = line.split(',')
                    cid = rec[0].strip('"')
                    account = rec[1].strip('"')
                    date = rec[3].strip('"')
                    if date >= "2015/07/01":
                        continue
                    # Field 10: characters 1..7 are the mantissa, the last
                    # three characters are the power-of-ten exponent.
                    addpoint = float(rec[10][1:8]) * (10 ** int(rec[10][-3:]))
                    # NOTE(review): [:4] keeps the first four units of the
                    # status text (bytes on Python 2 str) - confirm it matches
                    # the encoding of the data files.
                    status = rec[11].strip('"')[:4]
                    if account != '' or status != '成功':
                        logging.info("account %s,status %s line %d" % (account, status, count))
                        continue
                self._add_point(cid, float(addpoint))
                if count % 1000 == 0:
                    logging.info("processing line %d" % count)
        return 0

    def load_pay_file(self):
        """Accumulate 'decpoint' from every file in ``pay_file_list``.

        Records are kept only when the account field is non-empty and the
        status field equals '成功' (not checked for the ``cid,points`` layout).
        A cid that was never seen by ``load_get_file`` is logged and skipped.
        Blank lines are skipped.

        :return: 0 on success, -1 if a file is missing or unreadable.
        """
        for filename in self.pay_file_list:
            flist = self._read_lines(filename)
            if flist is None:
                return -1
            is_summary = filename == "20140324-pay.txt"
            is_legacy = filename[:6] < "201411"
            for count, line in enumerate(flist, 1):
                # readlines() keeps the newline, so test the stripped text.
                if not line.strip():
                    continue
                if is_summary:
                    rec = line.split(',')
                    cid = rec[0]
                    decpoint = rec[1]
                elif is_legacy:
                    if count == 1:
                        # First line of this layout is a header.
                        continue
                    rec = line.split(' ')
                    cid = rec[1]
                    account = rec[2]
                    decpoint = rec[13]
                    status = rec[14]
                    if account == '' or status != '成功':
                        logging.info("account %s,status %s,line %d" % (account, status, count))
                        continue
                else:
                    rec = line.split(',')
                    cid = rec[0].strip('"')
                    account = rec[1].strip('"')
                    date = rec[3].strip('"')
                    if date >= "2015/07/01":
                        continue
                    # NOTE(review): load_get_file reads the mantissa with
                    # [1:8]; only [1:5] is used here - confirm this is
                    # intended and not lost precision.
                    decpoint = float(rec[10][1:5]) * (10 ** int(rec[10][-3:]))
                    status = rec[11].strip('"')[:4]
                    if account == '' or status != '成功':
                        logging.info("account %s,status %s line %d" % (account, status, count))
                        continue
                if not self._dec_point(cid, float(decpoint), count):
                    continue
                if count % 1000 == 0:
                    logging.info("processing line %d" % count)
        return 0

    def commit_data(self):
        """Insert one ``bm_cid_info`` row per cid, committing every 10000 rows.

        Values are passed as query parameters so the driver does the quoting.

        :return: True when every row was inserted, False on the first failure.
        """
        opt_sql = "insert into bm_cid_info(cid,add_point,dec_point) values(%s,%s,%s)"
        count = 0
        cur = self.db_conn.cursor()
        try:
            for cid, info in self.__cid_info.items():
                count += 1
                row = (cid, round(info['addpoint'], 2), round(info['decpoint'], 2))
                try:
                    cur.execute(opt_sql, row)
                except Exception as e:
                    logging.error("mysql insert bm_cid_info error [%s]" % e)
                    return False
                if count % 10000 == 0:
                    self.db_conn.commit()
                    logging.info("handled user %d" % count)
            logging.info("handled total user %d" % count)
            self.db_conn.commit()
        finally:
            cur.close()
        return True

    def run(self):
        """Execute the whole workflow.

        :return: 0 on success, -1 as soon as any step fails.
        """
        # get_conn() and commit_data() report True/False, the other steps
        # report 0/-1, so each kind needs its own check.
        if not self.get_conn():
            return -1
        try:
            if self.get_filelist() < 0:
                return -1
            if self.load_get_file() < 0:
                return -1
            if self.load_pay_file() < 0:
                return -1
            if not self.commit_data():
                return -1
            return 0
        finally:
            self.db_conn.close()

if __name__ == '__main__':
    # Configure logging before any work is done. The path is a raw string so
    # the backslashes of the Windows path are never read as escape sequences.
    logging.basicConfig(filename=r"G:\codes\logs\compare.log",
                        format="%(asctime)s %(levelname)s[%(process)d]:%(message)s",
                        level="INFO")
    comp = compare()
    # run() returns a negative value when any step failed; report that at
    # ERROR level so it stands out in the log.
    if comp.run() < 0:
        logging.error("compare failed")
    else:
        logging.info("compare succeed")

这是一个简单的小程序,说一下流程:我们有两种文件需要获取,一种是get.txt,一种是pay.txt。这两种文件格式基本相同但是有些差别,所以写了两个函数来分别处理;中间因为有过文件格式的变动,所以需要好多个分支进行处理。处理完之后会用一个字典把得到的所有信息汇总起来,最后提交到数据库里去。目前只实现了这么一个小功能,不过现在有了个新的想法:因为上一次看过一个讲python闭包的小视频,等一会试一下,看看能不能把这个程序改写得更简单一点,要是可以的话再来给大家分享。

Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐