Python 的 JSON 文件合并与重复检测解决方案:

此程序的主要功能是:将今天爬取的 JSON 文件与昨天的 JSON 文件按域名(domain)做“并集”,即只把昨天文件中没有的域名追加进去。

#! /bin/python
import imp
import requests
import json
import sys

def readfile(filepath):
    """Load a JSON file and count the entries under its ``domains`` key.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        A ``(jdb, count)`` tuple: the parsed JSON document and
        ``len(jdb['domains'])``.

    Raises:
        ValueError: If the file does not contain valid JSON.
        KeyError: If the parsed document has no ``domains`` key.
    """
    # The context manager closes the file even when json.load() raises,
    # which the manual open()/close() pair did not guarantee.
    with open(filepath, 'r') as f:
        jdb = json.load(f)
    # Number of elements in the JSON "domains" array.
    count = len(jdb['domains'])
    return jdb, count


def writefile(filepath, jsontext):
    """Write *jsontext* to *filepath* as pretty-printed JSON.

    The output uses a 4-space indent and ``ensure_ascii=False``, so
    non-ASCII characters are written as-is rather than as ``\\uXXXX``
    escapes.

    Args:
        filepath: Path of the file to (over)write.
        jsontext: JSON-serialisable object to store.

    Raises:
        TypeError: If *jsontext* cannot be serialised; in that case the
            existing file is left untouched.
    """
    # Serialise BEFORE opening: mode 'w' truncates the file immediately, so
    # a serialisation failure after open() would wipe the existing content.
    text = json.dumps(jsontext, indent=4, ensure_ascii=False)
    # 'w' empties the file before writing; the context manager closes it
    # even if write() fails.
    with open(filepath, 'w') as fdatabase:
        fdatabase.write(text)


def merge_domains(jdb, jchinaz):
    """Merge the entries of *jchinaz* into *jdb* in place, keyed by ``domain``.

    Both arguments are dicts holding a list of entry dicts under the
    ``domains`` key; each entry provides ``id``, ``domain``, ``name`` and
    ``description``.

    * If ``jdb['domains']`` is empty, every entry of *jchinaz* is copied
      over and keeps its original ``id``.
    * Otherwise only entries whose ``domain`` is not yet present are
      appended, and each receives ``id = len(jdb['domains']) + 1`` at the
      moment it is appended.

    Args:
        jdb: The existing database document; modified in place.
        jchinaz: The freshly crawled document; not modified.

    Returns:
        The same *jdb* object, for convenience.
    """
    def _entry(source, entry_id):
        # Copy only the four known fields, in the stored key order.
        return {
            "id": entry_id,
            "domain": source['domain'],
            "name": source['name'],
            "description": source['description'],
        }

    # Empty database: take the crawled entries as they are.
    if not jdb['domains']:
        for domainchinaz in jchinaz['domains']:
            jdb["domains"].append(_entry(domainchinaz, domainchinaz['id']))
        return jdb

    # Non-empty database: a set gives O(1) "already known?" checks instead
    # of rescanning the whole list for every crawled entry.
    known = set(domaindb['domain'] for domaindb in jdb['domains'])
    for domainchinaz in jchinaz['domains']:
        if domainchinaz['domain'] in known:
            continue
        jdb["domains"].append(_entry(domainchinaz, len(jdb['domains']) + 1))
        # Remember it so a repeated crawled domain is not appended twice.
        known.add(domainchinaz['domain'])
    return jdb


if __name__ == '__main__':

    if sys.version_info < (3, 0):
        # Python 2 only: make utf-8 the implicit str/unicode codec.
        imp.reload(sys)
        sys.setdefaultencoding('utf-8')

    # Directory of the running script; all data paths are relative to it.
    cur_dir = sys.path[0]

    # Path of the freshly crawled file.
    work_dir = cur_dir + '/../db/chinaz-com/'
    chinazfilepath = work_dir + '20200717141142.json'
    jchinaz, _ = readfile(chinazfilepath)

    # Path of the database (the previous result).
    work_dir = cur_dir + '/../db/'
    dbfilepath = work_dir + '20200709103906.json'
    jdb, _ = readfile(dbfilepath)

    merge_domains(jdb, jchinaz)

    # Path of the merged file (the database file is overwritten).
    newfilepath = work_dir + '20200709103906.json'
    writefile(newfilepath, jdb)
    print('ok')
Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐