python数据存储
csv以逗号分隔,但是我发现我写的文件里出现多列跟逗号没关系,就算去除了逗号也还是一样。#!/usr/bin/env python# -*- coding:utf8 -*-# @TIME:2019/5/18 13:39# @Author:17976# @File:piplines.py# @Description:import reimport pymo...
·
CSV 以逗号分隔字段,但我发现自己写出的文件里出现了多余的列,而且这与逗号无关:即使把值里的逗号全部去掉,结果还是一样。
#!/usr/bin/env python
# -*- coding:utf8 -*-
# @TIME :2019/5/18 13:39
# @Author :17976
# @File :piplines.py
# @Description:
import csv
import re

import pymongo
from pymongo.errors import DuplicateKeyError

import settings
class csv_write():
    """Append items to a CSV file, one row per item.

    Rows are emitted through the standard ``csv`` module, so values that
    contain commas, double quotes or line breaks are quoted instead of
    being split across extra columns or rows.
    """

    def __init__(self, file_name='wenshu.csv'):
        """Remember the path of the CSV file that rows are appended to.

        Args:
            file_name: Path of the output file; it is created on the first
                write if it does not exist yet.
        """
        self.file_name = file_name

    @staticmethod
    def _row_cells(item):
        """Flatten *item* into a list of cell strings, in iteration order.

        A value that is itself a dict contributes one cell per inner value
        (one level deep only); every other value contributes a single cell.
        """
        cells = []
        for value in item.values():
            if isinstance(value, dict):
                cells.extend(str(inner) for inner in value.values())
            else:
                cells.append(str(value))
        return cells

    def write_item(self, item):
        """Append one row built from the values of *item*.

        Args:
            item: Mapping whose values become the cells of the row. Nested
                dict values are expanded into consecutive cells.
        """
        # newline='' lets the csv writer control line endings itself;
        # without it, line breaks inside quoted cells can be translated by
        # the text layer and corrupt the row structure.
        # 'utf-8-sig' emits a BOM only when the file is still empty, which
        # lets Excel detect the encoding.
        with open(self.file_name, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(self._row_cells(item))
class mongoStore():
    """Store items in a MongoDB collection, skipping duplicates.

    Connection parameters are read from ``settings.mongo_msg``. A unique
    index on ``casedocid`` makes the server reject documents whose
    ``casedocid`` is already present.
    """

    def __init__(self, table_name):
        """Connect to MongoDB and prepare the target collection.

        Args:
            table_name: Name of the collection that items are written to.
        """
        host = settings.mongo_msg["MONGODB_HOST"]
        port = int(settings.mongo_msg["MONGODB_PORT"])
        dbname = settings.mongo_msg["MONGODB_DBNAME"]
        # Create the database connection.
        self.client = pymongo.MongoClient(host=host, port=port)
        # Select the database and the collection.
        collection = self.client[dbname][table_name]
        # Unique index on the document ID so duplicate inserts are rejected.
        # create_index replaces ensure_index, which was removed in PyMongo 4.
        collection.create_index('casedocid', unique=True)
        self.post = collection

    def process_item(self, item):
        """Insert *item* into the collection and return it unchanged.

        A duplicate ``casedocid`` is reported on stdout and otherwise
        ignored, so the caller always gets the item back.

        Args:
            item: Mapping-like object; a ``dict`` copy of it is stored.

        Returns:
            The *item* that was passed in.
        """
        try:
            # insert_one replaces Collection.insert, removed in PyMongo 4.
            self.post.insert_one(dict(item))
        except DuplicateKeyError:
            # Same index key means the document is already stored.
            print('Duplicate key error collection')
        return item

    def close_client(self):
        """Close the underlying MongoDB client connection."""
        self.client.close()
下面是一篇介绍 CSV 文件分割脚本的文章,写得非常棒,但跟我想要解决的问题无关,以后或许用得着:
https://www.cnblogs.com/weiqi/p/6230464.html
更多推荐
已为社区贡献3条内容
所有评论(0)