Python——文件处理
#!/usr/bin/env python# -*- coding:utf-8 -*-import sys, os, zipfile, tempfile, tarfile, fnmatch;def read_file(fileName):'''read file and print file line count'''f = open(fileNa
·
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import sys, os, zipfile, tempfile, tarfile, fnmatch;
def read_file(fileName):
    '''
    Print the number of lines contained in a text file.

    fileName -- path of the file, opened in text mode.

    The count is written to standard output; nothing is returned.
    '''
    # The with-statement closes the file even when reading raises.
    with open(fileName, 'r') as f:
        # Count while streaming so the whole file is never held in memory.
        # To get the lines themselves without the trailing '\n', use
        # f.read().splitlines() instead.
        line_count = sum(1 for _ in f)
    # A single-argument print(...) behaves the same on Python 2 and 3.
    print(line_count)
def read_file_by_chunks(fileName, chunksize=100):
    '''
    Lazily yield the content of a file as successive binary chunks.

    fileName  -- path of the file, opened in binary mode.
    chunksize -- maximum number of bytes per yielded chunk.

    Every chunk except possibly the last holds exactly chunksize bytes.
    '''
    # Opening inside "with" guarantees the handle is released when the
    # generator is exhausted, closed early, or a read fails.
    with open(fileName, 'rb') as file_object:
        while True:
            chunk = file_object.read(chunksize)
            if not chunk:
                # An empty read means end of file.
                break
            yield chunk
def search_replace_text_infile(stext='', rtext='', input_file=sys.stdin, output_file=sys.stdout):
    '''
    Copy input to output, replacing every occurrence of stext with rtext.

    stext       -- text to search for.
    rtext       -- replacement text.
    input_file  -- path to read from, or an already open file-like object.
    output_file -- path to write to, or an already open file-like object.

    Files opened here (arguments given as paths) are closed before
    returning; file objects supplied by the caller are left open.
    '''
    try:
        string_types = basestring  # Python 2: matches str and unicode
    except NameError:
        string_types = str  # Python 3 has no basestring

    # Decide ownership from the *arguments*: the opened objects are never
    # strings, so testing them would leave our own files unclosed.
    owns_input = isinstance(input_file, string_types)
    owns_output = isinstance(output_file, string_types)

    ifile = open(input_file, 'r') if owns_input else input_file
    try:
        ofile = open(output_file, 'w') if owns_output else output_file
        try:
            for s in ifile:
                ofile.write(s.replace(stext, rtext))
        finally:
            if owns_output:
                ofile.close()
    finally:
        if owns_input:
            ifile.close()
def getline(filename, desired_line_number):
    '''
    Return one line of a text file, without its trailing newline.

    filename            -- path of the file to read.
    desired_line_number -- 1-based number of the wanted line.

    Returns '' when the number is below 1 or beyond the end of the file.
    '''
    if desired_line_number < 1:
        return ''
    # 'U' requests universal newlines on Python 2; Python 3 text mode
    # already does that and newer versions reject the 'U' flag.
    mode = 'rU' if sys.version_info[0] < 3 else 'r'
    # "with" closes the file on every exit path, including early return.
    with open(filename, mode) as f:
        for current_line, line in enumerate(f, 1):
            if current_line == desired_line_number:
                return line.rstrip('\n')
    return ''
def linecount_w(filename):
    '''
    Count the lines of a file by running the external "wc -l" command.

    filename -- path of the file to count.

    Returns the integer printed by wc. Raises IndexError when wc
    produced no output (for example because the file does not exist).
    '''
    import subprocess
    # An argument list (no shell) keeps file names containing spaces or
    # shell metacharacters from being split or interpreted; "--" stops a
    # name starting with "-" from being parsed as an option.
    proc = subprocess.Popen(['wc', '-l', '--', filename],
                            stdout=subprocess.PIPE)
    output = proc.communicate()[0]
    return int(output.split()[0])
def linecount_1(filename):
    '''
    Count the lines of a file by reading all of them into a list.

    filename -- path of the file to count.

    A final line without a trailing newline is counted as a line.
    '''
    # "with" closes the handle instead of leaving it to garbage collection.
    with open(filename, 'r') as f:
        return len(f.readlines())
def linecount_2(filename):
    '''
    Count the lines of a file by iterating over it once.

    filename -- path of the file to count.

    A final line without a trailing newline is counted as a line.
    '''
    # Stays 0 when the loop body never runs (empty file).
    linecount = 0
    # "with" closes the handle instead of leaving it to garbage collection.
    with open(filename, 'r') as f:
        # Numbering from 1 leaves the total in linecount after the loop.
        for linecount, _ in enumerate(f, 1):
            pass
    return linecount
def linecount_3(filename):
    '''
    Count the newline characters of a file, reading it in large chunks.

    filename -- path of the file to count.

    Only '\n' characters are counted, so a final line that lacks a
    trailing newline does not add to the result.
    '''
    linecount = 0
    # "with" closes the handle; the original never closed it.
    with open(filename, 'r') as f:
        while True:
            chunk = f.read(65535)
            if not chunk:
                # An empty read means end of file.
                break
            linecount += chunk.count('\n')
    return linecount
def words_of_file_in_firstline(filename, line_to_words=str.split):
    '''
    Yield, one at a time, the words found on the first line of a file.

    filename      -- path of the file whose first line is read.
    line_to_words -- callable that turns the line into an iterable of
                     words; by default the line is split on whitespace.
    '''
    words = line_to_words(getline(filename, 1))
    for item in words:
        yield item
def read_from_zip_file(filename):
    '''
    Print the size in bytes of every member stored in a zip archive.

    filename -- path of the zip archive to inspect.

    One line per member is written to standard output.
    '''
    # "with" closes the archive even if reading a member fails.
    with zipfile.ZipFile(filename, 'r') as zipf:
        for fname in zipf.namelist():
            # Named "data" so the builtin "bytes" is not shadowed.
            data = zipf.read(fname)
            print('File %s in %s has %d bytes' % (fname, filename, len(data)))
def just_for_fun():
    '''
    Demonstrate importing a module straight out of a zip archive.

    A temporary zip file holding a one-function module "hello" is built,
    put first on sys.path, imported, and the result of hello.f() is
    printed. The temporary file and its sys.path entry are removed
    afterwards, even if a step fails. The imported module itself stays
    cached in sys.modules.
    '''
    # Create an empty temp file with a .zip suffix; only its name is needed.
    handle, tmpFileName = tempfile.mkstemp('.zip')
    os.close(handle)
    try:
        tmpZfile = zipfile.ZipFile(tmpFileName, 'w')
        try:
            # First argument: member name inside the archive.
            # Second argument: the member's content.
            tmpZfile.writestr('hello.py', 'def f(): return "hello world from " + __file__\n')
        finally:
            tmpZfile.close()
        # Put the archive first so that "hello" is looked up there first.
        sys.path.insert(0, tmpFileName)
        try:
            import hello
            print(hello.f())
        finally:
            # Do not leave a soon-to-be-deleted archive on the search path.
            if tmpFileName in sys.path:
                sys.path.remove(tmpFileName)
            sys.path_importer_cache.pop(tmpFileName, None)
    finally:
        os.unlink(tmpFileName)
try:
    from cStringIO import StringIO
except ImportError:
    try:
        from StringIO import StringIO
    except ImportError:
        # Python 3 has neither module; zip data is bytes, so the in-memory
        # bytes buffer is the matching replacement under the same name.
        from io import BytesIO as StringIO


class ZipString(zipfile.ZipFile):
    '''
    Read-only zip archive whose content comes from an in-memory string.

    dataString -- the complete raw data of a zip file (a byte string).
    '''
    def __init__(self, dataString):
        # Wrap the data in a file-like buffer, which ZipFile accepts in
        # place of a file name.
        zipfile.ZipFile.__init__(self, StringIO(dataString))
def make_tar(source_folder, dest_folder, compression='bz2'):
    '''
    Pack a folder into a tar archive and return the archive's path.

    source_folder -- folder to archive.
    dest_folder   -- existing folder in which the archive is created.
    compression   -- tarfile compression name such as 'bz2' or 'gz';
                     an empty value produces an uncompressed .tar file.

    The archive is named after the last component of source_folder, and
    that same name is the top-level entry inside the archive.
    '''
    if compression:
        file_ext = '.' + compression
        mode = 'w:' + compression
    else:
        file_ext = ''
        mode = 'w'
    # normpath drops a trailing separator; without it basename('dir/')
    # would be '' and the archive would be called just ".tar.bz2".
    arcname = os.path.basename(os.path.normpath(source_folder))
    dest_file = "%s.tar%s" % (arcname, file_ext)
    dest_path = os.path.join(dest_folder, dest_file)
    out = tarfile.open(dest_path, mode)
    try:
        out.add(source_folder, arcname)
    finally:
        # Close the archive even when adding the folder fails.
        out.close()
    return dest_path
# Number of bytes copied per read when spooling a file-like object.
CHUNK_SIZE = 16 * 1024


def _is_real_file(fileObj):
    '''Tell whether fileObj is a real file rather than a file-like object.'''
    try:
        # Python 2: real files are instances of the builtin file type.
        return isinstance(fileObj, file)
    except NameError:
        pass
    # Python 3 has no file type: accept io objects that expose a
    # file descriptor.
    import io
    if not isinstance(fileObj, io.IOBase):
        return False
    try:
        fileObj.fileno()
    except (OSError, ValueError):
        # In-memory streams such as BytesIO raise here.
        return False
    return True


def adapte_file(fileObj):
    '''
    Return a real file object holding the content of fileObj.

    fileObj -- a real file, or any object with read(size) and close().

    A real file is returned unchanged. Anything else is copied, in
    CHUNK_SIZE pieces, into a temporary file (opened in binary mode);
    the source is then closed and the temporary file is returned
    positioned at its start.
    '''
    if _is_real_file(fileObj):
        return fileObj
    tmpFileObj = tempfile.TemporaryFile()
    try:
        while True:
            data = fileObj.read(CHUNK_SIZE)
            if not data:
                break
            tmpFileObj.write(data)
        fileObj.close()
        tmpFileObj.seek(0)
    except Exception:
        # Do not leak the temporary file when the copy fails.
        tmpFileObj.close()
        raise
    return tmpFileObj
def all_files(rootPath, patterns='*', single_level=False):
    '''
    Yield the paths of the files under rootPath whose names match.

    rootPath     -- folder at which the walk starts.
    patterns     -- one or more fnmatch patterns separated by ';'.
    single_level -- when true, only rootPath itself is examined and its
                    sub-folders are not visited.

    Within each folder the names are produced in sorted order, and each
    file is yielded at most once even if several patterns match it.
    '''
    wanted = patterns.split(';')
    for folder, _subfolders, names in os.walk(rootPath):
        for name in sorted(names):
            # any() stops at the first matching pattern.
            if any(fnmatch.fnmatch(name, glob) for glob in wanted):
                yield os.path.join(folder, name)
        if single_level:
            return
def swapextensions(rootPath, before, after):
    '''
    Rename files under rootPath from one extension to another.

    rootPath -- folder whose tree is walked.
    before   -- extension to replace, with or without the leading '.'.
    after    -- new extension, with or without the leading '.'.

    Returns the number of files that were renamed.
    '''
    old_ext = before if before[:1] == '.' else '.' + before
    new_ext = after if after[:1] == '.' else '.' + after
    renamed = 0
    for folder, _subfolders, names in os.walk(rootPath):
        for name in names:
            if not name.endswith(old_ext):
                continue
            source = os.path.join(folder, name)
            # Cut the old extension off the full path, then add the new one.
            os.rename(source, source[:-len(old_ext)] + new_ext)
            renamed += 1
    return renamed
def search_file(filename, searchPath, pathsep=os.pathsep):
    '''
    Look for a file in each folder of a search-path string.

    filename   -- name of the file to find.
    searchPath -- folders to try, joined by pathsep.
    pathsep    -- separator used inside searchPath.

    Returns the absolute path of the first match, or None if no folder
    contains the file.
    '''
    candidates = (os.path.join(folder, filename)
                  for folder in searchPath.split(pathsep))
    for candidate in candidates:
        if os.path.isfile(candidate):
            return os.path.abspath(candidate)
    return None
def addPythonSearchPath(newPath):
    '''
    add new path to module search path of python
    return:
    1 - success
    0 - added path already on sys.path
    -1 - added path doesn't exist
    '''
    if not os.path.exists(newPath):
        return -1
    on_windows = sys.platform == 'win32'
    newPath = os.path.abspath(newPath)
    if on_windows:
        # Lower-cased so the comparison below ignores case on win32.
        newPath = newPath.lower()
    for searchPath in sys.path:
        searchPath = os.path.abspath(searchPath)
        if on_windows:
            searchPath = searchPath.lower()
        # The duplicate check must run on every platform; it used to sit
        # in an else branch that was skipped on win32.
        if newPath in (searchPath, searchPath + os.sep):
            return 0
    sys.path.append(newPath)
    return 1
if __name__ == '__main__':
    # Demo driver: calls each helper of this module in turn. The sample
    # inputs named below (apache_log, twittericon.png, sample.txt,
    # Archive.zip, ./OCP) are looked up relative to the current directory.
    # Every print(...) takes one pre-formatted argument so the output is
    # the same on Python 2 and Python 3.
    import linecache

    read_file('apache_log')

    totalbytes = 0
    for chunk in read_file_by_chunks('twittericon.png'):
        totalbytes += len(chunk)
    print('twittericon.png file size is %d bytes' % totalbytes)

    # Left disabled: with the default arguments this reads from stdin.
    #search_replace_text_infile('who', '***')

    desired_line_number = 1
    print('The %d line in "apache_log" is "%s"' % (desired_line_number, getline('apache_log', desired_line_number)))
    # The standard library offers the same lookup; see also
    # linecache.clearcache() and linecache.checkcache().
    print('The %d line in "sample.txt" is "%s"' % (desired_line_number, linecache.getline('sample.txt', desired_line_number).rstrip('\n')))

    for f in linecount_w, linecount_1, linecount_2, linecount_3:
        print('%s %s' % (f.__name__, f('apache_log')))

    # Reverse the characters of every word on the first line.
    wordlist = [word[::-1] for word in words_of_file_in_firstline('apache_log')]
    print(' '.join(wordlist))

    read_from_zip_file('Archive.zip')
    just_for_fun()

    # Left disabled: would create an archive in the current directory.
    #print(make_tar('./OCP', '.'))

    for path in all_files('./OCP', '*.py'):
        print(path)

    path = './OCP'
    before = 'html'
    after = 'htm'
    print('the count of swap extensions from %s to %s at path "%s" is %d' % (before, after, path, swapextensions(path, before, after)))

    searchpath = '/bin' + os.pathsep + '/usr/bin'
    find_file = search_file('ls', searchpath)
    if find_file:
        print('File "ls" found at %s.' % find_file)
    else:
        print('File "ls" not found.')

    print('origin search path:')
    for x in sys.path:
        print(x)
    print("add new path to module search path of python, result:%d" % addPythonSearchPath('./OCP'))
    print('New search path:')
    for x in sys.path:
        print(x)
更多推荐
已为社区贡献1条内容
所有评论(0)