Python——文件处理

lingedeng

4598人浏览 · 2011-12-27 17:41:31

lingedeng · 2011-12-27 17:41:31 发布

#!/usr/bin/env python
# -*- coding:utf-8 -*-

import sys, os, zipfile, tempfile, tarfile, fnmatch;

def read_file(fileName):
    '''
    read a text file and print how many lines it contains

    fileName -- path of the file to read

    the whole file is loaded into memory with readlines(); nothing is
    returned, the count is only printed
    '''
    # "with" closes the file even when reading raises
    with open(fileName, 'r') as f:
        line_list = f.readlines()
        # alternatives that give the lines without the trailing newline:
        #   f.read().splitlines()
        #   f.read().split('\n')
        #   [L.rstrip('\n') for L in f.readlines()]
    # a single parenthesised argument prints the same on Python 2 and 3
    print(len(line_list))

def read_file_by_chunks(fileName, chunksize=100):
    '''
    generator yielding the content of a file as successive binary chunks

    fileName  -- path of the file to read (opened in binary mode)
    chunksize -- maximum number of bytes per yielded chunk

    every chunk except possibly the last holds chunksize bytes; an empty
    file yields nothing
    '''
    # "with" guarantees the file is closed when the generator is exhausted,
    # closed early by the caller, or interrupted by an error in read()
    with open(fileName, 'rb') as file_object:
        while True:
            chunk = file_object.read(chunksize)
            if not chunk:
                break
            yield chunk

def search_replace_text_infile(stext='', rtext='', input_file=sys.stdin, output_file=sys.stdout):
    '''
    copy input_file to output_file, replacing every stext with rtext

    stext       -- text to search for
    rtext       -- replacement text
    input_file  -- a path (string) or an already open readable object
    output_file -- a path (string) or an already open writable object

    files given as paths are opened here and always closed here; objects
    handed in by the caller are left open, they stay the caller's business
    '''
    try:
        string_types = basestring   # Python 2: covers str and unicode
    except NameError:
        string_types = str          # Python 3 has no basestring

    # decide ownership from the ARGUMENTS: once opened, the file objects
    # themselves are never strings, so testing them would never close anything
    own_input = isinstance(input_file, string_types)
    own_output = isinstance(output_file, string_types)

    if own_input:
        ifile = open(input_file, 'r')
    else:
        ifile = input_file
    try:
        if own_output:
            ofile = open(output_file, 'w')
        else:
            ofile = output_file
        try:
            for s in ifile:
                ofile.write(s.replace(stext, rtext))
        finally:
            if own_output:
                ofile.close()
    finally:
        if own_input:
            ifile.close()
    
def getline(filename, desired_line_number):
    '''
    return one line of a text file, without its trailing newline

    filename            -- path of the file to read
    desired_line_number -- 1-based number of the wanted line

    returns '' when the number is smaller than 1 or larger than the number
    of lines in the file
    '''
    if desired_line_number < 1:
        return ''

    try:
        # 'U' asks for universal newlines so '\r\n' is seen as '\n'
        f = open(filename, 'rU')
    except ValueError:
        # newer Python 3 releases reject the 'U' flag; plain text mode
        # already translates newlines there
        f = open(filename, 'r')

    try:
        for current_line, line in enumerate(f):
            if desired_line_number - 1 == current_line:
                return line.rstrip('\n')
    finally:
        # close the file on every exit path, including the early return
        f.close()

    return ''

def linecount_w(filename):
    '''
    count the lines of a file by running the external "wc -l" command

    filename -- path of the file to count

    returns the count as an int; only works where a "wc" program is
    available on the PATH
    '''
    import subprocess

    # an argument list bypasses the shell, so names containing spaces or
    # shell metacharacters are passed through untouched; "--" keeps a name
    # starting with "-" from being read as an option
    proc = subprocess.Popen(['wc', '-l', '--', filename],
                            stdout=subprocess.PIPE)
    output = proc.communicate()[0]
    # wc prints "<count> <name>"; the first field is the count
    return int(output.split()[0])
    
def linecount_1(filename):
    '''
    count the lines of a file by reading all of them into a list

    simple, but holds the whole file in memory at once
    '''
    # "with" closes the file instead of leaving that to the garbage collector
    with open(filename, 'r') as f:
        return len(f.readlines())
    
def linecount_2(filename):
    '''
    count the lines of a file by iterating over it

    only one line is held in memory at a time; an empty file gives 0
    '''
    # "with" closes the file instead of leaving that to the garbage collector
    with open(filename, 'r') as f:
        return sum(1 for line in f)

def linecount_3(filename):
    '''
    count the lines of a file by counting newline characters chunk by chunk

    the file is read in blocks of 65535 characters, so memory use stays
    bounded; because newline characters are what is counted, a last line
    that has no trailing newline is not included
    '''
    linecount = 0
    # "with" closes the file once counting is done or has failed
    with open(filename, 'r') as f:
        while True:
            chunk = f.read(65535)
            if not chunk:
                break
            linecount += chunk.count('\n')
    return linecount

def words_of_file_in_firstline(filename, line_to_words=str.split):
    '''
    generator over the words found in the first line of a file

    filename      -- path of the file to read
    line_to_words -- callable that turns the line into a sequence of words
                     (whitespace splitting by default)
    '''
    words = line_to_words(getline(filename, 1))
    for single_word in words:
        yield single_word
    

def read_from_zip_file(filename):
    '''
    print the name and uncompressed size of every member of a zip archive

    filename -- path of the zip archive

    each member is read completely into memory to measure it
    '''
    zipf = zipfile.ZipFile(filename, 'r')
    try:
        for fname in zipf.namelist():
            # named "data" so the built-in "bytes" is not shadowed
            data = zipf.read(fname)
            print('File %s in %s has %d bytes' % (fname, filename, len(data)))
    finally:
        # release the archive even when a member cannot be read
        zipf.close()

def just_for_fun():
    '''
    demonstrate importing a module straight out of a zip archive

    builds a temporary zip holding a tiny "hello" module, puts the archive
    first on sys.path, imports the module, prints the result of hello.f()
    and finally removes both the sys.path entry and the temporary archive
    '''
    #create a temp file with zip suffix
    handle, tmpFileName = tempfile.mkstemp('.zip')
    os.close(handle)

    try:
        #open temp zip file
        tmpZfile = zipfile.ZipFile(tmpFileName, 'w')
        try:
            #write something to temp zip file
            #parameter 1: the name of the member inside the archive
            #parameter 2: the member content, given as a string
            tmpZfile.writestr('hello.py', 'def f(): \
                                        return "hello world from " + __file__\n')
        finally:
            tmpZfile.close()

        #put temp zip file first on the module search path
        sys.path.insert(0, tmpFileName)
        try:
            #import 'hello' module
            import hello
            #call hello module's f function
            print(hello.f())
        finally:
            #do not leave an entry pointing at the archive deleted below
            if tmpFileName in sys.path:
                sys.path.remove(tmpFileName)
    finally:
        #remove the archive even when writing or importing failed
        os.unlink(tmpFileName)
    
try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO
class ZipString(zipfile.ZipFile):
    '''
    ZipFile variant whose archive content comes from an in-memory string
    instead of a file on disk
    '''
    def __init__(self, dataString):
        # wrap the raw archive data in a file-like object ZipFile can read
        archive_buffer = StringIO(dataString)
        zipfile.ZipFile.__init__(self, archive_buffer)

def make_tar(source_folder, dest_folder, compression='bz2'):
    '''
    pack a folder into a tar archive and return the path of the archive

    source_folder -- folder to archive; it is stored under its own base name
    dest_folder   -- existing folder in which the archive is created
    compression   -- compression suffix understood by tarfile ('bz2', 'gz');
                     an empty value gives a plain, uncompressed .tar

    the archive is named <base name>.tar[.<compression>]
    '''
    if compression:
        file_ext = '.' + compression
        mode = 'w:' + compression
    else:
        file_ext = ''
        mode = 'w'

    # normpath drops a trailing separator; without it basename('dir/') is ''
    # and the archive would get an empty name
    arcname = os.path.basename(os.path.normpath(source_folder))
    dest_file = "%s.tar%s" % (arcname, file_ext)
    dest_path = os.path.join(dest_folder, dest_file)

    out = tarfile.open(dest_path, mode)
    try:
        out.add(source_folder, arcname)
    finally:
        # always close so the archive is finalised and the handle released
        out.close()
    return dest_path

CHUNK_SIZE = 16 * 1024

def _is_real_file(fileObj):
    '''
    tell whether fileObj is a real file object rather than a look-alike
    '''
    try:
        # Python 2: the built-in file type is the exact test
        return isinstance(fileObj, file)
    except NameError:
        pass
    # Python 3 has no "file" type: accept io objects that own a descriptor
    import io
    if not isinstance(fileObj, io.IOBase):
        return False
    try:
        fileObj.fileno()
    except (AttributeError, IOError, OSError, ValueError):
        # in-memory streams raise here because nothing backs them
        return False
    return True

def adapte_file(fileObj):
    '''
    turn a file-like object into a real file object

    fileObj -- a real file, or any object offering read(size) and close()

    a real file is returned unchanged; anything else is copied in blocks of
    CHUNK_SIZE into an anonymous temporary file, the source is closed, and
    the temporary file is returned positioned at its start
    '''
    if _is_real_file(fileObj):
        return fileObj

    tmpFileObj = tempfile.TemporaryFile()
    try:
        while True:
            data = fileObj.read(CHUNK_SIZE)
            if not data:
                break
            tmpFileObj.write(data)
    except Exception:
        # do not leak the temporary file when the copy fails half way
        tmpFileObj.close()
        raise

    fileObj.close()
    tmpFileObj.seek(0)
    return tmpFileObj

def all_files(rootPath, patterns='*', single_level=False):
    '''
    generator over the paths of files below rootPath whose names match

    rootPath     -- folder where the walk starts
    patterns     -- one or more fnmatch patterns separated by ';'
    single_level -- when true only rootPath itself is examined, without
                    descending into sub-folders

    inside each folder the names are visited in sorted order and a file is
    yielded once even when several patterns match it
    '''
    pattern_list = patterns.split(';')
    for folder, subfolders, names in os.walk(rootPath):
        names.sort()
        for name in names:
            # one matching pattern is enough
            if any(fnmatch.fnmatch(name, wanted) for wanted in pattern_list):
                yield os.path.join(folder, name)
        if single_level:
            break

def swapextensions(rootPath, before, after):
    '''
    rename every file below rootPath that ends in one extension so that it
    ends in another, and return how many files were renamed

    rootPath -- folder where the walk starts (sub-folders included)
    before   -- extension to replace, with or without the leading dot
    after    -- new extension, with or without the leading dot
    '''
    if not before.startswith('.'):
        before = '.' + before
    if not after.startswith('.'):
        after = '.' + after
    cut = len(before)

    renamed = 0
    for folder, subfolders, names in os.walk(rootPath):
        for name in names:
            if not name.endswith(before):
                continue
            source = os.path.join(folder, name)
            # drop the old extension from the end, then attach the new one
            os.rename(source, source[:-cut] + after)
            renamed += 1
    return renamed

def search_file(filename, searchPath, pathsep=os.pathsep):
    '''
    look for a file in a list of folders and return its absolute path

    filename   -- name of the file to find
    searchPath -- folders to try, joined by pathsep
    pathsep    -- separator used in searchPath (os.pathsep by default)

    folders are tried in order and the first hit wins; None is returned
    when no folder holds the file
    '''
    for folder in searchPath.split(pathsep):
        candidate = os.path.join(folder, filename)
        if not os.path.isfile(candidate):
            continue
        return os.path.abspath(candidate)
    return None

def addPythonSearchPath(newPath):
    '''
    add new path to module search path of python
    return:
    1  -  success
    0  -  added path already on sys.path
    -1 -  added path doesn't exist

    paths are compared in absolute form; on win32 they are lower-cased
    first, so the comparison there ignores case
    '''
    if not os.path.exists(newPath):
        return -1

    is_windows = sys.platform == 'win32'
    newPath = os.path.abspath(newPath)
    if is_windows:
        newPath = newPath.lower()

    for searchPath in sys.path:
        searchPath = os.path.abspath(searchPath)
        if is_windows:
            searchPath = searchPath.lower()
        # this comparison has to run on every platform; tying it to the
        # non-win32 branch would skip duplicate detection on Windows
        if newPath in (searchPath, searchPath + os.sep):
            return 0

    sys.path.append(newPath)
    return 1

if __name__ == '__main__':
    # demo driver: runs each helper defined above against sample files and
    # folders ('apache_log', 'twittericon.png', 'sample.txt', 'Archive.zip',
    # './OCP') addressed relative to the current working directory

    # print the number of lines of the sample log
    read_file('apache_log')
    
    # add up the chunk lengths to get the size of a binary file
    totalbytes = 0
    for chunk in read_file_by_chunks('twittericon.png'):
        totalbytes += len(chunk)
    print 'twittericon.png file size is %d bytes' % totalbytes
    
    # disabled: with the default arguments this filters stdin to stdout
    #search_replace_text_infile('who', '***')
    
    # fetch one line with the home-made getline ...
    desired_line_number = 1
    print 'The %d line in "apache_log" is "%s"' % (desired_line_number, getline('apache_log', desired_line_number))
    
    # ... and with the standard library equivalent
    import linecache
    #see also linecache.clearcache and linecache.checkcache
    print 'The %d line in "sample.txt" is "%s"' % (desired_line_number, linecache.getline('sample.txt', desired_line_number).rstrip('\n'))
    
    # run the four line counting variants on the same file
    for f in linecount_w, linecount_1, linecount_2, linecount_3:
        print f.__name__, f('apache_log')
        
    wordlist = []
    for word in words_of_file_in_firstline('apache_log'):
        #reverse the characters of each word
        wordlist.append(word[::-1])
    print ' '.join(wordlist)
    
    # list the members of a zip archive with their sizes
    read_from_zip_file('Archive.zip')
    
    # import a module from a temporary zip archive
    just_for_fun()
    
    # disabled: would create an archive of ./OCP in the current folder
    #print make_tar('./OCP', '.')
    
    # list the python files below ./OCP
    for path in all_files('./OCP', '*.py'):
        print path
        
    # NOTE: this step renames the matching files under ./OCP on disk
    path = './OCP'
    before = 'html'
    after = 'htm'
    print 'the count of swap extensions from %s to %s at path "%s" is %d' % (before, after, path, swapextensions(path, before, after))
    
    # look for "ls" in two folders; search_file gives None when not found
    searchpath = '/bin' + os.pathsep + '/usr/bin'
    find_file = search_file('ls', searchpath)
    if find_file:
        print 'File "ls" found at %s.' % find_file
    else:
        print 'File "ls" not found.'
    
    # show sys.path before and after adding ./OCP; the printed result is
    # 1 (added), 0 (already present) or -1 (path does not exist)
    print 'origin search path:'
    for x in sys.path:
        print x
    print "add new path to module search path of python, result:%d" % addPythonSearchPath('./OCP')
    print 'New search path:'
    for x in sys.path:
        print x

CSDN学习社区

CSDN联合极客时间，共同打造面向开发者的精品内容学习社区，助力成长！

更多推荐

cover

Kaldi之父，IEEE Fellow，小米首席语音科学家Daniel Povey将出席2024全球机器学习技术大会并发表演讲！

CSDN学习社区

cover

探索神经网络在商品销售和图像识别中的应用

CSDN学习社区

cover

基于stm32F103的座面声控台灯

CSDN学习社区

所有评论(0)

查看更多评论

lingedeng

已为社区贡献1条内容