我要学习

小游戏:找到最大的Python文件

扫描标准库目录

示例:my_PP4E/system/filetools/bigpy_dir.py

#!/usr/bin/env python
"""
找出单个目录下最大的Python源码文件。
搜索Python源代码库,除非指定了dir命令行参数。
"""

import os
import glob
import sys


def bigpyDir(dir):
    """Return (size, path) pairs for the ``*.py`` files directly in *dir*.

    Only names matching ``dir + os.sep + '*.py'`` are considered, so
    subdirectories are not searched. The list is sorted in ascending order;
    the byte size comes first in each tuple and is therefore the primary
    sort key.
    """
    pattern = dir + os.sep + '*.py'
    sized = ((os.path.getsize(path), path) for path in glob.glob(pattern))
    return sorted(sized)


def test():
    """Print the three smallest and three largest ``.py`` files of a directory.

    The directory is the first command-line argument. When the script is run
    without arguments, a hard-coded, machine-specific Anaconda library path
    is scanned instead.
    """
    no_arguments = len(sys.argv) == 1
    target = '/home/alone/anaconda3/lib/python3.7' if no_arguments else sys.argv[1]
    ranked = bigpyDir(target)
    # The list is ascending, so the head holds the smallest files and the
    # tail the largest.
    for part in (ranked[:3], ranked[-3:]):
        print(part)


# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    test()

输出:bigpy_dir.py

[(64, '/home/alone/anaconda3/lib/python3.7/__phello__.foo.py'), (129, '/home/alone/anaconda3/lib/python3.7/contextvars.py'), (257, '/home/alone/anaconda3/lib/python3.7/struct.py')]
[(117120, '/home/alone/anaconda3/lib/python3.7/inspect.py'), (143602, '/home/alone/anaconda3/lib/python3.7/turtle.py'), (228535, '/home/alone/anaconda3/lib/python3.7/_pydecimal.py')]

扫描标准库目录树

示例:bigpy_tree.py

#!/usr/bin/env python
"""
扫描整个目录树中最大的Python源代码文件。
搜索Python源代码库,利用pprint漂亮地显示结果。
"""

import os
import pprint
import sys

TRACE = False


def getLibPath():
    """Return the hard-coded library directory to scan on this platform.

    Platforms whose ``sys.platform`` begins with ``'win'`` get the Windows
    path; every other platform gets the Anaconda path.
    """
    on_windows = sys.platform.startswith('win')
    # The non-Windows path is machine-specific and must be edited by hand.
    return 'C:\\Python\\Lib' if on_windows else '/home/alone/anaconda3/lib'


def getFileList(rootdir):
    """Walk *rootdir* and return an unsorted list of (size, path) pairs.

    Every file whose name ends in ``.py`` anywhere in the tree is included.
    When the module-level ``TRACE`` flag is true, each directory and each
    matching file name is printed as it is encountered.
    """
    found = []
    for folder, _subfolders, names in os.walk(rootdir):
        if TRACE:
            print(folder)
        for name in names:
            if not name.endswith('.py'):
                continue
            if TRACE:
                print(name)
            fullname = os.path.join(folder, name)
            found.append((os.path.getsize(fullname), fullname))
    return found


def bigpyTree(rootdir):
    """Return the (size, path) pairs found under *rootdir*, sorted ascending."""
    return sorted(getFileList(rootdir))


def test():
    """Pretty-print the three smallest and three largest files in the library tree."""
    ranked = bigpyTree(getLibPath())
    # Ascending order: the head is the smallest, the tail the largest.
    for part in (ranked[:3], ranked[-3:]):
        pprint.pprint(part)


# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    test()

输出:bigpy_tree.py(linux)

[(0, '/home/alone/anaconda3/lib/python3.7/email/mime/__init__.py'),
 (0,
  '/home/alone/anaconda3/lib/python3.7/lib2to3/tests/data/fixers/myfixes/__init__.py'),
 (0, '/home/alone/anaconda3/lib/python3.7/pydoc_data/__init__.py')]
[(857010,
  '/home/alone/anaconda3/lib/python3.7/site-packages/astropy/_erfa/core.py'),
 (886617,
  '/home/alone/anaconda3/lib/python3.7/site-packages/sympy/integrals/rubi/rules/trinomial_products.py'),
 (1121413,
  '/home/alone/anaconda3/lib/python3.7/site-packages/sympy/integrals/rubi/rules/sine.py')]

扫描模块搜索路径

示例:bigpy_path.py

#!/usr/bin/env python
"""
找出模块导入搜索路径下最大的Python源码文件。
跳过已经访问过的目录,统一路径和大小写的格式以便使之正确匹配。
并在pprint打印结果中添加文件行数。
"""

import sys
import os
import pprint


def isWin():
    """Return True when running on Windows, False otherwise.

    The check looks at the first three characters of ``sys.platform``.
    The comparison result is returned directly so that the function always
    yields a real ``bool`` (a bare ``False`` expression without ``return``
    would make the non-Windows case return ``None``).
    """
    return sys.platform[:3] == 'win'


def normpath(path):
    """Return ``(normalized_path, case_folded_path_or_None)`` for *path*.

    The first item is ``os.path.normpath(path)``. The second item is the
    ``os.path.normcase`` form of that path when ``isWin()`` is true, and
    ``None`` otherwise.
    """
    normalized = os.path.normpath(path)
    folded = os.path.normcase(normalized) if isWin() else None
    return normalized, folded


def getPySizeList(curdir, filenames, trace=0):
    """Collect (size, line count, path) for every ``.py`` name in *filenames*.

    Args:
        curdir: Directory that the names in *filenames* belong to.
        filenames: Bare file names (not full paths) to consider.
        trace: When greater than 1, each matching name is printed, prefixed
            by a tab, before it is examined.

    Returns:
        A list of ``(size_in_bytes, number_of_lines, full_path)`` tuples in
        the order the names were given. Entries that cannot be sized or
        opened are reported on stdout and left out of the result.
    """
    allsizes = []
    for eachfile in filenames:
        if not eachfile.endswith('.py'):
            continue
        if trace > 1:
            print('\t' + eachfile)
        fullpath = os.path.join(curdir, eachfile)
        try:
            size = os.path.getsize(fullpath)
            # Opening is inside the try so an unreadable entry (for example
            # a directory named "*.py") is skipped instead of aborting the
            # scan. The context manager closes the handle promptly, and the
            # lines are counted lazily rather than loaded into a list.
            with open(fullpath, 'rb') as source:
                numlines = sum(1 for _ in source)
        except OSError as exc:
            print('skipping', fullpath, type(exc))
        else:
            allsizes.append((size, numlines, fullpath))
    return allsizes


def getAllSizes(trace=0):
    """Gather size records for the ``.py`` files under every ``sys.path`` entry.

    Each entry of ``sys.path`` is walked with ``os.walk``. A directory is
    processed only the first time its normalized name is seen; on Windows
    the case-folded name returned by ``normpath`` is used for that check.

    Args:
        trace: When greater than 0, every directory visited by the walk is
            printed. The value is also forwarded to ``getPySizeList``.

    Returns:
        An unsorted list of the tuples produced by ``getPySizeList``.
    """
    # A set keeps the "already seen?" test O(1); with a list the check is
    # linear in the number of directories walked so far.
    visited = set()
    allsizes = []

    for libdir in sys.path:
        for curdir, subdir, files in os.walk(libdir):
            if trace > 0:
                print(curdir)
            curdir, dircase = normpath(curdir)
            if not isWin():
                # Off Windows, the normalized path itself is the dedup key.
                dircase = curdir
            if dircase in visited:
                continue
            visited.add(dircase)
            allsizes.extend(getPySizeList(curdir, files, trace=trace))
    return allsizes


def showResult(allsizes):
    """Print the three smallest and three largest records, by size then by lines.

    *allsizes* is sorted in place twice: first with the default tuple
    ordering, then by the second tuple item. The caller's list is therefore
    left ordered by that second item.
    """
    def second_item(record):
        return record[1]

    reports = (('By size...', None), ('By lines...', second_item))
    for heading, sort_key in reports:
        print(heading)
        # key=None is the default ordering, i.e. plain tuple comparison.
        allsizes.sort(key=sort_key)
        pprint.pprint(allsizes[:3])
        pprint.pprint(allsizes[-3:])


def bigpyPath(trace=0, show=True):
    """Scan the module search path and optionally print the results.

    Args:
        trace: Forwarded to ``getAllSizes``; 1 traces directories, 2 also
            traces files.
        show: When true, the collected records are passed to ``showResult``.
    """
    records = getAllSizes(trace=trace)
    if not show:
        return
    showResult(records)


# Run the scan with default settings only when executed as a script.
if __name__ == '__main__':
    bigpyPath()

输出:bigpy_path.py(Linux)

By size...
[(0, 0, '/home/alone/anaconda3/lib/python3.7/email/mime/__init__.py'),
 (0,
  0,
  '/home/alone/anaconda3/lib/python3.7/lib2to3/tests/data/fixers/myfixes/__init__.py'),
 (0, 0, '/home/alone/anaconda3/lib/python3.7/pydoc_data/__init__.py')]
[(857010,
  22630,
  '/home/alone/anaconda3/lib/python3.7/site-packages/astropy/_erfa/core.py'),
 (886617,
  4071,
  '/home/alone/anaconda3/lib/python3.7/site-packages/sympy/integrals/rubi/rules/trinomial_products.py'),
 (1121413,
  5171,
  '/home/alone/anaconda3/lib/python3.7/site-packages/sympy/integrals/rubi/rules/sine.py')]
By lines...
[(0, 0, '/home/alone/anaconda3/lib/python3.7/email/mime/__init__.py'),
 (0,
  0,
  '/home/alone/anaconda3/lib/python3.7/lib2to3/tests/data/fixers/myfixes/__init__.py'),
 (0, 0, '/home/alone/anaconda3/lib/python3.7/pydoc_data/__init__.py')]
[(660933, 13441, '/home/alone/anaconda3/lib/python3.7/pydoc_data/topics.py'),
 (538989,
  13497,
  '/home/alone/anaconda3/lib/python3.7/site-packages/Cython/Compiler/ExprNodes.py'),
 (857010,
  22630,
  '/home/alone/anaconda3/lib/python3.7/site-packages/astropy/_erfa/core.py')]

扫描整台机器

示例:bigext_tree.py

#!/usr/bin/env python
"""
找到任意目录树里所有给定类型的文件里最大的那个。
避免重复路径,捕获错误,添加追踪和行数大小。
同样使用集合、文件迭代器和生成器以避免装载整个文件。
并试图绕过不可解码的目录/文件名称的打印。
"""

import os
import pprint
import sys


def tryprint(text):
    """Print *text*, falling back to its encoded bytes if stdout rejects it.

    Args:
        text: The string to print.

    If printing raises ``UnicodeEncodeError`` (the output stream's encoding
    cannot represent some character), the ``bytes`` produced by
    ``text.encode()`` are printed instead.
    """
    try:
        print(text)
    except UnicodeEncodeError:
        # encode() must actually be called: printing the bare attribute
        # would show the bound-method object instead of the text's bytes.
        print(text.encode())


def getFromCommand():
    """Read ``[dirname, extname, trace]`` from the command line.

    Positional arguments override the defaults ``os.curdir``, ``'.py'`` and
    ``1`` from left to right; the third argument is converted with ``int``.
    Arguments beyond the third are ignored.
    """
    dirname, extname, trace = os.curdir, '.py', 1
    given = sys.argv[1:]
    if len(given) >= 1:
        dirname = given[0]
    if len(given) >= 2:
        extname = given[1]
    if len(given) >= 3:
        trace = int(given[2])
    return [dirname, extname, trace]


def getFileList(curdir, files, extname, allsizes, trace):
    """Append a size record to *allsizes* for each matching name in *files*.

    Args:
        curdir: Directory that the names in *files* belong to.
        files: Bare file names to consider.
        extname: Only names ending with this suffix are examined.
        allsizes: List that receives ``(size_in_bytes, number_of_lines,
            full_path)`` tuples; it is modified in place.
        trace: When greater than 1, each matching name is printed (via
            ``tryprint``) before it is examined.

    Files that cannot be sized or read are reported on stdout and skipped.
    """
    for file in files:
        if not file.endswith(extname):
            continue
        if trace > 1:
            tryprint('\t' + file)
        fullpath = os.path.join(curdir, file)

        try:
            size = os.path.getsize(fullpath)
            # Count lines lazily in binary mode; the context manager makes
            # sure the handle is closed even if reading fails part-way.
            with open(fullpath, 'rb') as source:
                numlines = sum(1 for _ in source)
        except Exception:
            # exc_info is a function and must be called before indexing;
            # subscripting it directly would raise TypeError right here.
            print('error', sys.exc_info()[0])
        else:
            allsizes.append((size, numlines, fullpath))


def getAllSizes(dirname, extname, trace):
    """Walk *dirname* and return sorted size records for files ending in *extname*.

    Args:
        dirname: Root of the directory tree to walk.
        extname: File-name suffix to match, forwarded to ``getFileList``.
        trace: When truthy, each directory (and each skipped duplicate) is
            printed; the value is also forwarded to ``getFileList``.

    Returns:
        The list filled in by ``getFileList``, sorted ascending by the
        default tuple ordering.
    """
    visited = set()
    allsizes = []

    for curdir, subdir, files in os.walk(dirname):
        if trace:
            tryprint(curdir)
        curdir = os.path.normpath(curdir)

        # On Windows the case-folded name is the identity used for
        # duplicate detection; elsewhere the normalized path is used as-is.
        if sys.platform[:3] == 'win':
            fixname = os.path.normcase(curdir)
        else:
            fixname = curdir

        if fixname in visited:
            if trace:
                tryprint('skipping ' + curdir)
            continue

        # Record the same key that is tested above; storing the
        # un-normalized name would let case-variant duplicates through.
        visited.add(fixname)
        getFileList(curdir, files, extname, allsizes, trace)

    allsizes.sort()
    return allsizes


def showResult(allsizes):
    """Print the three lowest and three highest records by bytes, then by lines.

    *allsizes* is sorted in place once per report, first on tuple item 0 and
    then on tuple item 1, so the caller's list ends up ordered by item 1.
    """
    for column, title in enumerate(('bytes', 'lines')):
        print('\nBy {}...'.format(title))
        allsizes.sort(key=lambda record: record[column])
        for part in (allsizes[:3], allsizes[-3:]):
            pprint.pprint(part)


def bigextTree(
    command=True, show=True, dirname=os.curdir, extname='.py', trace=1
):
    """Find the largest files of one type in a directory tree.

    Args:
        command: When true, *dirname*, *extname* and *trace* are replaced by
            the values returned from ``getFromCommand``.
        show: When true, the collected records are passed to ``showResult``.
        dirname: Root directory to scan.
        extname: File-name suffix to match.
        trace: Trace level forwarded to ``getAllSizes``.
    """
    if command:
        dirname, extname, trace = getFromCommand()

    records = getAllSizes(dirname, extname, trace)
    if not show:
        return
    showResult(records)


# Run with command-line settings only when executed as a script.
if __name__ == '__main__':
    bigextTree()

输出:bigext_tree.py

python-programming---markdown-notes$ ./my_PP4E/system/filetools/bigext_tree.py . .py 0

By bytes...
[(0, 0, 'PP4E/Examples/PP4E/System/Media/__init__.py'),
 (7, 1, 'PP4E/Examples/PP4E/Dstruct/Basic/__init__.py'),
 (12, 3, 'PP4E/Examples/PP4E/Ai/ExpertSystem/__init__.py')]
[(52953, 1089, 'PP4E/Examples/PP4E/Gui/TextEditor/textEditor.py'),
 (52953,
  1089,
  'PP4E/changes/detailed-diffs/1.2/patched-files-12/textEditor.py'),
 (52953,
  1089,
  'PP4E/changes/detailed-diffs/1.3/patched-files-13/prior '
  '-release/patched-files-12/textEditor.py')]

By lines...
[(0, 0, 'PP4E/Examples/PP4E/System/Media/__init__.py'),
 (7, 1, 'PP4E/Examples/PP4E/Dstruct/Basic/__init__.py'),
 (17, 1, 'PP4E/Examples/PP4E/Dstruct/Classics/__init__.py')]
[(52953, 1089, 'PP4E/Examples/PP4E/Gui/TextEditor/textEditor.py'),
 (52953,
  1089,
  'PP4E/changes/detailed-diffs/1.2/patched-files-12/textEditor.py'),
 (52953,
  1089,
  'PP4E/changes/detailed-diffs/1.3/patched-files-13/prior '
  '-release/patched-files-12/textEditor.py')]
python-programming---markdown-notes$ ./my_PP4E/system/filetools/bigext_tree.py ./my_PP4E/system/ .py 1
./my_PP4E/system/
./my_PP4E/system/filetools
./my_PP4E/system/__pycache__

By bytes...
[(28, 1, 'my_PP4E/system/hello_out.py'),
 (29, 4, 'my_PP4E/system/test_argv.py'),
 (53, 2, 'my_PP4E/system/test_tty.py')]
[(2069, 82, 'my_PP4E/system/filetools/bigpy_path.py'),
 (2421, 94, 'my_PP4E/system/filetools/bigext_tree.py'),
 (2835, 93, 'my_PP4E/system/multi_3.py')]

By lines...
[(28, 1, 'my_PP4E/system/hello_out.py'),
 (53, 2, 'my_PP4E/system/test_tty.py'),
 (54, 2, 'my_PP4E/system/writer.py')]
[(2069, 82, 'my_PP4E/system/filetools/bigpy_path.py'),
 (2835, 93, 'my_PP4E/system/multi_3.py'),
 (2421, 94, 'my_PP4E/system/filetools/bigext_tree.py')]
python-programming---markdown-notes$ ./my_PP4E/system/filetools/bigext_tree.py . .txt 0

By bytes...
[(4, 2, 'PP4E/Examples/PP4E/System/Streams/input.txt'),
 (5, 2, 'my_PP4E/system/input.txt'),
 (6, 1, 'PP4E/Examples/PP4E/Gui/ShellGui/eggs.txt')]
[(360381,
  4693,
  'PP4E/Examples/PP4E/Internet/Email/PyMailGui/media/i18n-filenames-tests/Mail-saved-after-sent--OpenMeInGUI.txt'),
 (360381,
  4693,
  'PP4E/changes/detailed-diffs/1.3/patched-files-13/i18n-filenames-tests/Mail-saved-after-sent--OpenMeInGUI.txt'),
 (3316452,
  43205,
  'PP4E/Examples/PP4E/Internet/Email/PyMailGui/SavedMail/savemany-3E.txt')]

By lines...
[(6, 1, 'PP4E/Examples/PP4E/Gui/ShellGui/eggs.txt'),
 (6, 1, 'PP4E/Examples/PP4E/Gui/ShellGui/new-eggs.txt'),
 (6, 1, 'PP4E/Examples/PP4E/Gui/ShellGui/temp/eggs.txt')]
[(360381,
  4693,
  'PP4E/Examples/PP4E/Internet/Email/PyMailGui/media/i18n-filenames-tests/Mail-saved-after-sent--OpenMeInGUI.txt'),
 (360381,
  4693,
  'PP4E/changes/detailed-diffs/1.3/patched-files-13/i18n-filenames-tests/Mail-saved-after-sent--OpenMeInGUI.txt'),
 (3316452,
  43205,
  'PP4E/Examples/PP4E/Internet/Email/PyMailGui/SavedMail/savemany-3E.txt')]

———————————————————————————————————————————

😃 学完博客后,是不是有所启发呢?如果对此还有疑问,欢迎在评论区留言哦。
如果还想了解更多的信息,欢迎大佬们关注我哦,也可以查看我的个人博客网站BeacherHou

Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐