来玩个游戏吧!10秒内找到最大的Python文件>_(Python编程 | 系统编程 | 完整的系统程序 | 找到最大的Python文件)
文章目录
- 小游戏:找到最大的Python文件
  - 扫描标准库目录
  - 扫描标准库目录树
  - 扫描模块搜索路径
  - 扫描整台机器

小游戏:找到最大的Python文件
扫描标准库目录
示例:my_PP4E/system/filetools/bigpy_dir.py
#!/usr/bin/env python
"""找出单个目录下最大的Python源码文件。搜索Python源代码库,除非指定了dir命令行参数。"""
import os
·
小游戏:找到最大的Python文件
扫描标准库目录
示例:my_PP4E/system/filetools/bigpy_dir.py
#!/usr/bin/env python
"""
找出单个目录下最大的Python源码文件。
搜索Python源代码库,除非指定了dir命令行参数。
"""
import os
import glob
import sys
def bigpyDir(dir):
    """Return the Python source files directly inside *dir*, sorted by size.

    Only ``*.py`` entries at the top level of *dir* are considered;
    subdirectories are not searched.

    Args:
        dir: Path of the directory to scan.

    Returns:
        A list of ``(size_in_bytes, path)`` tuples in ascending order.
    """
    # Escape the directory part so glob metacharacters in its name
    # ("[", "*", "?") are matched literally instead of as a pattern.
    pattern = glob.escape(dir) + os.sep + '*.py'
    # The size comes first in each tuple, so the default tuple ordering
    # sorts by size (ties are broken by path).
    return sorted((os.path.getsize(path), path) for path in glob.glob(pattern))
def test():
    """Print the three smallest and three largest ``.py`` files of a directory.

    The directory is taken from the first command-line argument. Without
    one, the directory that holds the running interpreter's ``os`` module
    (i.e. its standard library) is scanned.
    """
    if len(sys.argv) > 1:
        target = sys.argv[1]
    else:
        # Derive the standard-library location from the interpreter itself
        # rather than hard-coding one machine's install path.
        target = os.path.dirname(os.__file__)
    ranked = bigpyDir(target)
    print(ranked[:3])
    print(ranked[-3:])
# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    test()
输出:bigpy_dir.py
[(64, '/home/alone/anaconda3/lib/python3.7/__phello__.foo.py'), (129, '/home/alone/anaconda3/lib/python3.7/contextvars.py'), (257, '/home/alone/anaconda3/lib/python3.7/struct.py')]
[(117120, '/home/alone/anaconda3/lib/python3.7/inspect.py'), (143602, '/home/alone/anaconda3/lib/python3.7/turtle.py'), (228535, '/home/alone/anaconda3/lib/python3.7/_pydecimal.py')]
扫描标准库目录树
示例:bigpy_tree.py
#!/usr/bin/env python
"""
扫描整个目录树中最大的Python源代码文件。
搜索Python源代码库,利用pprint漂亮地显示结果。
"""
import os
import pprint
import sys
TRACE = False
def getLibPath():
    """Return the hard-coded library root to scan for the current platform.

    Returns:
        ``'C:\\Python\\Lib'`` when ``sys.platform`` starts with ``'win'``,
        otherwise a fixed POSIX path.
    """
    on_windows = sys.platform.startswith('win')
    # NOTE: the POSIX path is machine-specific and must be edited by hand.
    return 'C:\\Python\\Lib' if on_windows else '/home/alone/anaconda3/lib'
def getFileList(rootdir):
    """Collect the size of every ``.py`` file in the tree under *rootdir*.

    When the module-level ``TRACE`` flag is true, each visited directory
    and each matching file name is printed.

    Args:
        rootdir: Root of the directory tree to walk.

    Returns:
        An unsorted list of ``(size_in_bytes, path)`` tuples.
    """
    pyfiles = []
    for curdir, _subdirs, filenames in os.walk(rootdir):
        if TRACE:
            print(curdir)
        for name in filenames:
            if not name.endswith('.py'):
                continue
            if TRACE:
                print(name)
            path = os.path.join(curdir, name)
            try:
                size = os.path.getsize(path)
            except OSError as exc:
                # Dangling symlinks or files removed during the walk cannot
                # be measured; report and skip them instead of aborting.
                print('skipping', path, exc)
                continue
            pyfiles.append((size, path))
    return pyfiles
def bigpyTree(rootdir):
    """Return the ``(size, path)`` entries found under *rootdir*, ascending.

    Args:
        rootdir: Root of the directory tree handed to ``getFileList``.

    Returns:
        The entries produced by ``getFileList`` sorted with the default
        tuple ordering (size first, then path).
    """
    return sorted(getFileList(rootdir))
def test():
    """Pretty-print the three smallest and three largest files of the library tree."""
    ranked = bigpyTree(getLibPath())
    # Head of the ascending list first, then its tail.
    for part in (ranked[:3], ranked[-3:]):
        pprint.pprint(part)
# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    test()
输出:bigpy_tree.py(linux)
[(0, '/home/alone/anaconda3/lib/python3.7/email/mime/__init__.py'),
(0,
'/home/alone/anaconda3/lib/python3.7/lib2to3/tests/data/fixers/myfixes/__init__.py'),
(0, '/home/alone/anaconda3/lib/python3.7/pydoc_data/__init__.py')]
[(857010,
'/home/alone/anaconda3/lib/python3.7/site-packages/astropy/_erfa/core.py'),
(886617,
'/home/alone/anaconda3/lib/python3.7/site-packages/sympy/integrals/rubi/rules/trinomial_products.py'),
(1121413,
'/home/alone/anaconda3/lib/python3.7/site-packages/sympy/integrals/rubi/rules/sine.py')]
扫描模块搜索路径
示例:bigpy_path.py
#!/usr/bin/env python
"""
找出模块导入搜索路径下最大的Python源码文件。
跳过已经访问过的目录,统一路径和大小写的格式以便使之正确匹配。
并在pprint打印结果中添加文件行数。
"""
import sys
import os
import pprint
def isWin():
    """Return True when ``sys.platform`` starts with ``'win'``, else False.

    The result is always a real ``bool``; the non-Windows branch no longer
    falls through and returns ``None``.
    """
    return sys.platform.startswith('win')
def normpath(path):
    """Normalize *path* and, on Windows, also produce a case-folded form.

    Args:
        path: The path to normalize.

    Returns:
        A ``(normalized, case_normalized)`` pair. The second item is the
        ``os.path.normcase`` form when ``isWin()`` is true, otherwise None.
    """
    normalized = os.path.normpath(path)
    folded = os.path.normcase(normalized) if isWin() else None
    return normalized, folded
def getPySizeList(curdir, filenames, trace=0):
    """Measure every ``.py`` file among *filenames* located in *curdir*.

    Args:
        curdir: Directory that contains the files.
        filenames: Bare file names inside *curdir*.
        trace: When greater than 1, each matching file name is printed.

    Returns:
        A list of ``(size_in_bytes, line_count, full_path)`` tuples. Files
        that cannot be measured or read are reported and left out.
    """
    allsizes = []
    for name in filenames:
        if not name.endswith('.py'):
            continue
        if trace > 1:
            print('\t' + name)
        fullpath = os.path.join(curdir, name)
        try:
            size = os.path.getsize(fullpath)
            # Read in binary mode so undecodable bytes cannot fail; count
            # lines lazily and close the handle deterministically.
            with open(fullpath, 'rb') as source:
                numlines = sum(1 for _ in source)
        except OSError:
            print('skipping', fullpath, sys.exc_info()[0])
        else:
            allsizes.append((size, numlines, fullpath))
    return allsizes
def getAllSizes(trace=0):
    """Measure the ``.py`` files in every directory tree listed on ``sys.path``.

    Each directory is processed once: its normalized path (case-normalized
    when ``isWin()`` is true) is remembered and repeats are skipped.

    Args:
        trace: When greater than 0 every walked directory is printed; the
            value is also forwarded to ``getPySizeList``.

    Returns:
        An unsorted list of the tuples produced by ``getPySizeList``.
    """
    # A set keeps the "already seen" test constant-time however many
    # directories the search path contains.
    visited = set()
    allsizes = []
    for libdir in sys.path:
        for curdir, _subdirs, files in os.walk(libdir):
            if trace > 0:
                print(curdir)
            curdir, dircase = normpath(curdir)
            if not isWin():
                # No case-normalized form off Windows: use the path itself.
                dircase = curdir
            if dircase in visited:
                continue
            visited.add(dircase)
            allsizes.extend(getPySizeList(curdir, files, trace=trace))
    return allsizes
def showResult(allsizes):
    """Pretty-print the extremes of *allsizes* by size and then by line count.

    The list is sorted in place twice: first with the default tuple
    ordering, then (stably) by the second tuple item.

    Args:
        allsizes: List of ``(size, line_count, path)`` tuples.
    """
    def line_count(entry):
        return entry[1]

    def show_extremes():
        # Three smallest entries, then three largest, in current order.
        for part in (allsizes[:3], allsizes[-3:]):
            pprint.pprint(part)

    print('By size...')
    allsizes.sort()
    show_extremes()

    print('By lines...')
    allsizes.sort(key=line_count)
    show_extremes()
def bigpyPath(trace=0, show=True):
    """Scan the module search path for Python files and optionally report.

    Args:
        trace: Forwarded to ``getAllSizes``; 1 prints directories, 2 also
            prints file names.
        show: When true, the results are printed with ``showResult``.

    Returns:
        The list collected by ``getAllSizes``, so that callers passing
        ``show=False`` can still use the data.
    """
    allsizes = getAllSizes(trace=trace)
    if show:
        showResult(allsizes)
    return allsizes
# Run the scan only when executed as a script, not when imported.
if __name__ == '__main__':
    bigpyPath()
输出:bigpy_path.py(Linux)
By size...
[(0, 0, '/home/alone/anaconda3/lib/python3.7/email/mime/__init__.py'),
(0,
0,
'/home/alone/anaconda3/lib/python3.7/lib2to3/tests/data/fixers/myfixes/__init__.py'),
(0, 0, '/home/alone/anaconda3/lib/python3.7/pydoc_data/__init__.py')]
[(857010,
22630,
'/home/alone/anaconda3/lib/python3.7/site-packages/astropy/_erfa/core.py'),
(886617,
4071,
'/home/alone/anaconda3/lib/python3.7/site-packages/sympy/integrals/rubi/rules/trinomial_products.py'),
(1121413,
5171,
'/home/alone/anaconda3/lib/python3.7/site-packages/sympy/integrals/rubi/rules/sine.py')]
By lines...
[(0, 0, '/home/alone/anaconda3/lib/python3.7/email/mime/__init__.py'),
(0,
0,
'/home/alone/anaconda3/lib/python3.7/lib2to3/tests/data/fixers/myfixes/__init__.py'),
(0, 0, '/home/alone/anaconda3/lib/python3.7/pydoc_data/__init__.py')]
[(660933, 13441, '/home/alone/anaconda3/lib/python3.7/pydoc_data/topics.py'),
(538989,
13497,
'/home/alone/anaconda3/lib/python3.7/site-packages/Cython/Compiler/ExprNodes.py'),
(857010,
22630,
'/home/alone/anaconda3/lib/python3.7/site-packages/astropy/_erfa/core.py')]
扫描整台机器
示例:bigext_tree.py
#!/usr/bin/env python
"""
找到任意目录树里所有给定类型的文件里最大的那个。
避免重复路径,捕获错误,添加追踪和行数大小。
同样使用集合、文件迭代器和生成器以避免装载整个文件。
并试图绕过不可解码的目录/文件名称的打印。
"""
import os
import pprint
import sys
def tryprint(text):
    """Print *text*, falling back to its encoded bytes if stdout rejects it.

    Args:
        text: The string to print, e.g. a file name that may contain
            characters the output encoding cannot represent.
    """
    try:
        print(text)
    except UnicodeEncodeError:
        # Call encode() so the bytes repr is shown; printing the bare
        # attribute would show the bound-method object instead.
        print(text.encode())
def getFromCommand():
    """Build ``[dirname, extname, trace]`` from the command line.

    Up to three positional arguments override, in order, the defaults
    ``os.curdir``, ``'.py'`` and ``1``. The third one is converted with
    ``int``.

    Returns:
        A three-item list ``[dirname, extname, trace]``.
    """
    supplied = sys.argv[1:4]
    settings = [os.curdir, '.py', 1]
    # Overwrite as many leading defaults as arguments were given.
    settings[:len(supplied)] = supplied
    if len(supplied) == 3:
        settings[2] = int(supplied[2])
    return settings
def getFileList(curdir, files, extname, allsizes, trace):
    """Append size and line count of matching files in *curdir* to *allsizes*.

    Args:
        curdir: Directory that contains the files.
        files: Bare file names inside *curdir*.
        extname: Suffix a file name must end with to be measured.
        allsizes: List extended in place with
            ``(size_in_bytes, line_count, full_path)`` tuples.
        trace: When greater than 1, each matching file name is printed
            through ``tryprint``.
    """
    for file in files:
        if not file.endswith(extname):
            continue
        if trace > 1:
            tryprint('\t' + file)
        fullpath = os.path.join(curdir, file)
        try:
            size = os.path.getsize(fullpath)
            # Binary mode avoids decode errors; the generator counts lines
            # without loading the file, and the handle is always closed.
            with open(fullpath, 'rb') as source:
                numlines = sum(1 for _ in source)
        except Exception:
            # exc_info must be called; subscripting the function object
            # raised TypeError from inside this handler.
            print('error', sys.exc_info()[0])
        else:
            allsizes.append((size, numlines, fullpath))
def getAllSizes(dirname, extname, trace):
    """Measure every file ending in *extname* in the tree under *dirname*.

    Each directory is processed once, keyed by its normalized path (also
    case-normalized when ``sys.platform`` starts with ``'win'``).

    Args:
        dirname: Root of the directory tree to walk.
        extname: File-name suffix to match.
        trace: When true, visited and skipped directories are printed; the
            value is also forwarded to ``getFileList``.

    Returns:
        The ``(size, line_count, path)`` tuples in ascending order.
    """
    visited = set()
    allsizes = []
    for curdir, _subdirs, files in os.walk(dirname):
        if trace:
            tryprint(curdir)
        curdir = os.path.normpath(curdir)
        if sys.platform[:3] == 'win':
            fixname = os.path.normcase(curdir)
        else:
            fixname = curdir
        if fixname in visited:
            if trace:
                tryprint('skipping ' + curdir)
        else:
            # Store the same key that is tested above; storing the
            # un-normalized path would defeat case-insensitive matching.
            visited.add(fixname)
            getFileList(curdir, files, extname, allsizes, trace)
    allsizes.sort()
    return allsizes
def showResult(allsizes):
    """Pretty-print the extremes of *allsizes* ranked by bytes, then by lines.

    For each ranking the list is sorted in place (stably) on a single tuple
    item, and its first three and last three entries are printed.

    Args:
        allsizes: List of ``(size, line_count, path)`` tuples.
    """
    # The tuple position doubles as the sort column: 0 = bytes, 1 = lines.
    for column, label in enumerate(('bytes', 'lines')):
        print('\nBy {}...'.format(label))

        def pick(entry, column=column):
            return entry[column]

        allsizes.sort(key=pick)
        smallest, largest = allsizes[:3], allsizes[-3:]
        pprint.pprint(smallest)
        pprint.pprint(largest)
def bigextTree(
    command=True, show=True, dirname=os.curdir, extname='.py', trace=1
):
    """Find the largest files of a given type in a directory tree.

    Args:
        command: When true, *dirname*, *extname* and *trace* are replaced
            by the values ``getFromCommand`` reads from the command line.
        show: When true, the results are printed with ``showResult``.
        dirname: Root of the tree to scan.
        extname: File-name suffix to match.
        trace: Trace level forwarded to ``getAllSizes``.

    Returns:
        The list collected by ``getAllSizes``, so that callers passing
        ``show=False`` can still use the data.
    """
    if command:
        dirname, extname, trace = getFromCommand()
    allsizes = getAllSizes(dirname, extname, trace)
    if show:
        showResult(allsizes)
    return allsizes
# Run the scan only when executed as a script, not when imported.
if __name__ == '__main__':
    bigextTree()
输出:bigext_tree.py
python-programming---markdown-notes$ ./my_PP4E/system/filetools/bigext_tree.py . .py 0
By bytes...
[(0, 0, 'PP4E/Examples/PP4E/System/Media/__init__.py'),
(7, 1, 'PP4E/Examples/PP4E/Dstruct/Basic/__init__.py'),
(12, 3, 'PP4E/Examples/PP4E/Ai/ExpertSystem/__init__.py')]
[(52953, 1089, 'PP4E/Examples/PP4E/Gui/TextEditor/textEditor.py'),
(52953,
1089,
'PP4E/changes/detailed-diffs/1.2/patched-files-12/textEditor.py'),
(52953,
1089,
'PP4E/changes/detailed-diffs/1.3/patched-files-13/prior '
'-release/patched-files-12/textEditor.py')]
By lines...
[(0, 0, 'PP4E/Examples/PP4E/System/Media/__init__.py'),
(7, 1, 'PP4E/Examples/PP4E/Dstruct/Basic/__init__.py'),
(17, 1, 'PP4E/Examples/PP4E/Dstruct/Classics/__init__.py')]
[(52953, 1089, 'PP4E/Examples/PP4E/Gui/TextEditor/textEditor.py'),
(52953,
1089,
'PP4E/changes/detailed-diffs/1.2/patched-files-12/textEditor.py'),
(52953,
1089,
'PP4E/changes/detailed-diffs/1.3/patched-files-13/prior '
'-release/patched-files-12/textEditor.py')]
python-programming---markdown-notes$ ./my_PP4E/system/filetools/bigext_tree.py ./my_PP4E/system/ .py 1
./my_PP4E/system/
./my_PP4E/system/filetools
./my_PP4E/system/__pycache__
By bytes...
[(28, 1, 'my_PP4E/system/hello_out.py'),
(29, 4, 'my_PP4E/system/test_argv.py'),
(53, 2, 'my_PP4E/system/test_tty.py')]
[(2069, 82, 'my_PP4E/system/filetools/bigpy_path.py'),
(2421, 94, 'my_PP4E/system/filetools/bigext_tree.py'),
(2835, 93, 'my_PP4E/system/multi_3.py')]
By lines...
[(28, 1, 'my_PP4E/system/hello_out.py'),
(53, 2, 'my_PP4E/system/test_tty.py'),
(54, 2, 'my_PP4E/system/writer.py')]
[(2069, 82, 'my_PP4E/system/filetools/bigpy_path.py'),
(2835, 93, 'my_PP4E/system/multi_3.py'),
(2421, 94, 'my_PP4E/system/filetools/bigext_tree.py')]
python-programming---markdown-notes$ ./my_PP4E/system/filetools/bigext_tree.py . .txt 0
By bytes...
[(4, 2, 'PP4E/Examples/PP4E/System/Streams/input.txt'),
(5, 2, 'my_PP4E/system/input.txt'),
(6, 1, 'PP4E/Examples/PP4E/Gui/ShellGui/eggs.txt')]
[(360381,
4693,
'PP4E/Examples/PP4E/Internet/Email/PyMailGui/media/i18n-filenames-tests/Mail-saved-after-sent--OpenMeInGUI.txt'),
(360381,
4693,
'PP4E/changes/detailed-diffs/1.3/patched-files-13/i18n-filenames-tests/Mail-saved-after-sent--OpenMeInGUI.txt'),
(3316452,
43205,
'PP4E/Examples/PP4E/Internet/Email/PyMailGui/SavedMail/savemany-3E.txt')]
By lines...
[(6, 1, 'PP4E/Examples/PP4E/Gui/ShellGui/eggs.txt'),
(6, 1, 'PP4E/Examples/PP4E/Gui/ShellGui/new-eggs.txt'),
(6, 1, 'PP4E/Examples/PP4E/Gui/ShellGui/temp/eggs.txt')]
[(360381,
4693,
'PP4E/Examples/PP4E/Internet/Email/PyMailGui/media/i18n-filenames-tests/Mail-saved-after-sent--OpenMeInGUI.txt'),
(360381,
4693,
'PP4E/changes/detailed-diffs/1.3/patched-files-13/i18n-filenames-tests/Mail-saved-after-sent--OpenMeInGUI.txt'),
(3316452,
43205,
'PP4E/Examples/PP4E/Internet/Email/PyMailGui/SavedMail/savemany-3E.txt')]
———————————————————————————————————————————
😃 学完博客后,是不是有所启发呢?如果对此还有疑问,欢迎在评论区留言哦。
如果还想了解更多的信息,欢迎大佬们关注我哦,也可以查看我的个人博客网站BeacherHou。
更多推荐
已为社区贡献6条内容
所有评论(0)