Python 输出指定目录下所有文件(全路径),并核对 MD5 码
最近在工作中发现需要经常对公司的脚本文件的md5值进行比对,因此写了一个脚本进行处理,下面是脚本的优化过程
版本一:
功能说明:使用 os.walk() 遍历指定目录,把它返回的三个值(root、dirs、files)组合成路径,判断该路径是否为已存在的文件,最后排序输出:
import os
import subprocess
import sys


def all_files(filepath):
    """Return a sorted list of file paths found while walking *filepath*.

    Version one of the script; the layout below is reconstructed from a
    one-line original, and the traversal logic is kept as first written.

    NOTE: when a directory has sub-directories, the first branch only
    probes ``root/<subdir>/<file>`` for the file names that live in
    ``root`` itself. Files sitting directly in such a directory are
    therefore not reported; only directories without sub-directories are
    listed reliably. Version two removes these extra loops.
    """
    result = []
    for root, dirs, files in os.walk(filepath):
        if dirs:
            # Combine every sub-directory name with every file name of
            # the current directory and keep the combinations that exist.
            for subdir in dirs:
                for name in files:
                    candidate = os.path.join(root, subdir, name)
                    if os.path.isfile(candidate):
                        result.append(candidate)
        else:
            # No sub-directories: every entry in ``files`` lives in ``root``.
            for name in files:
                candidate = os.path.join(root, name)
                if os.path.isfile(candidate):
                    result.append(candidate)
    result.sort()
    return result


if __name__ == '__main__':
    # Print stored checksums (*.md5) as-is and compute one for other files.
    for sourcefile in all_files(sys.argv[1]):
        if not sourcefile.endswith('.md5'):
            subprocess.call(['md5sum', sourcefile])
        else:
            subprocess.call(['cat', sourcefile])
版本二:
查阅《Python Cookbook》后发现,由于对 os.walk() 的功能不够熟悉,版本一中有很多多余的循环和判断;参考书中的写法后,优化如下:
import os
import subprocess
import sys


def all_files(checkpath):
    """Return the full path of every file below *checkpath*.

    Directories are walked top-down. Within each directory the file names
    are sorted, and the sub-directory list is sorted in place so that
    os.walk() descends in a stable order. Only files are returned:
    directory paths are left out because the caller hands every entry to
    ``md5sum``, which cannot checksum a directory.

    A path that does not exist, or is not a directory, yields ``[]``.
    """
    result = []
    for path, subdirs, files in os.walk(checkpath):
        subdirs.sort()  # in-place edit steers the order of the walk
        result.extend(os.path.join(path, name) for name in sorted(files))
    return result


if __name__ == '__main__':
    # Print stored checksums (*.md5) as-is and compute one for other files.
    for sourcefile in all_files(sys.argv[1]):
        if sourcefile.endswith('.md5'):
            subprocess.call(['cat', sourcefile])
        else:
            subprocess.call(['md5sum', sourcefile])
版本三:
增加对多个目录的比对支持:all_files() 不变,只需在脚本后半部分增加对命令行参数列表的遍历:
if __name__ == '__main__':
    # Every command-line argument is treated as a directory to check.
    for checkpath in sys.argv[1:]:
        for sourcefile in all_files(checkpath):
            # Print stored checksums (*.md5) as-is; compute one otherwise.
            if sourcefile.endswith('.md5'):
                subprocess.call(['cat', sourcefile])
            else:
                subprocess.call(['md5sum', sourcefile])
版本四:
最终脚本,增加帮助功能:
#! /usr/bin/python
"""Check md5sums for the ufsd files.

Walks every directory given on the command line, prints the content of
each ``*.md5`` file and runs ``md5sum`` on every other file, so the stored
and the computed checksums can be compared.
"""

__version__ = "$CheckMd5 = 2.0$"

import os
import subprocess
import sys
from optparse import OptionParser


def all_files(checkpath):
    """Return the full path of every file below *checkpath*.

    File names are sorted within each directory, and the sub-directory
    list is sorted in place so that os.walk() descends in a stable order.
    Directory paths are not returned, because ``md5sum`` cannot checksum
    a directory. A missing or non-directory path yields ``[]``.
    """
    result = []
    for path, subdirs, files in os.walk(checkpath):
        subdirs.sort()  # in-place edit steers the order of the walk
        result.extend(os.path.join(path, name) for name in sorted(files))
    return result


def cleckmd5(paths=None):
    """Print stored and computed md5 sums for every file under *paths*.

    paths -- iterable of directories to walk; defaults to ``sys.argv[1:]``
             so that a call without arguments behaves as before.

    (The function name is kept as published.)
    """
    if paths is None:
        paths = sys.argv[1:]
    for checkpath in paths:
        for sourcefile in all_files(checkpath):
            if sourcefile.endswith('.md5'):
                subprocess.call(['cat', sourcefile])
            else:
                subprocess.call(['md5sum', sourcefile])


def execute_from_command_line(argv=None):
    """Parse *argv* (default ``sys.argv``) and run the requested action.

    Positional arguments, and the value of -f/--file when given, are the
    directories to check. -v/--version prints the script version.
    --verbose announces the paths before checking; -q/--quiet turns that
    off again.
    """
    if argv is None:
        argv = sys.argv
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option("-f", "--file", dest="filename",
                      help="read data from Filename")
    parser.add_option("-v", "--version", action="store_true", dest="version",
                      help="print the script version")
    # Without an option that stores True, "verbose" could never be enabled.
    parser.add_option("--verbose", action="store_true", dest="verbose",
                      default=False, help="announce the paths being read")
    parser.add_option("-q", "--quiet", action="store_false", dest="verbose",
                      help="do not announce the paths being read")
    # Parse the argv we were handed, not whatever sys.argv contains.
    (options, args) = parser.parse_args(argv[1:])

    # Only real paths are walked; option flags never reach all_files().
    paths = list(args)
    if options.filename:
        paths.insert(0, options.filename)

    if paths:
        if options.verbose:
            # print(...) with one argument works on Python 2 and Python 3.
            print("reading %s..." % ", ".join(paths))
        cleckmd5(paths)
    if options.version:
        print(__version__)
    if not paths and not options.version:
        parser.print_help()


if __name__ == '__main__':
    execute_from_command_line(sys.argv)
附:不借助 md5sum 命令,直接用 Python 的 hashlib 计算单个文件的 md5 值:
import hashlib


def file_md5(path, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in blocks of *chunk_size* bytes, so a large image
    such as an ISO is never held in memory as a whole, and the ``with``
    block closes the file even if reading fails.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as stream:
        # iter() with a sentinel stops at the empty read that marks EOF.
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


if __name__ == '__main__':
    print(file_md5(r'D:\My Documents\Downloads\sol-11_2-text-sparc.iso'))