我是靠谱客的博主醉熏红酒。最近在开发中收集的这篇文章主要介绍 Python3 批量转换文本文件编码，觉得挺不错的，现在分享给大家，希望可以做个参考。

概述



#-*- coding: utf-8 -*-
# chardet is an optional dependency: when it can be imported, IsAuto is True
# and explore() uses Convert_Auto (encoding detection); otherwise IsAuto is
# False and explore() falls back to Convert_Manu with a fixed input encoding.
try :
     from chardet.universaldetector import UniversalDetector
     IsAuto = True
except  ImportError:
     # chardet is not installed; UniversalDetector stays undefined.
     IsAuto = False
import os
import os.path
import glob
 
def Convert_Auto(filename, out_enc="utf-8"):
    """Re-encode a text file in place, detecting its current encoding.

    Requires the chardet library: the function uses the module-level
    ``UniversalDetector`` name, which only exists when the optional import
    at the top of the file succeeded (``IsAuto`` is True).

    Args:
        filename: Path of the file to convert. The file is overwritten.
        out_enc: Target encoding. Defaults to 'utf-8'.

    Returns:
        None. A "Success: ..." or "Error: ..." line is printed instead.
        I/O failures (IOError/OSError) are reported, not raised. When the
        detector cannot name an encoding the file is left untouched and an
        error line is printed.
    """
    # Bound up front so the error message can always be built, even when
    # open() fails before any detection has happened.
    in_enc = "unknown"
    try:
        with open(filename, 'rb') as src:
            raw = src.read()

        detector = UniversalDetector()
        detector.reset()
        # Only the first 1024 bytes are sampled for detection.
        # NOTE(review): the one-space prefix is kept from the original code;
        # it presumably guarantees the detector receives some data, but it may
        # also hide a leading BOM from the detector -- confirm.
        detector.feed(b' ' + raw[:1024])
        detector.close()

        detected = detector.result['encoding']
        if detected is None:
            # No usable guess: decoding with None would raise TypeError, so
            # report the failure and keep the file as it is.
            print("Error: " + filename + " FAIL to converted from " + in_enc + " to " + out_enc + " !")
            return
        in_enc = detected

        # Undecodable bytes are dropped ('ignore'), as in the original.
        # Encode BEFORE reopening the file so an encoding failure cannot leave
        # a truncated file behind, and write bytes so that no newline
        # translation happens (text mode would turn CRLF into CR CR LF on
        # Windows).
        encoded = raw.decode(in_enc, 'ignore').encode(out_enc)
        with open(filename, 'wb') as dst:
            dst.write(encoded)
        print("Success: " + filename + " converted from " + in_enc + " to " + out_enc + " !")
    except IOError:
        print("Error: " + filename + " FAIL to converted from " + in_enc + " to " + out_enc + " !")
 
def Convert_Manu(filename, in_enc='gbk', out_enc="utf-8"):
    """Re-encode a text file in place from a caller-supplied encoding.

    Args:
        filename: Path of the file to convert. The file is overwritten.
        in_enc: Encoding the file is currently stored in. Defaults to 'gbk'.
        out_enc: Target encoding. Defaults to 'utf-8'.

    Returns:
        None. Progress is printed: a "convert ..." line first, then a
        "Success: ..." or "Error: ..." line. I/O failures (IOError/OSError)
        are reported, not raised.
    """
    try:
        print("convert " + filename)
        with open(filename, 'rb') as src:
            raw = src.read()

        # Bytes that are invalid in in_enc are dropped ('ignore'), as in the
        # original. Encode BEFORE reopening the file so an encoding failure
        # cannot leave a truncated file behind, and write bytes so that no
        # newline translation happens (text mode would turn CRLF into
        # CR CR LF on Windows).
        encoded = raw.decode(in_enc, 'ignore').encode(out_enc)
        with open(filename, 'wb') as dst:
            dst.write(encoded)
        print("Success: " + filename + " converted from " + in_enc + " to " + out_enc + " !")
    except IOError:
        print("Error: " + filename + " FAIL to converted from " + in_enc + " to " + out_enc + " !")
 
 
def explore(dir, IsLoopSubDIR=True):
    """Convert every '.txt' file found for ``dir`` to UTF-8.

    Args:
        dir: Folder handed to the file-listing helper.
        IsLoopSubDIR: True  -- list files with getSubFileList (sub folders
                               included).
                      False -- list files with getCurrFileList (current
                               level only).

    Returns:
        None.
    """
    # Pick the listing helper once, then convert whatever it returns.
    list_files = getSubFileList if IsLoopSubDIR else getCurrFileList
    for path in list_files(dir, '.txt'):
        if not IsAuto:
            # chardet is unavailable: assume the input is GBK.
            Convert_Manu(path, 'gbk', 'utf-8')
            continue
        Convert_Auto(path, 'utf-8')
 
     
def getSubFileList(dir, suffix=''):
    """List files under ``dir`` and all of its sub folders.

    Args:
        dir: Folder to start from. (The previous implementation ignored this
            argument and always walked the current working directory.)
        suffix: Only file names ending with this string are returned. The
            default '' selects every file.

    Returns:
        A list of absolute file paths. It is empty when nothing matches or
        when ``dir`` cannot be walked.
    """
    flist = []
    # abspath keeps the results absolute, as they were when the walk always
    # started from os.getcwd().
    for root, _subdirs, files in os.walk(os.path.abspath(dir)):
        flist.extend(
            os.path.join(root, name) for name in files if name.endswith(suffix)
        )
    return flist
 
def getCurrFileList(dir, suffix=''):
    """List files directly inside ``dir`` (sub folders are not searched).

    Args:
        dir: Folder to list. (The previous implementation ignored this
            argument and always listed the current working directory.)
        suffix: Appended to the '*' wildcard, so only names ending with it
            match. The default '' selects every name matched by '*'; as with
            any glob pattern, names starting with a dot are not matched.

    Returns:
        A list of absolute paths of regular files. Directories whose names
        match the pattern are left out, because callers open every entry as
        a file.
    """
    # Escape the folder part so characters such as '[' in the path are taken
    # literally; only '*' + suffix acts as a pattern.
    base = glob.escape(os.path.abspath(dir))
    pattern = os.path.join(base, '*' + suffix)
    return [path for path in glob.glob(pattern) if os.path.isfile(path)]
         
         
def main():
    """Convert the files under the current working directory, sub folders included."""
    start_dir = os.getcwd()
    explore(start_dir, True)
     
# Run the batch conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

最后

以上就是醉熏红酒为你收集整理的 Python3 批量转换文本文件编码的全部内容，希望文章能够帮你解决在 Python3 批量转换文本文件编码时所遇到的程序开发问题。

如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(72)

评论列表共有 0 条评论

立即
投稿
返回
顶部