Python3批量转换文本文件编码

307 阅读 0 评论 203 点赞
我是靠谱客的博主 醉熏红酒，这篇文章主要介绍Python3批量转换文本文件编码，现在分享给大家，希望可以做个参考。

                                
                            

                            


 


 

# -*- coding: utf-8 -*-

# chardet is an optional dependency. IsAuto records whether it could be
# imported, so the rest of the script can choose between automatic encoding
# detection and a manually specified source encoding.
try:
    from chardet.universaldetector import UniversalDetector
    IsAuto = True
except ImportError:
    IsAuto = False

import os
import os.path
import glob

 

def Convert_Auto(filename, out_enc="utf-8"):
    '''Re-encode a text file in place, auto-detecting its current encoding.

    Needs the chardet library (uses its UniversalDetector).

    Input Parameter:
        filename: full path and file name, e.g. /dir1/file.txt
        out_enc: new encoding. Default is 'utf-8'

    Output Parameter:
        None. The outcome is reported with print().
    '''
    # Bound before the try so the error branch can always build its message,
    # even when the very first open() fails.
    in_enc = None
    try:
        with open(filename, 'rb') as src:
            raw = src.read()

        # Feed the detector the real file bytes chunk by chunk and stop as
        # soon as it reports that it has reached a conclusion.
        detector = UniversalDetector()
        detector.reset()
        for start in range(0, len(raw), 4096):
            detector.feed(raw[start:start + 4096])
            if detector.done:
                break
        detector.close()

        in_enc = detector.result['encoding']
        if in_enc is None:
            # Nothing was detected; leave the file untouched.
            print("Error: " + filename + " FAIL to detect current encoding !")
            return

        # Encode in memory first: the file is only reopened for writing once
        # the new bytes exist, and binary mode keeps line endings unchanged.
        encoded = raw.decode(in_enc, 'ignore').encode(out_enc)
        with open(filename, 'wb') as dst:
            dst.write(encoded)

        print("Success: " + filename + " converted from " + in_enc
              + " to " + out_enc + " !")
    except IOError:
        print("Error: " + filename + " FAIL to converted from "
              + (in_enc or "unknown") + " to " + out_enc + " !")

 

def Convert_Manu(filename, in_enc='gbk', out_enc="utf-8"):
    '''Re-encode a text file in place from a caller-specified encoding.

    Input Parameter:
        filename: full path and file name, e.g. /dir1/file.txt
        in_enc:  current encoding. Default is 'gbk'
        out_enc: new encoding. Default is 'utf-8'

    Output Parameter:
        None. The outcome is reported with print().
    '''
    try:
        print("convert " + filename)
        with open(filename, 'rb') as src:
            raw = src.read()

        # Bytes that are invalid in in_enc are dropped ('ignore'). The result
        # is encoded in memory before the file is reopened, so a failing
        # encode cannot leave a truncated file behind, and binary mode keeps
        # the original line endings byte-for-byte.
        encoded = raw.decode(in_enc, 'ignore').encode(out_enc)
        with open(filename, 'wb') as dst:
            dst.write(encoded)

        print("Success: " + filename + " converted from " + in_enc
              + " to " + out_enc + " !")
    except IOError:
        print("Error: " + filename + " FAIL to converted from " + in_enc
              + " to " + out_enc + " !")

 

 

def explore(dir, IsLoopSubDIR=True, suffix='.txt', in_enc='gbk', out_enc='utf-8'):
    '''Convert the encoding of every matching file in a folder.

    Input:
        dir         : Folder to process
        IsLoopSubDIR:   True -- Include files in sub folders
                        False-- Only include files in the folder itself
        suffix      : File name suffix to select. Default '.txt'
        in_enc      : Source encoding used when automatic detection is not
                      available (IsAuto is False). Default 'gbk'
        out_enc     : Target encoding. Default 'utf-8'
    Output:
        NONE
    '''
    list_files = getSubFileList if IsLoopSubDIR else getCurrFileList
    for fname in list_files(dir, suffix):
        if IsAuto:
            # chardet was imported: let it determine the source encoding.
            Convert_Auto(fname, out_enc)
        else:
            Convert_Manu(fname, in_enc, out_enc)

 

     

def getSubFileList(dir, suffix=''):
    '''Get all files with the specified suffix under a folder (sub folders included).

    Input:
        dir     :   Folder to search. An empty value or None falls back to
                    the current working directory.
        suffix  :   default to blank, means select all files.
    Output:
        File list; each entry is the walked folder path joined with the
        file name.
    '''
    # Walk the folder that was asked for, not unconditionally the cwd.
    top = dir if dir else os.getcwd()
    return [
        os.path.join(root, name)
        for root, _subdirs, files in os.walk(top)
        for name in files
        if name.endswith(suffix)
    ]

 

def getCurrFileList(dir, suffix=''):
    '''Get all files with the specified suffix directly inside a folder.

    Input:
        dir     :   Folder to search (sub folders are not visited). An empty
                    value or None falls back to the current working directory.
        suffix  :   default to blank, means select all files.
    Output:
        File list; each entry is the folder path joined with the file name.
        Only regular files are returned, and names starting with a dot are
        not matched by the '*' pattern.
    '''
    # Search the folder that was asked for, not unconditionally the cwd.
    base = dir if dir else os.getcwd()
    # Escape both parts so glob metacharacters in the folder name or in the
    # suffix are matched literally.
    pattern = os.path.join(glob.escape(base), '*' + glob.escape(suffix))
    return [path for path in glob.glob(pattern) if os.path.isfile(path)]

         

         

def main():
    '''Script entry point: convert the files under the current working
    directory, sub folders included.'''
    start_folder = os.getcwd()
    explore(start_folder, IsLoopSubDIR=True)


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
以上就是醉熏红酒最近收集整理的关于“Python3批量转换文本文件编码”的全部内容；更多相关内容，请搜索靠谱客的其他文章。
本图文内容来源于网友提供，作为学习参考使用，或来自网络收集整理，版权属于原作者所有。