概述
#-*- coding: utf-8 -*-
# chardet is an optional dependency.  IsAuto records whether it could be
# imported; explore() uses the flag to choose between Convert_Auto (detected
# input encoding) and Convert_Manu (fixed input encoding).
try:
    from chardet.universaldetector import UniversalDetector
except ImportError:
    IsAuto = False
else:
    IsAuto = True

import os
import os.path
import glob
def Convert_Auto(filename, out_enc="utf-8"):
    """Re-encode a text file in place, auto-detecting its current encoding.

    Needs the chardet library (``UniversalDetector``).

    Input Parameter:
        filename: full path and file name, e.g. C:/dir1/file.txt
        out_enc: new encoding. Default as 'utf-8'
    Output Parameter:
        None. One "Success: ..." or "Error: ..." line is printed per call.

    Failures to read/write the file, an undetectable or unknown encoding,
    and text that cannot be represented in ``out_enc`` are all reported
    through the "Error" line instead of being raised.  The file is only
    rewritten after the new content has been fully encoded in memory.
    """
    # Bound before the try block so the failure message can always be built,
    # even when open() fails before any detection has taken place.
    in_enc = None
    try:
        with open(filename, 'rb') as f:
            raw = f.read()

        # Only the first 1024 bytes are sampled for detection.  The sample is
        # fed unmodified so a leading byte-order mark, if present, is still
        # at the very start of the data handed to the detector.
        detector = UniversalDetector()
        detector.reset()
        detector.feed(raw[:1024])
        detector.close()
        in_enc = detector.result['encoding']
        if in_enc is None:
            # bytes.decode(None, ...) would raise TypeError; route the
            # "nothing detected" case to the normal failure report instead.
            raise LookupError("input encoding could not be detected")

        # Undecodable bytes are dropped ('ignore').  Encoding happens before
        # the file is reopened, so an encoding error cannot truncate it.
        converted = raw.decode(in_enc, 'ignore').encode(out_enc)

        # Binary mode: line endings are written back exactly as decoded.
        with open(filename, 'wb') as f:
            f.write(converted)
    except (OSError, LookupError, UnicodeError):
        print("Error: " + filename + " FAIL to converted from "
              + (in_enc or "unknown") + " to " + out_enc + " !")
    else:
        print("Success: " + filename + " converted from "
              + in_enc + " to " + out_enc + " !")
def Convert_Manu(filename, in_enc='gbk', out_enc="utf-8"):
    """Re-encode a text file in place from a caller-supplied encoding.

    Input Parameter:
        filename: full path and file name, e.g. C:/dir1/file.txt
        in_enc: current encoding. Default as 'gbk'
        out_enc: new encoding. Default as 'utf-8'
    Output Parameter:
        None. A "convert ..." line is printed first, followed by one
        "Success: ..." or "Error: ..." line.

    Failures to read/write the file, unknown codec names, and text that
    cannot be represented in ``out_enc`` are all reported through the
    "Error" line instead of being raised.  The file is only rewritten after
    the new content has been fully encoded in memory.
    """
    try:
        print("convert " + filename)
        with open(filename, 'rb') as f:
            raw = f.read()

        # Undecodable bytes are dropped ('ignore').  Encoding happens before
        # the file is reopened, so an encoding error cannot truncate it.
        converted = raw.decode(in_enc, 'ignore').encode(out_enc)

        # Binary mode: line endings are written back exactly as decoded.
        with open(filename, 'wb') as f:
            f.write(converted)
    except (OSError, LookupError, UnicodeError):
        print("Error: " + filename + " FAIL to converted from "
              + in_enc + " to " + out_enc + " !")
    else:
        print("Success: " + filename + " converted from "
              + in_enc + " to " + out_enc + " !")
def explore(dir, IsLoopSubDIR=True):
    """Convert the encoding of every '.txt' file found for a folder.

    Input:
        dir          : folder handed to the file-list helper
        IsLoopSubDIR : True  -- collect files with getSubFileList
                       False -- collect files with getCurrFileList
    Output:
        NONE

    Each file goes through Convert_Auto (target 'utf-8') when the module
    flag IsAuto is true, otherwise through Convert_Manu ('gbk' -> 'utf-8').
    """
    collect = getSubFileList if IsLoopSubDIR else getCurrFileList
    for path in collect(dir, '.txt'):
        if not IsAuto:
            Convert_Manu(path, 'gbk', 'utf-8')
            continue
        Convert_Auto(path, 'utf-8')
def getSubFileList(dir, suffix=''):
    """Get all files with the specified suffix under a folder, sub folders included.

    Input:
        dir    : folder to walk (it is actually used as the walk root; the
                 current working directory is no longer substituted for it)
        suffix : default to blank, means select all files.
    Output:
        List of paths, each built as os.path.join(<walked folder>, <file name>).
    """
    flist = []
    for root, _dirs, files in os.walk(dir):
        # str.endswith('') is always True, so a blank suffix keeps every file.
        flist.extend(os.path.join(root, name)
                     for name in files if name.endswith(suffix))
    return flist
def getCurrFileList(dir, suffix=''):
    """Get all files with the specified suffix directly inside a folder.

    Input:
        dir    : folder to list (it is actually used; the current working
                 directory is no longer substituted for it)
        suffix : default to blank, means select all files.
    Output:
        List of paths, each prefixed with ``dir``.  Sub folders are not
        descended into and are not returned, so the result holds files only,
        like getSubFileList.
    """
    # glob.escape keeps wildcard characters that occur in the folder name
    # itself (e.g. '[' or '*') from being read as part of the pattern.
    # '*' + '' is just '*', so a blank suffix needs no special case.
    pattern = os.path.join(glob.escape(dir), '*' + suffix)
    return [path for path in glob.glob(pattern) if os.path.isfile(path)]
def main():
    """Run explore() on the current working directory, sub folders included."""
    start_dir = os.getcwd()
    explore(start_dir, True)


# Only run the conversion when the file is executed as a script.
if __name__ == "__main__":
    main()
最后
以上就是醉熏红酒为你收集整理的Python3批量转换文本文件编码的全部内容,希望文章能够帮你解决Python3批量转换文本文件编码所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复