-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathencoding.py
More file actions
65 lines (49 loc) · 2.16 KB
/
encoding.py
File metadata and controls
65 lines (49 loc) · 2.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
import sys
import codecs
from chardet.universaldetector import UniversalDetector
# File-name suffixes of the source/text files this script converts to the
# requested target encoding; anything else found while walking the directory
# is ignored.
__exts__ = (
    ".cpp",
    ".h",
    ".hpp",
    ".inl",
    ".frag",
    ".vert",
    ".txt",
    ".md",
)
def detectEncoding(file):
    """Guess the character encoding of the file at path *file*.

    The whole file is read as bytes and handed to chardet's
    ``UniversalDetector`` in a single ``feed`` call, after which the
    detector is closed so that it finalizes its guess.

    Args:
        file: Path of the file to inspect.

    Returns:
        The detector's ``result`` attribute.
    """
    with open(file, 'rb') as stream:
        payload = stream.read()

    sniffer = UniversalDetector()
    sniffer.feed(payload)
    sniffer.close()
    return sniffer.result
def encodeToTarget(fileName, encoding, target):
    """Re-encode the file at *fileName* in place from *encoding* to *target*.

    The file is read as bytes, decoded with *encoding* and written back
    encoded as *target*. Both conversions use the 'replace' error handler,
    so erroneous input is tolerated: undecodable bytes become U+FFFD and
    characters that *target* cannot represent become '?'.

    Args:
        fileName: Path of the file to convert; its content is overwritten.
        encoding: Name of the codec the file is currently stored in.
        target: Name of the codec to write the file back in.

    Raises:
        LookupError: If *encoding* or *target* is not a known codec. The
            file is left untouched in that case.
        OSError: If the file cannot be read or written.
    """
    # TODO: Still dangerous -- the original content is overwritten without
    # a backup copy; a backup option would be needed to make this safe.
    with open(fileName, 'rb') as src:
        raw = src.read()

    # Finish both conversions BEFORE reopening the file for writing: opening
    # with 'wb' truncates the file, so an error raised after that point
    # (e.g. an unknown target codec) would destroy its content.
    payload = raw.decode(encoding, 'replace').encode(target, 'replace')

    with open(fileName, 'wb') as dst:
        dst.write(payload)
def main():
# 检查输入的路径是否合法
if len(sys.argv) != 3:
print('Usage: ./encoding.py <Encoding_Dir> <Target Encoding>')
return
sourceDir = os.path.relpath(sys.argv[1])
targetEncoding = sys.argv[2]
for root, dirs, files in os.walk(sourceDir):
for file in files:
fileName = os.path.join(root, file)
if fileName.endswith(__exts__):
result = detectEncoding(fileName)
if result['confidence'] == 0 and result['encoding'] == None:
print('Empty: {}'.format(fileName))
continue
encoding = result['encoding'].lower()
confidence = result['confidence']
if confidence < 0.7:
print('Skipped: {}'.format(fileName))
continue
if encoding != targetEncoding:
encodeToTarget(fileName, encoding, targetEncoding)
print('Encoding: {}'.format(fileName))
else:
print('Skipped {}: {}'.format(targetEncoding, fileName))
# Script entry point: print a banner before and after the conversion run.
if __name__ == '__main__':
    print('\n************************************* Executing Encoding-check Script *************************************\n')
    main()
    print('\n************************************* Finished Encoding-check Script **************************************\n')