From dd629b6b667380214c33ccab1b80a091b9b3beb4 Mon Sep 17 00:00:00 2001 From: mozhou52 Date: Fri, 10 Jan 2025 18:35:53 +0800 Subject: [PATCH] =?UTF-8?q?fix:=E5=A4=84=E7=90=86=20mac=20=E5=AF=BC?= =?UTF-8?q?=E8=87=B4=E7=9A=84=E7=BC=96=E7=A0=81=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/code/index.py | 67 ++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/src/code/index.py b/src/code/index.py index edf78b4..0104c90 100644 --- a/src/code/index.py +++ b/src/code/index.py @@ -23,7 +23,6 @@ import os import logging import zipfile -import chardet import shutil # Close the info log printed by the oss SDK @@ -36,29 +35,29 @@ def get_zipfile_name(origin_name): # 解决中文乱码问题 name = origin_name try: - name_bytes = origin_name.encode(encoding="cp437") - except: - name_bytes = origin_name.encode(encoding="utf-8") - - # the string to be detect is long enough, the detection result accuracy is higher - detect = chardet.detect(name_bytes) - confidence = detect["confidence"] - detect_encoding = detect["encoding"] - if confidence > 0.75 and ( - detect_encoding.lower() in ["gb2312", "gbk", "gb18030", "ascii", "utf-8"] - ): - try: - if detect_encoding.lower() in ["gb2312", "gbk", "gb18030"]: - detect_encoding = "gb18030" - name = name_bytes.decode(detect_encoding) - except: - name = name_bytes.decode(encoding="gb18030") - else: + # 尝试常见编码 + name_bytes = origin_name.encode("cp437") + # macOS 系统可能使用 utf-8-mac 编码 try: - name = name_bytes.decode(encoding="gb18030") + name = name_bytes.decode("utf-8-mac") except: - name = name_bytes.decode(encoding="utf-8") - # fix windows \\ as dir segment + # 尝试其他常见编码 + for encoding in ["utf-8", "gb18030", "cp437"]: + try: + name = name_bytes.decode(encoding) + break + except: + continue + except: + # 如果无法编码为 cp437,直接尝试 utf-8 和 gb18030 + for encoding in ["utf-8", "gb18030"]: + try: + name = origin_name.encode(encoding).decode(encoding) + break + except: + continue + + # 替换路径分隔符 name = name.replace("\\", "/") return name @@ -133,29 +132,43 @@ def handler(event, context): try: with zipfile.ZipFile(tmpZipfile) as zip_file: for file_info in zip_file.infolist(): + # 跳过 macOS 系统生成的文件 + if "__MACOSX" in file_info.filename or file_info.filename.endswith(".DS_Store"): + continue + + # 跳过文件夹 if file_info.is_dir(): continue + + # 处理文件 f_size = file_info.file_size if ( WORK_DIR == "/tmp" and object_sizeMB + f_size / 1024 / 1024 > 10240 * 0.99 - ): # if zip file + one file size > 0.99G, skip extract and upload + ): LOGGER.error( "{} size is too large; skip extract and upload. Please use NAS and set the WORK_DIR environment variable to specify the NAS mount directory. For reference, see: https://help.aliyun.com/zh/functioncompute/fc-3-0/user-guide/configure-a-nas-file-system-1".format( file_info.filename ) ) continue + + # 解压文件 zip_file.extract(file_info.filename, tmpWorkDir) pathname = os.path.join(tmpWorkDir, file_info.filename) - newkey = os.path.join( - newKeyPrefix, get_zipfile_name(file_info.filename) - ) + + # 获取处理后的文件名 + cleaned_filename = get_zipfile_name(file_info.filename) + + # 构建新的 OSS 路径 + newkey = os.path.join(newKeyPrefix, cleaned_filename) LOGGER.info("upload to {}".format(newkey)) + + # 上传文件 bucket.put_object_from_file(newkey, pathname) os.remove(pathname) except Exception as e: LOGGER.error(e) finally: os.remove(tmpZipfile) - shutil.rmtree(tmpWorkDir) + shutil.rmtree(tmpWorkDir) \ No newline at end of file