From dd629b6b667380214c33ccab1b80a091b9b3beb4 Mon Sep 17 00:00:00 2001
From: mozhou52 <gaoweihong2022@163.com>
Date: Fri, 10 Jan 2025 18:35:53 +0800
Subject: [PATCH] =?UTF-8?q?fix:=E5=A4=84=E7=90=86=20mac=20=E5=AF=BC?=
 =?UTF-8?q?=E8=87=B4=E7=9A=84=E7=BC=96=E7=A0=81=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/code/index.py | 67 ++++++++++++++++++++++++++++-------------------
 1 file changed, 40 insertions(+), 27 deletions(-)

diff --git a/src/code/index.py b/src/code/index.py
index edf78b4..0104c90 100644
--- a/src/code/index.py
+++ b/src/code/index.py
@@ -23,7 +23,6 @@
 import os
 import logging
 import zipfile
-import chardet
 import shutil
 
 # Close the info log printed by the oss SDK
@@ -36,29 +35,29 @@
 def get_zipfile_name(origin_name):  # 解决中文乱码问题
+    """Return a zip member name with mis-decoded non-ASCII text repaired.
+
+    zipfile reads names lacking the UTF-8 flag as cp437. Recover the raw bytes,
+    then decode them as UTF-8 (recomposed to NFC) or, failing that, GB18030.
+    """
+    import unicodedata
+
     name = origin_name
     try:
-        name_bytes = origin_name.encode(encoding="cp437")
-    except:
-        name_bytes = origin_name.encode(encoding="utf-8")
-
-    # the string to be detect is long enough, the detection result accuracy is higher
-    detect = chardet.detect(name_bytes)
-    confidence = detect["confidence"]
-    detect_encoding = detect["encoding"]
-    if confidence > 0.75 and (
-        detect_encoding.lower() in ["gb2312", "gbk", "gb18030", "ascii", "utf-8"]
-    ):
-        try:
-            if detect_encoding.lower() in ["gb2312", "gbk", "gb18030"]:
-                detect_encoding = "gb18030"
-            name = name_bytes.decode(detect_encoding)
-        except:
-            name = name_bytes.decode(encoding="gb18030")
-    else:
+        name_bytes = origin_name.encode("cp437")
         try:
-            name = name_bytes.decode(encoding="gb18030")
+            # Python has no "utf-8-mac" codec; macOS names are plain UTF-8 in
+            # decomposed (NFD) form, so decode as UTF-8 and recompose to NFC.
+            name = unicodedata.normalize("NFC", name_bytes.decode("utf-8"))
         except:
-            name = name_bytes.decode(encoding="utf-8")
-    # fix windows \\ as dir segment
+            # Not UTF-8: try GB18030 (covers GBK/GB2312); else keep the cp437 name.
+            try:
+                name = name_bytes.decode("gb18030")
+            except UnicodeDecodeError:
+                pass
+    except UnicodeEncodeError:
+        # Not cp437-encodable: presumably zipfile already decoded it as UTF-8.
+        pass
+
+    # Archives built on Windows may use "\" as the directory separator.
     name = name.replace("\\", "/")
     return name
 
@@ -133,29 +132,43 @@ def handler(event, context):
     try:
         with zipfile.ZipFile(tmpZipfile) as zip_file:
             for file_info in zip_file.infolist():
+                # Skip macOS metadata entries (__MACOSX folders, .DS_Store files)
+                if "__MACOSX" in file_info.filename or file_info.filename.endswith(".DS_Store"):
+                    continue
+
+                # Skip directory entries; only regular files are uploaded
                 if file_info.is_dir():
                     continue
+
+                # Process a regular file entry
                 f_size = file_info.file_size
                 if (
                     WORK_DIR == "/tmp"
                     and object_sizeMB + f_size / 1024 / 1024 > 10240 * 0.99
-                ):  # if zip file + one file size > 0.99G, skip extract and upload
+                ):  # with this entry the total would exceed 99% of 10240 MB; skip it
                     LOGGER.error(
                         "{} size is too large; skip extract and upload. Please use NAS and set the WORK_DIR environment variable to specify the NAS mount directory. For reference, see: https://help.aliyun.com/zh/functioncompute/fc-3-0/user-guide/configure-a-nas-file-system-1".format(
                             file_info.filename
                         )
                     )
                     continue
+
+                # Extract the entry into the working directory
                 zip_file.extract(file_info.filename, tmpWorkDir)
                 pathname = os.path.join(tmpWorkDir, file_info.filename)
-                newkey = os.path.join(
-                    newKeyPrefix, get_zipfile_name(file_info.filename)
-                )
+
+                # Get the entry name with its encoding repaired
+                cleaned_filename = get_zipfile_name(file_info.filename)
+
+                # Build the destination OSS key
+                newkey = os.path.join(newKeyPrefix, cleaned_filename)
                 LOGGER.info("upload to {}".format(newkey))
+
+                # Upload the extracted file, then delete the local copy
                 bucket.put_object_from_file(newkey, pathname)
                 os.remove(pathname)
     except Exception as e:
         LOGGER.error(e)
     finally:
         os.remove(tmpZipfile)
-        shutil.rmtree(tmpWorkDir)
+        shutil.rmtree(tmpWorkDir)
\ No newline at end of file