diff --git a/aws-lambda-builders-master/aws_lambda_builders/workflows/nodejs_npm/utils.py b/aws-lambda-builders-master/aws_lambda_builders/workflows/nodejs_npm/utils.py
index ad92cfd..fa49f43 100644
--- a/aws-lambda-builders-master/aws_lambda_builders/workflows/nodejs_npm/utils.py
+++ b/aws-lambda-builders-master/aws_lambda_builders/workflows/nodejs_npm/utils.py
@@ -21,7 +21,26 @@ def copy_file(self, file_path, destination_path):
 
     def extract_tarfile(self, tarfile_path, unpack_dir):
         with tarfile.open(tarfile_path, "r:*") as tar:
-            tar.extractall(unpack_dir)
+            def is_within_directory(directory, target):
+                # Compare whole path components: a character-wise prefix test
+                # would accept a sibling such as "<directory>-evil".
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                try:
+                    common = os.path.commonpath([abs_directory, abs_target])
+                except ValueError:  # e.g. paths on different Windows drives
+                    return False
+                return common == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                # Check every member name before anything is written to disk.
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+            safe_extract(tar, unpack_dir)
 
     def file_exists(self, filename):
         return os.path.isfile(filename)
diff --git a/aws-lambda-builders-master/aws_lambda_builders/workflows/python_pip/utils.py b/aws-lambda-builders-master/aws_lambda_builders/workflows/python_pip/utils.py
index 19ee765..f18712f 100644
--- a/aws-lambda-builders-master/aws_lambda_builders/workflows/python_pip/utils.py
+++ b/aws-lambda-builders-master/aws_lambda_builders/workflows/python_pip/utils.py
@@ -55,7 +55,26 @@ def extract_zipfile(self, zipfile_path, unpack_dir):
 
     def extract_tarfile(self, tarfile_path, unpack_dir):
         with tarfile.open(tarfile_path, "r:*") as tar:
-            tar.extractall(unpack_dir)
+            def is_within_directory(directory, target):
+                # Compare whole path components: a character-wise prefix test
+                # would accept a sibling such as "<directory>-evil".
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                try:
+                    common = os.path.commonpath([abs_directory, abs_target])
+                except ValueError:  # e.g. paths on different Windows drives
+                    return False
+                return common == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                # Check every member name before anything is written to disk.
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+            safe_extract(tar, unpack_dir)
 
     def directory_exists(self, path):
         return os.path.isdir(path)
diff --git a/aws-lambda-builders-master/aws_lambda_builders/workflows/ruby_bundler/utils.py b/aws-lambda-builders-master/aws_lambda_builders/workflows/ruby_bundler/utils.py
index a3f3643..460c176 100644
--- a/aws-lambda-builders-master/aws_lambda_builders/workflows/ruby_bundler/utils.py
+++ b/aws-lambda-builders-master/aws_lambda_builders/workflows/ruby_bundler/utils.py
@@ -18,7 +18,26 @@ class OSUtils(object):
 
     def extract_tarfile(self, tarfile_path, unpack_dir):
         with tarfile.open(tarfile_path, "r:*") as tar:
-            tar.extractall(unpack_dir)
+            def is_within_directory(directory, target):
+                # Compare whole path components: a character-wise prefix test
+                # would accept a sibling such as "<directory>-evil".
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                try:
+                    common = os.path.commonpath([abs_directory, abs_target])
+                except ValueError:  # e.g. paths on different Windows drives
+                    return False
+                return common == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                # Check every member name before anything is written to disk.
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+            safe_extract(tar, unpack_dir)
 
     def popen(self, command, stdout=None, stderr=None, env=None, cwd=None):
         p = subprocess.Popen(command, stdout=stdout, stderr=stderr, env=env, cwd=cwd)
diff --git a/aws-sam-cli-main/samcli/local/docker/container.py b/aws-sam-cli-main/samcli/local/docker/container.py
index a43e485..575eef0 100644
--- a/aws-sam-cli-main/samcli/local/docker/container.py
+++ b/aws-sam-cli-main/samcli/local/docker/container.py
@@ -331,7 +331,29 @@ def copy(self, from_container_path, to_host_path):
             fp.seek(0)
 
             with tarfile.open(fileobj=fp, mode="r") as tar:
-                tar.extractall(path=to_host_path)
+
+                import os
+
+                def is_within_directory(directory, target):
+                    # Compare whole path components: a character-wise prefix test
+                    # would accept a sibling such as "<directory>-evil".
+                    abs_directory = os.path.abspath(directory)
+                    abs_target = os.path.abspath(target)
+                    try:
+                        common = os.path.commonpath([abs_directory, abs_target])
+                    except ValueError:  # e.g. paths on different Windows drives
+                        return False
+                    return common == abs_directory
+
+                def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                    # Check every member name before anything is written to disk.
+                    for member in tar.getmembers():
+                        member_path = os.path.join(path, member.name)
+                        if not is_within_directory(path, member_path):
+                            raise Exception("Attempted Path Traversal in Tar File")
+                    tar.extractall(path, members, numeric_owner=numeric_owner)
+
+                safe_extract(tar, path=to_host_path)
 
     @staticmethod
     def _write_container_output(output_itr, stdout=None, stderr=None):
diff --git a/aws-sam-cli-master/samcli/local/docker/container.py b/aws-sam-cli-master/samcli/local/docker/container.py
index a43e485..575eef0 100644
--- a/aws-sam-cli-master/samcli/local/docker/container.py
+++ b/aws-sam-cli-master/samcli/local/docker/container.py
@@ -331,7 +331,29 @@ def copy(self, from_container_path, to_host_path):
             fp.seek(0)
 
             with tarfile.open(fileobj=fp, mode="r") as tar:
-                tar.extractall(path=to_host_path)
+
+                import os
+
+                def is_within_directory(directory, target):
+                    # Compare whole path components: a character-wise prefix test
+                    # would accept a sibling such as "<directory>-evil".
+                    abs_directory = os.path.abspath(directory)
+                    abs_target = os.path.abspath(target)
+                    try:
+                        common = os.path.commonpath([abs_directory, abs_target])
+                    except ValueError:  # e.g. paths on different Windows drives
+                        return False
+                    return common == abs_directory
+
+                def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                    # Check every member name before anything is written to disk.
+                    for member in tar.getmembers():
+                        member_path = os.path.join(path, member.name)
+                        if not is_within_directory(path, member_path):
+                            raise Exception("Attempted Path Traversal in Tar File")
+                    tar.extractall(path, members, numeric_owner=numeric_owner)
+
+                safe_extract(tar, path=to_host_path)
 
     @staticmethod
     def _write_container_output(output_itr, stdout=None, stderr=None):
diff --git a/chalice-master/chalice/utils.py b/chalice-master/chalice/utils.py
index 3307c4d..b847420 100644
--- a/chalice-master/chalice/utils.py
+++ b/chalice-master/chalice/utils.py
@@ -233,7 +233,26 @@ def extract_zipfile(self, zipfile_path, unpack_dir):
     def extract_tarfile(self, tarfile_path, unpack_dir):
         # type: (str, str) -> None
         with tarfile.open(tarfile_path, 'r:*') as tar:
-            tar.extractall(unpack_dir)
+            def is_within_directory(directory, target):
+                # Compare whole path components: a character-wise prefix test
+                # would accept a sibling such as "<directory>-evil".
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                try:
+                    common = os.path.commonpath([abs_directory, abs_target])
+                except ValueError:  # e.g. paths on different Windows drives
+                    return False
+                return common == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                # Check every member name before anything is written to disk.
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+            safe_extract(tar, unpack_dir)
 
     def directory_exists(self, path):
         # type: (str) -> bool
diff --git a/sagemaker-python-sdk-master/src/sagemaker/local/image.py b/sagemaker-python-sdk-master/src/sagemaker/local/image.py
index f0a3ed8..3bdacd1 100644
--- a/sagemaker-python-sdk-master/src/sagemaker/local/image.py
+++ b/sagemaker-python-sdk-master/src/sagemaker/local/image.py
@@ -605,7 +605,26 @@ def _prepare_serving_volumes(self, model_location):
         for filename in model_data_source.get_file_list():
             if tarfile.is_tarfile(filename):
                 with tarfile.open(filename) as tar:
-                    tar.extractall(path=model_data_source.get_root_dir())
+                    def is_within_directory(directory, target):
+                        # Compare whole path components: a character-wise prefix test
+                        # would accept a sibling such as "<directory>-evil".
+                        abs_directory = os.path.abspath(directory)
+                        abs_target = os.path.abspath(target)
+                        try:
+                            common = os.path.commonpath([abs_directory, abs_target])
+                        except ValueError:  # e.g. paths on different Windows drives
+                            return False
+                        return common == abs_directory
+
+                    def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                        # Check every member name before anything is written to disk.
+                        for member in tar.getmembers():
+                            member_path = os.path.join(path, member.name)
+                            if not is_within_directory(path, member_path):
+                                raise Exception("Attempted Path Traversal in Tar File")
+                        tar.extractall(path, members, numeric_owner=numeric_owner)
+
+                    safe_extract(tar, path=model_data_source.get_root_dir())
 
         volumes.append(_Volume(model_data_source.get_root_dir(), "/opt/ml/model"))
 
diff --git a/sagemaker-python-sdk-master/src/sagemaker/utils.py b/sagemaker-python-sdk-master/src/sagemaker/utils.py
index 4409c0b..0fe078d 100644
--- a/sagemaker-python-sdk-master/src/sagemaker/utils.py
+++ b/sagemaker-python-sdk-master/src/sagemaker/utils.py
@@ -450,7 +450,26 @@ def _create_or_update_code_dir(
         download_file_from_url(source_directory, local_code_path, sagemaker_session)
 
         with tarfile.open(name=local_code_path, mode="r:gz") as t:
-            t.extractall(path=code_dir)
+            def is_within_directory(directory, target):
+                # Compare whole path components: a character-wise prefix test
+                # would accept a sibling such as "<directory>-evil".
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                try:
+                    common = os.path.commonpath([abs_directory, abs_target])
+                except ValueError:  # e.g. paths on different Windows drives
+                    return False
+                return common == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                # Check every member name before anything is written to disk.
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+            safe_extract(t, path=code_dir)
 
     elif source_directory:
         if os.path.exists(code_dir):
@@ -487,7 +506,26 @@ def _extract_model(model_uri, sagemaker_session, tmp):
     else:
         local_model_path = model_uri.replace("file://", "")
     with tarfile.open(name=local_model_path, mode="r:gz") as t:
-        t.extractall(path=tmp_model_dir)
+        def is_within_directory(directory, target):
+            # Compare whole path components: a character-wise prefix test
+            # would accept a sibling such as "<directory>-evil".
+            abs_directory = os.path.abspath(directory)
+            abs_target = os.path.abspath(target)
+            try:
+                common = os.path.commonpath([abs_directory, abs_target])
+            except ValueError:  # e.g. paths on different Windows drives
+                return False
+            return common == abs_directory
+
+        def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+            # Check every member name before anything is written to disk.
+            for member in tar.getmembers():
+                member_path = os.path.join(path, member.name)
+                if not is_within_directory(path, member_path):
+                    raise Exception("Attempted Path Traversal in Tar File")
+            tar.extractall(path, members, numeric_owner=numeric_owner)
+
+        safe_extract(t, path=tmp_model_dir)
     return tmp_model_dir
 
 
diff --git a/sagemaker-python-sdk-master/src/sagemaker/workflow/_repack_model.py b/sagemaker-python-sdk-master/src/sagemaker/workflow/_repack_model.py
index 60b74d6..191639d 100644
--- a/sagemaker-python-sdk-master/src/sagemaker/workflow/_repack_model.py
+++ b/sagemaker-python-sdk-master/src/sagemaker/workflow/_repack_model.py
@@ -60,7 +60,26 @@ def repack(inference_script, model_archive, dependencies=None, source_dir=None):
         # extract the contents of the previous training job's model archive to the "src"
         # directory of this training job
         with tarfile.open(name=local_path, mode="r:gz") as tf:
-            tf.extractall(path=src_dir)
+            def is_within_directory(directory, target):
+                # Compare whole path components: a character-wise prefix test
+                # would accept a sibling such as "<directory>-evil".
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                try:
+                    common = os.path.commonpath([abs_directory, abs_target])
+                except ValueError:  # e.g. paths on different Windows drives
+                    return False
+                return common == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                # Check every member name before anything is written to disk.
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+            safe_extract(tf, path=src_dir)
 
         # copy the custom inference script to code/
         entry_point = os.path.join("/opt/ml/code", inference_script)
diff --git a/sagemaker-python-sdk-master/src/sagemaker/workflow/_utils.py b/sagemaker-python-sdk-master/src/sagemaker/workflow/_utils.py
index d3bfe3d..21da674 100644
--- a/sagemaker-python-sdk-master/src/sagemaker/workflow/_utils.py
+++ b/sagemaker-python-sdk-master/src/sagemaker/workflow/_utils.py
@@ -232,7 +232,26 @@ def _inject_repack_script(self):
 
                 src_dir = os.path.join(tmp, "src")
                 with tarfile.open(name=local_path, mode="r:gz") as tf:
-                    tf.extractall(path=src_dir)
+                    def is_within_directory(directory, target):
+                        # Compare whole path components: a character-wise prefix test
+                        # would accept a sibling such as "<directory>-evil".
+                        abs_directory = os.path.abspath(directory)
+                        abs_target = os.path.abspath(target)
+                        try:
+                            common = os.path.commonpath([abs_directory, abs_target])
+                        except ValueError:  # e.g. paths on different Windows drives
+                            return False
+                        return common == abs_directory
+
+                    def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                        # Check every member name before anything is written to disk.
+                        for member in tar.getmembers():
+                            member_path = os.path.join(path, member.name)
+                            if not is_within_directory(path, member_path):
+                                raise Exception("Attempted Path Traversal in Tar File")
+                        tar.extractall(path, members, numeric_owner=numeric_owner)
+
+                    safe_extract(tf, path=src_dir)
 
                 shutil.copy2(fname, os.path.join(src_dir, REPACK_SCRIPT))
                 with tarfile.open(name=local_path, mode="w:gz") as tf:
diff --git a/sagemaker-python-sdk-master/tests/integ/test_horovod.py b/sagemaker-python-sdk-master/tests/integ/test_horovod.py
index a5dfa79..fd1e3a3 100644
--- a/sagemaker-python-sdk-master/tests/integ/test_horovod.py
+++ b/sagemaker-python-sdk-master/tests/integ/test_horovod.py
@@ -77,7 +77,26 @@ def extract_files_from_s3(s3_url, tmpdir, sagemaker_session):
     s3.Bucket(parsed_url.netloc).download_file(parsed_url.path.lstrip("/"), model)
 
     with tarfile.open(model, "r") as tar_file:
-        tar_file.extractall(tmpdir)
+        def is_within_directory(directory, target):
+            # Compare whole path components: a character-wise prefix test
+            # would accept a sibling such as "<directory>-evil".
+            abs_directory = os.path.abspath(directory)
+            abs_target = os.path.abspath(target)
+            try:
+                common = os.path.commonpath([abs_directory, abs_target])
+            except ValueError:  # e.g. paths on different Windows drives
+                return False
+            return common == abs_directory
+
+        def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+            # Check every member name before anything is written to disk.
+            for member in tar.getmembers():
+                member_path = os.path.join(path, member.name)
+                if not is_within_directory(path, member_path):
+                    raise Exception("Attempted Path Traversal in Tar File")
+            tar.extractall(path, members, numeric_owner=numeric_owner)
+
+        safe_extract(tar_file, tmpdir)
 
 
 def _create_and_fit_estimator(sagemaker_session, tf_version, py_version, instance_type, tmpdir):
diff --git a/sagemaker-python-sdk-master/tests/integ/test_horovod_mx.py b/sagemaker-python-sdk-master/tests/integ/test_horovod_mx.py
index 6d9733e..85009e1 100644
--- a/sagemaker-python-sdk-master/tests/integ/test_horovod_mx.py
+++ b/sagemaker-python-sdk-master/tests/integ/test_horovod_mx.py
@@ -78,7 +78,26 @@ def extract_files_from_s3(s3_url, tmpdir, sagemaker_session):
     s3.Bucket(parsed_url.netloc).download_file(parsed_url.path.lstrip("/"), model)
 
     with tarfile.open(model, "r") as tar_file:
-        tar_file.extractall(tmpdir)
+        def is_within_directory(directory, target):
+            # Compare whole path components: a character-wise prefix test
+            # would accept a sibling such as "<directory>-evil".
+            abs_directory = os.path.abspath(directory)
+            abs_target = os.path.abspath(target)
+            try:
+                common = os.path.commonpath([abs_directory, abs_target])
+            except ValueError:  # e.g. paths on different Windows drives
+                return False
+            return common == abs_directory
+
+        def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+            # Check every member name before anything is written to disk.
+            for member in tar.getmembers():
+                member_path = os.path.join(path, member.name)
+                if not is_within_directory(path, member_path):
+                    raise Exception("Attempted Path Traversal in Tar File")
+            tar.extractall(path, members, numeric_owner=numeric_owner)
+
+        safe_extract(tar_file, tmpdir)
 
 
 def _create_and_fit_estimator(mxnet_version, py_version, sagemaker_session, instance_type, tmpdir):
diff --git a/sagemaker-python-sdk-master/tests/unit/test_fw_utils.py b/sagemaker-python-sdk-master/tests/unit/test_fw_utils.py
index c15ced3..ada64ec 100644
--- a/sagemaker-python-sdk-master/tests/unit/test_fw_utils.py
+++ b/sagemaker-python-sdk-master/tests/unit/test_fw_utils.py
@@ -351,7 +351,26 @@ def list_tar_files(folder, tar_ball, tmpdir):
     startpath = str(tmpdir.ensure(folder, dir=True))
 
     with tarfile.open(name=tar_ball, mode="r:gz") as t:
-        t.extractall(path=startpath)
+        def is_within_directory(directory, target):
+            # Compare whole path components: a character-wise prefix test
+            # would accept a sibling such as "<directory>-evil".
+            abs_directory = os.path.abspath(directory)
+            abs_target = os.path.abspath(target)
+            try:
+                common = os.path.commonpath([abs_directory, abs_target])
+            except ValueError:  # e.g. paths on different Windows drives
+                return False
+            return common == abs_directory
+
+        def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+            # Check every member name before anything is written to disk.
+            for member in tar.getmembers():
+                member_path = os.path.join(path, member.name)
+                if not is_within_directory(path, member_path):
+                    raise Exception("Attempted Path Traversal in Tar File")
+            tar.extractall(path, members, numeric_owner=numeric_owner)
+
+        safe_extract(t, path=startpath)
 
     def walk():
         for root, dirs, files in os.walk(startpath):
diff --git a/sagemaker-python-sdk-master/tests/unit/test_utils.py b/sagemaker-python-sdk-master/tests/unit/test_utils.py
index 5c0b217..2540ee1 100644
--- a/sagemaker-python-sdk-master/tests/unit/test_utils.py
+++ b/sagemaker-python-sdk-master/tests/unit/test_utils.py
@@ -632,7 +632,26 @@ def list_tar_files(tar_ball, tmp):
     os.mkdir(startpath)
 
     with tarfile.open(name=tar_ball, mode="r:gz") as t:
-        t.extractall(path=startpath)
+        def is_within_directory(directory, target):
+            # Compare whole path components: a character-wise prefix test
+            # would accept a sibling such as "<directory>-evil".
+            abs_directory = os.path.abspath(directory)
+            abs_target = os.path.abspath(target)
+            try:
+                common = os.path.commonpath([abs_directory, abs_target])
+            except ValueError:  # e.g. paths on different Windows drives
+                return False
+            return common == abs_directory
+
+        def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+            # Check every member name before anything is written to disk.
+            for member in tar.getmembers():
+                member_path = os.path.join(path, member.name)
+                if not is_within_directory(path, member_path):
+                    raise Exception("Attempted Path Traversal in Tar File")
+            tar.extractall(path, members, numeric_owner=numeric_owner)
+
+        safe_extract(t, path=startpath)
 
     def walk():
         for root, dirs, files in os.walk(startpath):
diff --git a/sagemaker-training-toolkit-master/src/sagemaker_training/files.py b/sagemaker-training-toolkit-master/src/sagemaker_training/files.py
index 9e0ea8b..d763b9e 100644
--- a/sagemaker-training-toolkit-master/src/sagemaker_training/files.py
+++ b/sagemaker-training-toolkit-master/src/sagemaker_training/files.py
@@ -131,7 +131,26 @@ def download_and_extract(uri, path): # type: (str, str) -> None
                 s3_download(uri, dst)
 
                 with tarfile.open(name=dst, mode="r:gz") as t:
-                    t.extractall(path=path)
+                    def is_within_directory(directory, target):
+                        # Compare whole path components: a character-wise prefix test
+                        # would accept a sibling such as "<directory>-evil".
+                        abs_directory = os.path.abspath(directory)
+                        abs_target = os.path.abspath(target)
+                        try:
+                            common = os.path.commonpath([abs_directory, abs_target])
+                        except ValueError:  # e.g. paths on different Windows drives
+                            return False
+                        return common == abs_directory
+
+                    def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                        # Check every member name before anything is written to disk.
+                        for member in tar.getmembers():
+                            member_path = os.path.join(path, member.name)
+                            if not is_within_directory(path, member_path):
+                                raise Exception("Attempted Path Traversal in Tar File")
+                        tar.extractall(path, members, numeric_owner=numeric_owner)
+
+                    safe_extract(t, path=path)
 
             elif os.path.isdir(uri):
                 if uri == path:
@@ -141,7 +160,26 @@ def download_and_extract(uri, path): # type: (str, str) -> None
                 shutil.copytree(uri, path)
             elif tarfile.is_tarfile(uri):
                 with tarfile.open(name=uri, mode="r:gz") as t:
-                    t.extractall(path=path)
+                    def is_within_directory(directory, target):
+                        # Compare whole path components: a character-wise prefix test
+                        # would accept a sibling such as "<directory>-evil".
+                        abs_directory = os.path.abspath(directory)
+                        abs_target = os.path.abspath(target)
+                        try:
+                            common = os.path.commonpath([abs_directory, abs_target])
+                        except ValueError:  # e.g. paths on different Windows drives
+                            return False
+                        return common == abs_directory
+
+                    def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                        # Check every member name before anything is written to disk.
+                        for member in tar.getmembers():
+                            member_path = os.path.join(path, member.name)
+                            if not is_within_directory(path, member_path):
+                                raise Exception("Attempted Path Traversal in Tar File")
+                        tar.extractall(path, members, numeric_owner=numeric_owner)
+
+                    safe_extract(t, path=path)
             else:
                 shutil.copy2(uri, path)
 