diff --git a/README.md b/README.md index 1b0419b..6a8f38a 100644 --- a/README.md +++ b/README.md @@ -80,9 +80,9 @@ $ pridepy download-all-public-raw-files -a PXD012353 -o /Users/yourname/Download Additional options: -- `-skip` flag is used to skip the download of files that already exist in the output directory. -- `--aspera_maximum_bandwidth` flag is used to specify the maximum bandwidth for the Aspera download. The default value is 100M. -- `--checksum_check` flag is used to check the checksum of the downloaded files. The default value is False. +- `--skip-if-downloaded-already` flag is used to skip files that already exist in the output directory. By default, files are re-downloaded even if they already exist. Use this flag to avoid re-downloading existing files. +- `--aspera-maximum-bandwidth` flag is used to specify the maximum bandwidth for the Aspera download. The default value is 100M. +- `--checksum-check` flag is used to check the checksum of the downloaded files. The default value is False. ## Downloading raw files from ProteomeXchange (PX) @@ -93,7 +93,7 @@ $ pridepy download-px-raw-files -a PXD039236 -o /Users/yourname/Downloads/folder ``` - The tool resolves the ProteomeXchange XML and downloads via FTP when available, otherwise HTTP(S). -- Resume and skip-if-already-downloaded are supported by default. +- Resume is supported. Use the `--skip-if-downloaded-already` flag to skip files that have already been downloaded. 
## Download single file by name diff --git a/pridepy/pridepy.py b/pridepy/pridepy.py index 10b2246..74afe55 100644 --- a/pridepy/pridepy.py +++ b/pridepy/pridepy.py @@ -24,25 +24,24 @@ def main(): ) @click.option( "-o", - "--output_folder", + "--output-folder", required=True, help="output folder to download or copy raw files", ) @click.option( - "-skip", - "--skip_if_downloaded_already", - required=False, - default=True, - help="Boolean value to skip the download if the file has already been downloaded.", + "--skip-if-downloaded-already", + is_flag=True, + default=False, + help="Skip the download if the file has already been downloaded.", ) @click.option( - "--aspera_maximum_bandwidth", + "--aspera-maximum-bandwidth", required=False, help="Aspera maximum bandwidth (e.g 50M, 100M, 200M), depending on the user's network bandwidth, default is 100M", default="100M", ) @click.option( - "--checksum_check", + "--checksum-check", required=False, help="Download checksum file for project", is_flag=True, @@ -63,7 +62,7 @@ def download_all_public_raw_files( accession (str): PRIDE project accession. protocol (str): Protocol for downloading files (ftp, aspera, globus). Default is ftp. output_folder (str): Directory to save downloaded raw files. - skip_if_downloaded_already (bool): Skip download if files already exist. Default is True. + skip_if_downloaded_already (bool): Skip download if files already exist. Default is False. aspera_maximum_bandwidth (str): Maximum bandwidth for Aspera protocol. Default is 100M. checksum_check (bool): Flag to download checksum file for the project. Default is False. 
""" @@ -98,25 +97,24 @@ def download_all_public_raw_files( ) @click.option( "-o", - "--output_folder", + "--output-folder", required=True, help="output folder to download or copy raw files", ) @click.option( - "-skip", - "--skip_if_downloaded_already", - required=False, - default=True, - help="Boolean value to skip the download if the file has already been downloaded.", + "--skip-if-downloaded-already", + is_flag=True, + default=False, + help="Skip the download if the file has already been downloaded.", ) @click.option( - "--aspera_maximum_bandwidth", + "--aspera-maximum-bandwidth", required=False, help="Aspera maximum bandwidth (e.g 50M, 100M, 200M), depending on the user's network bandwidth, default is 100M", default="100M", ) @click.option( - "--checksum_check", + "--checksum-check", required=False, help="Download checksum file for project", is_flag=True, @@ -145,7 +143,7 @@ def download_all_public_category_files( accession (str): The PRIDE project accession identifier. protocol (str): The protocol to use for downloading files (ftp, aspera, globus). output_folder (str): The directory where the files will be downloaded. - skip_if_downloaded_already (bool): If True, skips downloading files that already exist. + skip_if_downloaded_already (bool): If True, skips downloading files that already exist. Default is False. aspera_maximum_bandwidth (str): Maximum bandwidth for Aspera transfers. checksum_check (bool): If True, downloads the checksum file for the project. category (str): The category of files to download. @@ -180,30 +178,29 @@ def download_all_public_category_files( default="ftp", help="Protocol to be used to download files either by ftp or aspera or from globus. 
Default is ftp", ) -@click.option("-f", "--file_name", required=True, help="fileName to be downloaded") +@click.option("-f", "--file-name", required=True, help="fileName to be downloaded") @click.option( "-o", - "--output_folder", + "--output-folder", required=True, help="output folder to download or copy files", ) @click.option( - "-skip", - "--skip_if_downloaded_already", - required=False, - default=True, - help="Boolean value to skip the download if the file has already been downloaded.", + "--skip-if-downloaded-already", + is_flag=True, + default=False, + help="Skip the download if the file has already been downloaded.", ) @click.option("--username", required=False, help="PRIDE login username for private files") @click.option("--password", required=False, help="PRIDE login password for private files") @click.option( - "--aspera_maximum_bandwidth", + "--aspera-maximum-bandwidth", required=False, help="Aspera maximum bandwidth (e.g 50M, 100M, 200M), depending on the user's network bandwidth, default is 100M", default="100M", ) @click.option( - "--checksum_check", + "--checksum-check", required=False, help="Download checksum file for project", is_flag=True, @@ -226,7 +223,7 @@ def download_file_by_name( :param protocol: Protocol to be used to download files either by ftp or aspera or from globus. Default is ftp :param file_name: fileName to be downloaded :param output_folder: output folder to download or copy files - :param skip_if_downloaded_already: Boolean value to skip the download if the file has already been downloaded. + :param skip_if_downloaded_already: Boolean value to skip the download if the file has already been downloaded. Default is False. 
:param username: PRIDE login username for private files :param password: PRIDE login password for private files :param aspera_maximum_bandwidth: Aspera maximum bandwidth (e.g 50M, 100M, 200M), depending on the user's network bandwidth, default is 100M @@ -267,16 +264,15 @@ def download_file_by_name( ) @click.option( "-o", - "--output_folder", + "--output-folder", required=True, help="output folder to download files", ) @click.option( - "-skip", - "--skip_if_downloaded_already", - required=False, - default=True, - help="Boolean to skip a file if it already exists", + "--skip-if-downloaded-already", + is_flag=True, + default=False, + help="Skip the download if the file has already been downloaded.", ) def download_px_raw_files(accession: str, output_folder: str, skip_if_downloaded_already: bool): """CLI wrapper to download raw files via ProteomeXchange XML.""" @@ -308,7 +304,7 @@ def list_private_files(accession, user, password): @main.command() @click.option( "-o", - "--output_file", + "--output-file", required=True, help="output file to save all the projects metadata", ) @@ -324,7 +320,7 @@ def stream_projects_metadata(output_file): @main.command() @click.option( "-o", - "--output_file", + "--output-file", required=True, help="output file to save all the files metadata", ) @@ -360,7 +356,7 @@ def stream_files_metadata(accession, output_file): ) @click.option( "-ps", - "--page_size", + "--page-size", required=False, default=100, type=click.IntRange(min=1, max=1000), @@ -376,14 +372,14 @@ def stream_files_metadata(accession, output_file): ) @click.option( "-sd", - "--sort_direction", + "--sort-direction", required=False, default="DESC", help="Sorting direction: ASC or DESC", ) @click.option( "-sf", - "--sort_fields", + "--sort-fields", required=False, default=["submission_date"], multiple=True,