{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/saketkc/pysradb/blob/develop/notebooks/02.Commandline_download.ipynb)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Command-line Download\n", "\n", "This notebook demonstrates how to use pysradb from the command line to look up SRA metadata and download SRA data." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Install pysradb if not already installed\n", "try:\n", " import pysradb\n", "\n", " print(f\"pysradb {pysradb.__version__} is already installed\")\n", "except ImportError:\n", " print(\"Installing pysradb from GitHub...\")\n", " import sys\n", "\n", " !{sys.executable} -m pip install -q git+https://github.com/saketkc/pysradb\n", " print(\"pysradb installed successfully!\")" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 238 }, "colab_type": "code", "id": "EU_ZNQwnHJff", "outputId": "95271151-2ca0-416b-b01f-3077a1239c22" }, "outputs": [], "source": [ "# pip install -U pysradb" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "id": "FK_PzlAUHU0b", "outputId": "a5c47c2f-f85e-4f68-fad0-8f549d0a8ea8" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "pysradb 2.4.1\n" ] } ], "source": [ "!pysradb --version" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "wFEs7nMEHiZA" }, "source": [ "## Get metadata for an experiment (SRX to SRR/SRS, etc.)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 71 }, "colab_type": "code", "id": "4Krzqj3IHX6J", "outputId": "c9f0b0de-aec0-4cd1-aaae-616ae0461faa" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
"experiment_accession\trun_accession\tstudy_accession\tstudy_title\texperiment_title\texperiment_desc\torganism_taxid\torganism_name\tlibrary_name\tlibrary_strategy\tlibrary_source\tlibrary_selection\tlibrary_layout\tsample_accession\tsample_title\tbiosample\tbioproject\tinstrument\tinstrument_model\tinstrument_model_desc\ttotal_spots\ttotal_size\trun_total_spots\trun_total_bases\n", "SRX4720625\tSRR7882015\tSRP162234\tTranscriptomic profile of zebrafish cardiomyocytes throughout heart development\tGSM3396533: wt_GFPpos_24hpf_rep1; Danio rerio; RNA-Seq\tGSM3396533: wt_GFPpos_24hpf_rep1; Danio rerio; RNA-Seq\t7955\tDanio rerio\t\tRNA-Seq\tTRANSCRIPTOMIC\tcDNA\tPAIRED\tSRS3805811\t\tSAMN10095723\tPRJNA492280\tNextSeq 500\tNextSeq 500\tILLUMINA\t47867961\t3470385670\t47867961\t7230485009\n" ] } ], "source": [ "!pysradb srx-to-srr SRX4720625" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "yorftAz6HtFp" }, "source": [ "## Get detailed metadata " ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 71 }, "colab_type": "code", "id": "30cYmMXFHpRA", "outputId": "1ef01d12-c84c-4524-c163-e24e565b4584" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"experiment_accession\trun_accession\tstudy_accession\tstudy_title\texperiment_title\texperiment_desc\torganism_taxid\torganism_name\tlibrary_name\tlibrary_strategy\tlibrary_source\tlibrary_selection\tlibrary_layout\tsample_accession\tsample_title\tbiosample\tbioproject\tinstrument\tinstrument_model\tinstrument_model_desc\ttotal_spots\ttotal_size\trun_total_spots\trun_total_bases\trun_alias\tpublic_filename\tpublic_size\tpublic_date\tpublic_md5\tpublic_version\tpublic_semantic_name\tpublic_supertype\tpublic_sratoolkit\taws_url\taws_free_egress\taws_access_type\tpublic_url\tncbi_url\tncbi_free_egress\tncbi_access_type\tgcp_url\tgcp_free_egress\tgcp_access_type\texperiment_alias\tsource_name\ttissue\tdevelopmental stage\tgfp status\tgenetic background\tena_fastq_http\tena_fastq_http_1\tena_fastq_http_2\tena_fastq_ftp\tena_fastq_ftp_1\tena_fastq_ftp_2\n", "SRX4720625\tSRR7882015\tSRP162234\tTranscriptomic profile of zebrafish cardiomyocytes throughout heart development\tGSM3396533: wt_GFPpos_24hpf_rep1; Danio rerio; RNA-Seq\tGSM3396533: wt_GFPpos_24hpf_rep1; Danio rerio; RNA-Seq\t7955\tDanio rerio\t\tRNA-Seq\tTRANSCRIPTOMIC\tcDNA\tPAIRED\tSRS3805811\t\tSAMN10095723\tPRJNA492280\tNextSeq 500\tNextSeq 500\tILLUMINA\t47867961\t3470385670\t47867961\t7230485009\tGSM3396533_r1\tSRR7882015.sralite\t1881003321\t2020-06-14 12:02:25\t8161154ca4e9cf674e3f0e4af74c8455\t1\tSRA Lite\tPrimary ETL\t1\ts3://sra-pub-zq-8/SRR7882015/SRR7882015.sralite.1\ts3.us-east-1\taws identity\thttps://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos5/sra-pub-zq-11/SRR007/882/SRR7882015/SRR7882015.sralite.1\thttps://sra-downloadb.be-md.ncbi.nlm.nih.gov/sos5/sra-pub-zq-11/SRR007/882/SRR7882015/SRR7882015.sralite.1\tworldwide\tanonymous\tgs://sra-pub-zq-107/SRR7882015/SRR7882015.zq.1\tgs.us-east1\tgcp identity\tGSM3396533\tFACS-sorted embryo cells\tFACS-sorted embryo cells\t24 hpf\tGFP positive\twild 
type\t\thttp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR788/005/SRR7882015/SRR7882015_1.fastq.gz\thttp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR788/005/SRR7882015/SRR7882015_2.fastq.gz\t\tera-fasp@fasp.sra.ebi.ac.uk:vol1/fastq/SRR788/005/SRR7882015/SRR7882015_1.fastq.gz\tera-fasp@fasp.sra.ebi.ac.uk:vol1/fastq/SRR788/005/SRR7882015/SRR7882015_2.fastq.gz\n" ] } ], "source": [ "!pysradb srx-to-srr SRX4720625 --detailed" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "tTK_0p6JHyCI" }, "source": [ "## Download all runs for a particular project (SRP)\n", "\n", "`-p` selects the project (study) accession. `-y` skips the interactive confirmation prompt, which cannot be answered when the command is run from a notebook cell." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "colab_type": "code", "id": "YQLxy1yzH6dQ", "outputId": "6c95ae03-f70b-4536-b461-84f8c206fa84" }, "outputs": [], "source": [ "!pysradb download -y -p SRP162234" ] } ],
"metadata": { "colab": { "authorship_tag": "ABX9TyN0BPTWXcI8R2yJh5F/hEBk", "collapsed_sections": [], "include_colab_link": true, "name": "02.Commandline_download", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.11" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": {}, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 4 }