{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "05.Downloading subsets of a project.ipynb", "provenance": [], "collapsed_sections": [], "authorship_tag": "ABX9TyMwGm36+9vVPTY3ariKzrRm", "include_colab_link": true }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/saketkc/pysradb/blob/develop/notebooks/05.Downloading_subsets_of_a_project.ipynb)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Downloading Subsets of a Project\n", "\n", "This notebook shows how to filter and download specific samples from a larger SRA project." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Install pysradb if not already installed\n", "try:\n", " import pysradb\n", "\n", " print(f\"pysradb {pysradb.__version__} is already installed\")\n", "except ImportError:\n", " print(\"Installing pysradb from GitHub...\")\n", " import sys\n", "\n", " !{sys.executable} -m pip install -q git+https://github.com/saketkc/pysradb\n", " print(\"pysradb installed successfully!\")" ] }, { "cell_type": "code", "metadata": { "id": "ZimlPnZwF3JP", "colab_type": "code", "outputId": "25bca786-e5e1-4980-f304-8493b06c3481", "colab": { "base_uri": "https://localhost:8080/", "height": 683 } }, "source": [ "pip install git+https://github.com/saketkc/pysradb.git" ], "execution_count": 1, "outputs": [ { "output_type": "stream", "text": [ "Collecting git+https://github.com/saketkc/pysradb.git\n", " Cloning https://github.com/saketkc/pysradb.git to /tmp/pip-req-build-3dlg9hp3\n", " Running command git clone -q https://github.com/saketkc/pysradb.git /tmp/pip-req-build-3dlg9hp3\n", "Requirement already satisfied: pandas==0.25.3 in /usr/local/lib/python3.6/dist-packages (from pysradb==0.10.3.dev0) (0.25.3)\n", 
"Collecting tqdm==4.41.1\n", " Using cached https://files.pythonhosted.org/packages/72/c9/7fc20feac72e79032a7c8138fd0d395dc6d8812b5b9edf53c3afd0b31017/tqdm-4.41.1-py2.py3-none-any.whl\n", "Collecting requests==2.22.0\n", " Using cached https://files.pythonhosted.org/packages/51/bd/23c926cd341ea6b7dd0b2a00aba99ae0f828be89d72b2190f27c11d4b7fb/requests-2.22.0-py2.py3-none-any.whl\n", "Collecting xmltodict==0.12.0\n", " Using cached https://files.pythonhosted.org/packages/28/fd/30d5c1d3ac29ce229f6bdc40bbc20b28f716e8b363140c26eff19122d8a5/xmltodict-0.12.0-py2.py3-none-any.whl\n", "Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas==0.25.3->pysradb==0.10.3.dev0) (2.6.1)\n", "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas==0.25.3->pysradb==0.10.3.dev0) (2018.9)\n", "Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from pandas==0.25.3->pysradb==0.10.3.dev0) (1.17.5)\n", "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests==2.22.0->pysradb==0.10.3.dev0) (2.8)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests==2.22.0->pysradb==0.10.3.dev0) (1.24.3)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests==2.22.0->pysradb==0.10.3.dev0) (2019.11.28)\n", "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests==2.22.0->pysradb==0.10.3.dev0) (3.0.4)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.6.1->pandas==0.25.3->pysradb==0.10.3.dev0) (1.12.0)\n", "Building wheels for collected packages: pysradb\n", " Building wheel for pysradb (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", " Created wheel for pysradb: filename=pysradb-0.10.3.dev0-cp36-none-any.whl size=147411 sha256=6ccd6874b7cde11cb10eae96cb14e86f9cdfe5f1b02b16a3c7eb20879afd6a62\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-z9xalsuu/wheels/d5/24/42/81dccabc3a4aac9757e23b7175ad7270090a4b3c203cd4fc8f\n", "Successfully built pysradb\n", "\u001b[31mERROR: google-colab 1.0.0 has requirement requests~=2.21.0, but you'll have requests 2.22.0 which is incompatible.\u001b[0m\n", "\u001b[31mERROR: datascience 0.10.6 has requirement folium==0.2.1, but you'll have folium 0.8.3 which is incompatible.\u001b[0m\n", "Installing collected packages: tqdm, requests, xmltodict, pysradb\n", " Found existing installation: tqdm 4.28.1\n", " Uninstalling tqdm-4.28.1:\n", " Successfully uninstalled tqdm-4.28.1\n", " Found existing installation: requests 2.21.0\n", " Uninstalling requests-2.21.0:\n", " Successfully uninstalled requests-2.21.0\n", "Successfully installed pysradb-0.10.3.dev0 requests-2.22.0 tqdm-4.41.1 xmltodict-0.12.0\n" ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.colab-display-data+json": { "pip_warning": { "packages": [ "requests", "tqdm" ] } } }, "metadata": { "tags": [] } } ] }, { "cell_type": "code", "metadata": { "id": "Dg62g5OM_qbn", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "295c4af3-0a46-4f70-b695-ab395422edec" }, "source": [ "!pysradb --version" ], "execution_count": 2, "outputs": [ { "output_type": "stream", "text": [ "pysradb 0.10.3-dev0\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "Vi6ELv2KF_V4", "colab_type": "code", "colab": {} }, "source": [ "from pysradb.sraweb import SRAweb\n", "\n", "db = SRAweb()" ], "execution_count": 0, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "xLZ431sPKJcX", "colab_type": "text" }, "source": [ "## Example of a record missing \"SAMPLE_ATTRIBUTES\" \n", "\n", "It also has 
an \"auxiliary\" contig file: https://trace.ncbi.nlm.nih.gov/Traces/sra/?run=SRR5146869" ] }, { "cell_type": "code", "metadata": { "id": "x5OmbCI-GFcA", "colab_type": "code", "outputId": "8f445628-1d50-4845-b918-7a6d9dbe636d", "colab": { "base_uri": "https://localhost:8080/", "height": 966 } }, "source": [ "df = db.sra_metadata(\"SRP096127\", detailed=True)\n", "df" ], "execution_count": 4, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
study_accessionexperiment_accessionexperiment_titleexperiment_descorganism_taxidorganism_namelibrary_strategylibrary_sourcelibrary_selectionsample_accessionsample_titleinstrumenttotal_spotstotal_sizerun_accessionrun_total_spotsrun_total_basesrun_aliassra_url_altsra_urlexperiment_aliassource_namecell typegroup
0SRP096127SRX2467007GSM2448483: normal.ct-970; Homo sapiens; Bisul...GSM2448483: normal.ct-970; Homo sapiens; Bisul...9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1899466N/AIllumina HiSeq 250055954750734487SRR514905955954783216675GSM2448483_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2448483blood serumblood serumhealthy control
1SRP096127SRX2467006GSM2448482: normal.ct-969; Homo sapiens; Bisul...GSM2448482: normal.ct-969; Homo sapiens; Bisul...9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1899465N/AIllumina HiSeq 250044157740899268SRR514905844157765549383GSM2448482_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2448482blood serumblood serumhealthy control
2SRP096127SRX2467005GSM2448481: normal.ct-968; Homo sapiens; Bisul...GSM2448481: normal.ct-968; Homo sapiens; Bisul...9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1899464N/AIllumina HiSeq 250056337850951134SRR514905756337883839813GSM2448481_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2448481blood serumblood serumhealthy control
3SRP096127SRX2467004GSM2448480: normal.ct-967; Homo sapiens; Bisul...GSM2448480: normal.ct-967; Homo sapiens; Bisul...9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1899463N/AIllumina HiSeq 250042287839223860SRR514905642287862753430GSM2448480_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2448480blood serumblood serumhealthy control
4SRP096127SRX2467003GSM2448479: normal.ct-966; Homo sapiens; Bisul...GSM2448479: normal.ct-966; Homo sapiens; Bisul...9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1899461N/AIllumina HiSeq 250051725446881651SRR514905551725477004865GSM2448479_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2448479blood serumblood serumhealthy control
...........................................................................
2186SRP096127SRX2464821GSM2446284: HCC.ct-5; Homo sapiens; Bisulfite-SeqGSM2446284: HCC.ct-5; Homo sapiens; Bisulfite-Seq9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1897280N/AIllumina HiSeq 2500103320483576370SRR51468731033204196635123GSM2446284_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2446284blood serumblood serumhepatocellular carcinoma patient
2187SRP096127SRX2464820GSM2446283: HCC.ct-4; Homo sapiens; Bisulfite-SeqGSM2446283: HCC.ct-4; Homo sapiens; Bisulfite-Seq9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1897279N/AIllumina HiSeq 250084085368410342SRR5146872840853159822416GSM2446283_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2446283blood serumblood serumhepatocellular carcinoma patient
2188SRP096127SRX2464819GSM2446282: HCC.ct-3; Homo sapiens; Bisulfite-SeqGSM2446282: HCC.ct-3; Homo sapiens; Bisulfite-Seq9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1897278N/AIllumina HiSeq 250088572471407675SRR5146871885724166270272GSM2446282_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2446282blood serumblood serumhepatocellular carcinoma patient
2189SRP096127SRX2464818GSM2446281: HCC.ct-2; Homo sapiens; Bisulfite-SeqGSM2446281: HCC.ct-2; Homo sapiens; Bisulfite-Seq9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1897277N/AIllumina HiSeq 250077568462094237SRR5146870775684145671062GSM2446281_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2446281blood serumblood serumhepatocellular carcinoma patient
2190SRP096127SRX2464817GSM2446280: HCC.ct-1; Homo sapiens; Bisulfite-SeqGSM2446280: HCC.ct-1; Homo sapiens; Bisulfite-Seq9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1897276N/AIllumina HiSeq 2500112403189769302SRR51468691124031212986785GSM2446280_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2446280blood serumblood serumhepatocellular carcinoma patient
\n", "

2191 rows × 24 columns

\n", "
" ], "text/plain": [ " study_accession ... group\n", "0 SRP096127 ... healthy control\n", "1 SRP096127 ... healthy control\n", "2 SRP096127 ... healthy control\n", "3 SRP096127 ... healthy control\n", "4 SRP096127 ... healthy control\n", "... ... ... ...\n", "2186 SRP096127 ... hepatocellular carcinoma patient\n", "2187 SRP096127 ... hepatocellular carcinoma patient\n", "2188 SRP096127 ... hepatocellular carcinoma patient\n", "2189 SRP096127 ... hepatocellular carcinoma patient\n", "2190 SRP096127 ... hepatocellular carcinoma patient\n", "\n", "[2191 rows x 24 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 4 } ] }, { "cell_type": "code", "metadata": { "id": "DDyQEQcFQCNW", "colab_type": "code", "outputId": "e7b3d325-49a2-4c97-d3c9-16b4de5fea62", "colab": { "base_uri": "https://localhost:8080/", "height": 966 } }, "source": [ "df_contig_subset = df.loc[df[\"sra_url_alt\"].str.contains(\"contig\")]\n", "df_contig_subset" ], "execution_count": 5, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
study_accessionexperiment_accessionexperiment_titleexperiment_descorganism_taxidorganism_namelibrary_strategylibrary_sourcelibrary_selectionsample_accessionsample_titleinstrumenttotal_spotstotal_sizerun_accessionrun_total_spotsrun_total_basesrun_aliassra_url_altsra_urlexperiment_aliassource_namecell typegroup
0SRP096127SRX2467007GSM2448483: normal.ct-970; Homo sapiens; Bisul...GSM2448483: normal.ct-970; Homo sapiens; Bisul...9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1899466N/AIllumina HiSeq 250055954750734487SRR514905955954783216675GSM2448483_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2448483blood serumblood serumhealthy control
1SRP096127SRX2467006GSM2448482: normal.ct-969; Homo sapiens; Bisul...GSM2448482: normal.ct-969; Homo sapiens; Bisul...9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1899465N/AIllumina HiSeq 250044157740899268SRR514905844157765549383GSM2448482_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2448482blood serumblood serumhealthy control
2SRP096127SRX2467005GSM2448481: normal.ct-968; Homo sapiens; Bisul...GSM2448481: normal.ct-968; Homo sapiens; Bisul...9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1899464N/AIllumina HiSeq 250056337850951134SRR514905756337883839813GSM2448481_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2448481blood serumblood serumhealthy control
3SRP096127SRX2467004GSM2448480: normal.ct-967; Homo sapiens; Bisul...GSM2448480: normal.ct-967; Homo sapiens; Bisul...9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1899463N/AIllumina HiSeq 250042287839223860SRR514905642287862753430GSM2448480_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2448480blood serumblood serumhealthy control
4SRP096127SRX2467003GSM2448479: normal.ct-966; Homo sapiens; Bisul...GSM2448479: normal.ct-966; Homo sapiens; Bisul...9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1899461N/AIllumina HiSeq 250051725446881651SRR514905551725477004865GSM2448479_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2448479blood serumblood serumhealthy control
...........................................................................
2186SRP096127SRX2464821GSM2446284: HCC.ct-5; Homo sapiens; Bisulfite-SeqGSM2446284: HCC.ct-5; Homo sapiens; Bisulfite-Seq9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1897280N/AIllumina HiSeq 2500103320483576370SRR51468731033204196635123GSM2446284_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2446284blood serumblood serumhepatocellular carcinoma patient
2187SRP096127SRX2464820GSM2446283: HCC.ct-4; Homo sapiens; Bisulfite-SeqGSM2446283: HCC.ct-4; Homo sapiens; Bisulfite-Seq9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1897279N/AIllumina HiSeq 250084085368410342SRR5146872840853159822416GSM2446283_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2446283blood serumblood serumhepatocellular carcinoma patient
2188SRP096127SRX2464819GSM2446282: HCC.ct-3; Homo sapiens; Bisulfite-SeqGSM2446282: HCC.ct-3; Homo sapiens; Bisulfite-Seq9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1897278N/AIllumina HiSeq 250088572471407675SRR5146871885724166270272GSM2446282_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2446282blood serumblood serumhepatocellular carcinoma patient
2189SRP096127SRX2464818GSM2446281: HCC.ct-2; Homo sapiens; Bisulfite-SeqGSM2446281: HCC.ct-2; Homo sapiens; Bisulfite-Seq9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1897277N/AIllumina HiSeq 250077568462094237SRR5146870775684145671062GSM2446281_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2446281blood serumblood serumhepatocellular carcinoma patient
2190SRP096127SRX2464817GSM2446280: HCC.ct-1; Homo sapiens; Bisulfite-SeqGSM2446280: HCC.ct-1; Homo sapiens; Bisulfite-Seq9606Homo sapiensBisulfite-SeqGENOMICRANDOMSRS1897276N/AIllumina HiSeq 2500112403189769302SRR51468691124031212986785GSM2446280_r1https://sra-download.ncbi.nlm.nih.gov/traces/s...https://sra-download.st-va.ncbi.nlm.nih.gov/so...GSM2446280blood serumblood serumhepatocellular carcinoma patient
\n", "

1654 rows × 24 columns

\n", "
" ], "text/plain": [ " study_accession ... group\n", "0 SRP096127 ... healthy control\n", "1 SRP096127 ... healthy control\n", "2 SRP096127 ... healthy control\n", "3 SRP096127 ... healthy control\n", "4 SRP096127 ... healthy control\n", "... ... ... ...\n", "2186 SRP096127 ... hepatocellular carcinoma patient\n", "2187 SRP096127 ... hepatocellular carcinoma patient\n", "2188 SRP096127 ... hepatocellular carcinoma patient\n", "2189 SRP096127 ... hepatocellular carcinoma patient\n", "2190 SRP096127 ... hepatocellular carcinoma patient\n", "\n", "[1654 rows x 24 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 5 } ] }, { "cell_type": "code", "metadata": { "id": "QXPobpr-PqsR", "colab_type": "code", "colab": {} }, "source": [ "db.download(df=df_contig_subset, url_col=\"sra_url_alt\")" ], "execution_count": 0, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "Y-RVanikKONY", "colab_type": "text" }, "source": [ "## Example with a fastq file (submitted through ENA)\n", "\n", "https://trace.ncbi.nlm.nih.gov/Traces/sra/?run=ERR1520686" ] }, { "cell_type": "code", "metadata": { "id": "TwIoS6ImKEHb", "colab_type": "code", "outputId": "4e8fa031-9f88-4bb0-f994-b4bc3aae9341", "colab": { "base_uri": "https://localhost:8080/", "height": 300 } }, "source": [ "df = db.sra_metadata(\"ERP015299\", detailed=True)\n", "df" ], "execution_count": 0, "outputs": [ { "output_type": "error", "ename": "AttributeError", "evalue": "ignored", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msra_metadata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'ERP015299'\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mdetailed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pysradb/sraweb.py\u001b[0m in \u001b[0;36msra_metadata\u001b[0;34m(self, srp, sample_attribute, detailed, expand_sample_attributes, output_read_lengths, **kwargs)\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[0;31m# detailed_record[\"run_total_spots\"] = run_set[\"@total_spots\"]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 397\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0msample_attribute\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msample_attributes\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 398\u001b[0;31m \u001b[0mdict_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msample_attribute\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 399\u001b[0m \u001b[0mdetailed_record\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdict_values\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdict_values\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 400\u001b[0m \u001b[0mdetailed_records\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdetailed_record\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mAttributeError\u001b[0m: 'str' object has no attribute 'values'" ] } ] } ] }