{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "04.SRA-to-fastq-conda.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "authorship_tag": "ABX9TyM0msA7wrU4z6plJCRVwnkf",
      "include_colab_link": true
    },
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/saketkc/pysradb/blob/develop/notebooks/04.SRA_to_fastq_conda.ipynb)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Converting SRA to FASTQ using Conda\n",
        "\n",
        "This notebook demonstrates how to convert SRA files to FASTQ format using conda and parallel-fastq-dump."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Install pysradb if not already installed\n",
        "import importlib\n",
        "import sys\n",
        "\n",
        "try:\n",
        "    import pysradb\n",
        "\n",
        "    print(f\"pysradb {pysradb.__version__} is already installed\")\n",
        "except ImportError:\n",
        "    print(\"Installing pysradb from GitHub...\")\n",
        "    !{sys.executable} -m pip install -q git+https://github.com/saketkc/pysradb\n",
        "    # A `!` shell escape never raises when the command fails, so confirm the\n",
        "    # install by importing the package instead of assuming it succeeded.\n",
        "    importlib.invalidate_caches()\n",
        "    import pysradb\n",
        "\n",
        "    print(f\"pysradb {pysradb.__version__} installed successfully!\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": { "id": "iP2m8GEdate2", "colab_type": "text" },
      "source": [ "## Install Conda" ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "CSbjMUkfZYgB",
        "colab_type": "code",
        "outputId": "a9953581-3914-4d73-c5a3-4d6a9090c38b",
        "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }
      },
      "source": [
        "# Fetch the Anaconda3 5.1.0 installer from the current Anaconda archive host\n",
        "# (-c resumes a partially downloaded file instead of starting over).\n",
        "!wget -c https://repo.anaconda.com/archive/Anaconda3-5.1.0-Linux-x86_64.sh\n",
        "!chmod +x Anaconda3-5.1.0-Linux-x86_64.sh\n",
        "# Unattended install: -b batch mode, -f tolerate an existing prefix, -p install prefix.\n",
        "!bash ./Anaconda3-5.1.0-Linux-x86_64.sh -b -f -p /usr/local\n",
        "\n",
        "import sys\n",
        "\n",
        "# Anaconda3 5.1.0 bundles Python 3.6, hence the python3.6 site-packages path.\n",
        "# NOTE(review): later conda installs may upgrade conda's Python and leave this\n",
        "# path stale - confirm it is still needed, since the steps below only use CLIs.\n",
        "sys.path.append(\"/usr/local/lib/python3.6/site-packages/\")\n",
        "\n",
        "# `conda config --add` prepends to the channel list, so the channel added last\n",
        "# (conda-forge) ends up with the highest priority.\n",
        "!conda config --add channels defaults\n",
        "!conda config --add channels bioconda\n",
        "!conda config 
--add channels conda-forge" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "--2020-02-13 07:19:36-- https://repo.continuum.io/archive/Anaconda3-5.1.0-Linux-x86_64.sh\n", "Resolving repo.continuum.io (repo.continuum.io)... 104.18.201.79, 104.18.200.79, 2606:4700::6812:c94f, ...\n", "Connecting to repo.continuum.io (repo.continuum.io)|104.18.201.79|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 577996269 (551M) [application/x-sh]\n", "Saving to: ‘Anaconda3-5.1.0-Linux-x86_64.sh’\n", "\n", "Anaconda3-5.1.0-Lin 100%[===================>] 551.22M 131MB/s in 4.3s \n", "\n", "2020-02-13 07:19:46 (127 MB/s) - ‘Anaconda3-5.1.0-Linux-x86_64.sh’ saved [577996269/577996269]\n", "\n", "PREFIX=/usr/local\n", "installing: python-3.6.4-hc3d631a_1 ...\n", "Python 3.6.4 :: Anaconda, Inc.\n", "installing: ca-certificates-2017.08.26-h1d4fec5_0 ...\n", "installing: conda-env-2.6.0-h36134e3_1 ...\n", "installing: intel-openmp-2018.0.0-hc7b2577_8 ...\n", "installing: libgcc-ng-7.2.0-h7cc24e2_2 ...\n", "installing: libgfortran-ng-7.2.0-h9f7466a_2 ...\n", "installing: libstdcxx-ng-7.2.0-h7a57d05_2 ...\n", "installing: bzip2-1.0.6-h9a117a8_4 ...\n", "installing: expat-2.2.5-he0dffb1_0 ...\n", "installing: gmp-6.1.2-h6c8ec71_1 ...\n", "installing: graphite2-1.3.10-hf63cedd_1 ...\n", "installing: icu-58.2-h9c2bf20_1 ...\n", "installing: jbig-2.1-hdba287a_0 ...\n", "installing: jpeg-9b-h024ee3a_2 ...\n", "installing: libffi-3.2.1-hd88cf55_4 ...\n", "installing: libsodium-1.0.15-hf101ebd_0 ...\n", "installing: libtool-2.4.6-h544aabb_3 ...\n", "installing: libxcb-1.12-hcd93eb1_4 ...\n", "installing: lzo-2.10-h49e0be7_2 ...\n", "installing: mkl-2018.0.1-h19d6760_4 ...\n", "installing: ncurses-6.0-h9df7e31_2 ...\n", "installing: openssl-1.0.2n-hb7f436b_0 ...\n", "installing: patchelf-0.9-hf79760b_2 ...\n", "installing: pcre-8.41-hc27e229_1 ...\n", "installing: pixman-0.34.0-hceecf20_3 ...\n", "installing: tk-8.6.7-hc745277_3 ...\n", 
"installing: unixodbc-2.3.4-hc36303a_1 ...\n", "installing: xz-5.2.3-h55aa19d_2 ...\n", "installing: yaml-0.1.7-had09818_2 ...\n", "installing: zlib-1.2.11-ha838bed_2 ...\n", "installing: glib-2.53.6-h5d9569c_2 ...\n", "installing: hdf5-1.10.1-h9caa474_1 ...\n", "installing: libedit-3.1-heed3624_0 ...\n", "installing: libpng-1.6.34-hb9fc6fc_0 ...\n", "installing: libssh2-1.8.0-h9cfc8f7_4 ...\n", "installing: libtiff-4.0.9-h28f6b97_0 ...\n", "installing: libxml2-2.9.7-h26e45fe_0 ...\n", "installing: mpfr-3.1.5-h11a74b3_2 ...\n", "installing: pandoc-1.19.2.1-hea2e7c5_1 ...\n", "installing: readline-7.0-ha6073c6_4 ...\n", "installing: zeromq-4.2.2-hbedb6e5_2 ...\n", "installing: dbus-1.12.2-hc3f9b76_1 ...\n", "installing: freetype-2.8-hab7d2ae_1 ...\n", "installing: gstreamer-1.12.4-hb53b477_0 ...\n", "installing: libcurl-7.58.0-h1ad7b7a_0 ...\n", "installing: libxslt-1.1.32-h1312cb7_0 ...\n", "installing: mpc-1.0.3-hec55b23_5 ...\n", "installing: sqlite-3.22.0-h1bed415_0 ...\n", "installing: curl-7.58.0-h84994c4_0 ...\n", "installing: fontconfig-2.12.4-h88586e7_1 ...\n", "installing: gst-plugins-base-1.12.4-h33fb286_0 ...\n", "installing: alabaster-0.7.10-py36h306e16b_0 ...\n", "installing: asn1crypto-0.24.0-py36_0 ...\n", "installing: attrs-17.4.0-py36_0 ...\n", "installing: backports-1.0-py36hfa02d7e_1 ...\n", "installing: beautifulsoup4-4.6.0-py36h49b8c8c_1 ...\n", "installing: bitarray-0.8.1-py36h14c3975_1 ...\n", "installing: boto-2.48.0-py36h6e4cd66_1 ...\n", "installing: cairo-1.14.12-h77bcde2_0 ...\n", "installing: certifi-2018.1.18-py36_0 ...\n", "installing: chardet-3.0.4-py36h0f667ec_1 ...\n", "installing: click-6.7-py36h5253387_0 ...\n", "installing: cloudpickle-0.5.2-py36_1 ...\n", "installing: colorama-0.3.9-py36h489cec4_0 ...\n", "installing: contextlib2-0.5.5-py36h6c84a62_0 ...\n", "installing: dask-core-0.16.1-py36_0 ...\n", "installing: decorator-4.2.1-py36_0 ...\n", "installing: docutils-0.14-py36hb0f60f5_0 ...\n", "installing: 
entrypoints-0.2.3-py36h1aec115_2 ...\n", "installing: et_xmlfile-1.0.1-py36hd6bccc3_0 ...\n", "installing: fastcache-1.0.2-py36h14c3975_2 ...\n", "installing: filelock-2.0.13-py36h646ffb5_0 ...\n", "installing: glob2-0.6-py36he249c77_0 ...\n", "installing: gmpy2-2.0.8-py36hc8893dd_2 ...\n", "installing: greenlet-0.4.12-py36h2d503a6_0 ...\n", "installing: heapdict-1.0.0-py36_2 ...\n", "installing: idna-2.6-py36h82fb2a8_1 ...\n", "installing: imagesize-0.7.1-py36h52d8127_0 ...\n", "installing: ipython_genutils-0.2.0-py36hb52b0d5_0 ...\n", "installing: itsdangerous-0.24-py36h93cc618_1 ...\n", "installing: jdcal-1.3-py36h4c697fb_0 ...\n", "installing: lazy-object-proxy-1.3.1-py36h10fcdad_0 ...\n", "installing: llvmlite-0.21.0-py36ha241eea_0 ...\n", "installing: locket-0.2.0-py36h787c0ad_1 ...\n", "installing: lxml-4.1.1-py36hf71bdeb_1 ...\n", "installing: markupsafe-1.0-py36hd9260cd_1 ...\n", "installing: mccabe-0.6.1-py36h5ad9710_1 ...\n", "installing: mistune-0.8.3-py36_0 ...\n", "installing: mkl-service-1.1.2-py36h17a0993_4 ...\n", "installing: mpmath-1.0.0-py36hfeacd6b_2 ...\n", "installing: msgpack-python-0.5.1-py36h6bb024c_0 ...\n", "installing: multipledispatch-0.4.9-py36h41da3fb_0 ...\n", "installing: numpy-1.14.0-py36h3dfced4_1 ...\n", "installing: olefile-0.45.1-py36_0 ...\n", "installing: pandocfilters-1.4.2-py36ha6701b7_1 ...\n", "installing: parso-0.1.1-py36h35f843b_0 ...\n", "installing: path.py-10.5-py36h55ceabb_0 ...\n", "installing: pep8-1.7.1-py36_0 ...\n", "installing: pickleshare-0.7.4-py36h63277f8_0 ...\n", "installing: pkginfo-1.4.1-py36h215d178_1 ...\n", "installing: pluggy-0.6.0-py36hb689045_0 ...\n", "installing: ply-3.10-py36hed35086_0 ...\n", "installing: psutil-5.4.3-py36h14c3975_0 ...\n", "installing: ptyprocess-0.5.2-py36h69acd42_0 ...\n", "installing: py-1.5.2-py36h29bf505_0 ...\n", "installing: pycodestyle-2.3.1-py36hf609f19_0 ...\n", "installing: pycosat-0.6.3-py36h0a5515d_0 ...\n", "installing: pycparser-2.18-py36hf9f622e_1 ...\n", 
"installing: pycrypto-2.6.1-py36h14c3975_7 ...\n", "installing: pycurl-7.43.0.1-py36hb7f436b_0 ...\n", "installing: pyodbc-4.0.22-py36hf484d3e_0 ...\n", "installing: pyparsing-2.2.0-py36hee85983_1 ...\n", "installing: pysocks-1.6.7-py36hd97a5b1_1 ...\n", "installing: pytz-2017.3-py36h63b9c63_0 ...\n", "installing: pyyaml-3.12-py36hafb9ca4_1 ...\n", "installing: pyzmq-16.0.3-py36he2533c7_0 ...\n", "installing: qt-5.6.2-h974d657_12 ...\n", "installing: qtpy-1.3.1-py36h3691cc8_0 ...\n", "installing: rope-0.10.7-py36h147e2ec_0 ...\n", "installing: ruamel_yaml-0.15.35-py36h14c3975_1 ...\n", "installing: send2trash-1.4.2-py36_0 ...\n", "installing: simplegeneric-0.8.1-py36_2 ...\n", "installing: sip-4.18.1-py36h51ed4ed_2 ...\n", "installing: six-1.11.0-py36h372c433_1 ...\n", "installing: snowballstemmer-1.2.1-py36h6febd40_0 ...\n", "installing: sortedcontainers-1.5.9-py36_0 ...\n", "installing: sphinxcontrib-1.0-py36h6d0f590_1 ...\n", "installing: sqlalchemy-1.2.1-py36h14c3975_0 ...\n", "installing: tblib-1.3.2-py36h34cf8b6_0 ...\n", "installing: testpath-0.3.1-py36h8cadb63_0 ...\n", "installing: toolz-0.9.0-py36_0 ...\n", "installing: tornado-4.5.3-py36_0 ...\n", "installing: typing-3.6.2-py36h7da032a_0 ...\n", "installing: unicodecsv-0.14.1-py36ha668878_0 ...\n", "installing: wcwidth-0.1.7-py36hdf4376a_0 ...\n", "installing: webencodings-0.5.1-py36h800622e_1 ...\n", "installing: werkzeug-0.14.1-py36_0 ...\n", "installing: wrapt-1.10.11-py36h28b7045_0 ...\n", "installing: xlrd-1.1.0-py36h1db9f0c_1 ...\n", "installing: xlsxwriter-1.0.2-py36h3de1aca_0 ...\n", "installing: xlwt-1.3.0-py36h7b00a1f_0 ...\n", "installing: babel-2.5.3-py36_0 ...\n", "installing: backports.shutil_get_terminal_size-1.0.0-py36hfea85ff_2 ...\n", "installing: bottleneck-1.2.1-py36haac1ea0_0 ...\n", "installing: cffi-1.11.4-py36h9745a5d_0 ...\n", "installing: conda-verify-2.0.0-py36h98955d8_0 ...\n", "installing: cycler-0.10.0-py36h93f1223_0 ...\n", "installing: cytoolz-0.9.0-py36h14c3975_0 ...\n", 
"installing: h5py-2.7.1-py36h3585f63_0 ...\n", "installing: harfbuzz-1.7.4-hc5b324e_0 ...\n", "installing: html5lib-1.0.1-py36h2f9c1c0_0 ...\n", "installing: jedi-0.11.1-py36_0 ...\n", "installing: networkx-2.1-py36_0 ...\n", "installing: nltk-3.2.5-py36h7532b22_0 ...\n", "installing: numba-0.36.2-np114py36hc6662d5_0 ...\n", "installing: numexpr-2.6.4-py36hc4a3f9a_0 ...\n", "installing: openpyxl-2.4.10-py36_0 ...\n", "installing: packaging-16.8-py36ha668100_1 ...\n", "installing: partd-0.3.8-py36h36fd896_0 ...\n", "installing: pathlib2-2.3.0-py36h49efa8e_0 ...\n", "installing: pexpect-4.3.1-py36_0 ...\n", "installing: pillow-5.0.0-py36h3deb7b8_0 ...\n", "installing: pyqt-5.6.0-py36h0386399_5 ...\n", "installing: python-dateutil-2.6.1-py36h88d3b88_1 ...\n", "installing: pywavelets-0.5.2-py36he602eb0_0 ...\n", "installing: qtawesome-0.4.4-py36h609ed8c_0 ...\n", "installing: scipy-1.0.0-py36hbf646e7_0 ...\n", "installing: setuptools-38.4.0-py36_0 ...\n", "installing: singledispatch-3.4.0.3-py36h7a266c3_0 ...\n", "installing: sortedcollections-0.5.3-py36h3c761f9_0 ...\n", "installing: sphinxcontrib-websupport-1.0.1-py36hb5cb234_1 ...\n", "installing: sympy-1.1.1-py36hc6d1c1c_0 ...\n", "installing: terminado-0.8.1-py36_1 ...\n", "installing: traitlets-4.3.2-py36h674d592_0 ...\n", "installing: zict-0.1.3-py36h3a3bf81_0 ...\n", "installing: astroid-1.6.1-py36_0 ...\n", "installing: bleach-2.1.2-py36_0 ...\n", "installing: clyent-1.2.2-py36h7e57e65_1 ...\n", "installing: cryptography-2.1.4-py36hd09be54_0 ...\n", "installing: cython-0.27.3-py36h1860423_0 ...\n", "installing: datashape-0.5.4-py36h3ad6b5c_0 ...\n", "installing: distributed-1.20.2-py36_0 ...\n", "installing: get_terminal_size-1.0.0-haa9412d_0 ...\n", "installing: gevent-1.2.2-py36h2fe25dc_0 ...\n", "installing: imageio-2.2.0-py36he555465_0 ...\n", "installing: isort-4.2.15-py36had401c0_0 ...\n", "installing: jinja2-2.10-py36ha16c418_0 ...\n", "installing: jsonschema-2.6.0-py36h006f8b5_0 ...\n", "installing: 
jupyter_core-4.4.0-py36h7c827e3_0 ...\n", "installing: matplotlib-2.1.2-py36h0e671d2_0 ...\n", "installing: navigator-updater-0.1.0-py36h14770f7_0 ...\n", "installing: nose-1.3.7-py36hcdf7029_2 ...\n", "installing: pandas-0.22.0-py36hf484d3e_0 ...\n", "installing: pango-1.41.0-hd475d92_0 ...\n", "installing: patsy-0.5.0-py36_0 ...\n", "installing: pyflakes-1.6.0-py36h7bd6a15_0 ...\n", "installing: pygments-2.2.0-py36h0d3125c_0 ...\n", "installing: pytables-3.4.2-py36h3b5282a_2 ...\n", "installing: pytest-3.3.2-py36_0 ...\n", "installing: scikit-learn-0.19.1-py36h7aa7ec6_0 ...\n", "installing: wheel-0.30.0-py36hfd4bba0_1 ...\n", "installing: astropy-2.0.3-py36h14c3975_0 ...\n", "installing: bkcharts-0.2-py36h735825a_0 ...\n", "installing: bokeh-0.12.13-py36h2f9c1c0_0 ...\n", "installing: flask-0.12.2-py36hb24657c_0 ...\n", "installing: jupyter_client-5.2.2-py36_0 ...\n", "installing: nbformat-4.4.0-py36h31c9010_0 ...\n", "installing: pip-9.0.1-py36h6c6f9ce_4 ...\n", "installing: prompt_toolkit-1.0.15-py36h17d85b1_0 ...\n", "installing: pylint-1.8.2-py36_0 ...\n", "installing: pyopenssl-17.5.0-py36h20ba746_0 ...\n", "installing: statsmodels-0.8.0-py36h8533d0b_0 ...\n", "installing: dask-0.16.1-py36_0 ...\n", "installing: flask-cors-3.0.3-py36h2d857d3_0 ...\n", "installing: ipython-6.2.1-py36h88c514a_1 ...\n", "installing: nbconvert-5.3.1-py36hb41ffb7_0 ...\n", "installing: seaborn-0.8.1-py36hfad7ec4_0 ...\n", "installing: urllib3-1.22-py36hbe7ace6_0 ...\n", "installing: ipykernel-4.8.0-py36_0 ...\n", "installing: odo-0.5.1-py36h90ed295_0 ...\n", "installing: requests-2.18.4-py36he2e5f8d_1 ...\n", "installing: scikit-image-0.13.1-py36h14c3975_1 ...\n", "installing: anaconda-client-1.6.9-py36_0 ...\n", "installing: blaze-0.11.3-py36h4e06776_0 ...\n", "installing: jupyter_console-5.2.0-py36he59e554_1 ...\n", "installing: notebook-5.4.0-py36_0 ...\n", "installing: qtconsole-4.3.1-py36h8f73b5b_0 ...\n", "installing: sphinx-1.6.6-py36_0 ...\n", "installing: 
anaconda-project-0.8.2-py36h44fb852_0 ...\n", "installing: jupyterlab_launcher-0.10.2-py36_0 ...\n", "installing: numpydoc-0.7.0-py36h18f165f_0 ...\n", "installing: widgetsnbextension-3.1.0-py36_0 ...\n", "installing: anaconda-navigator-1.7.0-py36_0 ...\n", "installing: ipywidgets-7.1.1-py36_0 ...\n", "installing: jupyterlab-0.31.5-py36_0 ...\n", "installing: spyder-3.2.6-py36_0 ...\n", "installing: _ipyw_jlab_nb_ext_conf-0.1.0-py36he11e457_0 ...\n", "installing: jupyter-1.0.0-py36_4 ...\n", "installing: anaconda-5.1.0-py36_2 ...\n", "installing: conda-4.4.10-py36_0 ...\n", "installing: conda-build-3.4.1-py36_0 ...\n", "installation finished.\n", "WARNING:\n", " You currently have a PYTHONPATH environment variable set. This may cause\n", " unexpected behavior when running the Python interpreter in Anaconda3.\n", " For best results, please verify that your PYTHONPATH only points to\n", " directories of packages that are compatible with the Python interpreter\n", " in Anaconda3: /usr/local\n", "Warning: 'defaults' already in 'channels' list, moving to the top\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "4XlnJIgobDIH", "colab_type": "text" }, "source": [ "## Install parallel-fastq-dump" ] }, { "cell_type": "code", "metadata": { "id": "XYc6vNwya_2I", "colab_type": "code", "outputId": "b96f8eef-8d05-45f0-a452-814296b6fc56", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 } }, "source": [ "!conda install -y parallel-fastq-dump" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "Solving environment: - \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- 
\b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- 
\b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\bdone\n", "\n", "\n", "==> WARNING: A newer version of conda exists. <==\n", " current version: 4.4.10\n", " latest version: 4.8.2\n", "\n", "Please update conda by running\n", "\n", " $ conda update -n base conda\n", "\n", "\n", "\n", "## Package Plan ##\n", "\n", " environment location: /usr/local\n", "\n", " added / updated specs: \n", " - parallel-fastq-dump\n", "\n", "\n", "The following packages will be downloaded:\n", "\n", " package | build\n", " ---------------------------|-----------------\n", " bzip2-1.0.8 | h516909a_2 396 KB conda-forge\n", " parallel-fastq-dump-0.6.6 | py_0 8 KB bioconda\n", " python-3.7.1 | h5001a0f_0 26.8 MB conda-forge\n", " ca-certificates-2019.11.28 | hecc5488_0 145 KB conda-forge\n", " _libgcc_mutex-0.1 | conda_forge 3 KB conda-forge\n", " libgcc-ng-9.2.0 | h24d8f2e_2 8.2 MB conda-forge\n", " sqlite-3.28.0 | h8b20d00_0 1.9 MB conda-forge\n", " sra-tools-2.9.1_1 | h470a237_0 38.0 MB bioconda\n", " libgomp-9.2.0 | h24d8f2e_2 816 KB conda-forge\n", " certifi-2019.11.28 | py37_0 148 KB conda-forge\n", " ncurses-6.1 | hfc679d8_2 1.3 MB conda-forge\n", " pip-20.0.2 | py_2 1.0 MB conda-forge\n", " readline-7.0 | hf8c457e_1001 391 KB conda-forge\n", " xz-5.2.4 | h14c3975_1001 366 KB conda-forge\n", " libffi-3.2.1 | hfc679d8_5 51 KB conda-forge\n", " zlib-1.2.11 | h516909a_1006 105 KB conda-forge\n", " openssl-1.0.2u | h516909a_0 3.2 MB conda-forge\n", " tk-8.6.10 | hed695b0_0 3.2 MB conda-forge\n", " setuptools-45.2.0 | py37_0 654 KB conda-forge\n", " _openmp_mutex-4.5 | 0_gnu 435 KB conda-forge\n", " wheel-0.34.2 | py_1 24 KB conda-forge\n", " ------------------------------------------------------------\n", " Total: 87.1 MB\n", 
"\n", "The following NEW packages will be INSTALLED:\n", "\n", " _libgcc_mutex: 0.1-conda_forge conda-forge\n", " _openmp_mutex: 4.5-0_gnu conda-forge\n", " libgomp: 9.2.0-h24d8f2e_2 conda-forge\n", " parallel-fastq-dump: 0.6.6-py_0 bioconda \n", " sra-tools: 2.9.1_1-h470a237_0 bioconda \n", "\n", "The following packages will be UPDATED:\n", "\n", " bzip2: 1.0.6-h9a117a8_4 --> 1.0.8-h516909a_2 conda-forge\n", " ca-certificates: 2017.08.26-h1d4fec5_0 --> 2019.11.28-hecc5488_0 conda-forge\n", " certifi: 2018.1.18-py36_0 --> 2019.11.28-py37_0 conda-forge\n", " libffi: 3.2.1-hd88cf55_4 --> 3.2.1-hfc679d8_5 conda-forge\n", " libgcc-ng: 7.2.0-h7cc24e2_2 --> 9.2.0-h24d8f2e_2 conda-forge\n", " ncurses: 6.0-h9df7e31_2 --> 6.1-hfc679d8_2 conda-forge\n", " openssl: 1.0.2n-hb7f436b_0 --> 1.0.2u-h516909a_0 conda-forge\n", " pip: 9.0.1-py36h6c6f9ce_4 --> 20.0.2-py_2 conda-forge\n", " python: 3.6.4-hc3d631a_1 --> 3.7.1-h5001a0f_0 conda-forge\n", " readline: 7.0-ha6073c6_4 --> 7.0-hf8c457e_1001 conda-forge\n", " setuptools: 38.4.0-py36_0 --> 45.2.0-py37_0 conda-forge\n", " sqlite: 3.22.0-h1bed415_0 --> 3.28.0-h8b20d00_0 conda-forge\n", " tk: 8.6.7-hc745277_3 --> 8.6.10-hed695b0_0 conda-forge\n", " wheel: 0.30.0-py36hfd4bba0_1 --> 0.34.2-py_1 conda-forge\n", " xz: 5.2.3-h55aa19d_2 --> 5.2.4-h14c3975_1001 conda-forge\n", " zlib: 1.2.11-ha838bed_2 --> 1.2.11-h516909a_1006 conda-forge\n", "\n", "\n", "Downloading and Extracting Packages\n", "bzip2 1.0.8: 100% 1.0/1 [00:00<00:00, 4.95it/s] \n", "parallel-fastq-dump 0.6.6: 100% 1.0/1 [00:00<00:00, 25.61it/s]\n", "python 3.7.1: 100% 1.0/1 [00:08<00:00, 8.74s/it] \n", "ca-certificates 2019.11.28: 100% 1.0/1 [00:00<00:00, 12.07it/s]\n", "_libgcc_mutex 0.1: 100% 1.0/1 [00:00<00:00, 22.60it/s]\n", "libgcc-ng 9.2.0: 100% 1.0/1 [00:02<00:00, 2.84s/it] \n", "sqlite 3.28.0: 100% 1.0/1 [00:00<00:00, 1.35it/s] \n", "sra-tools 2.9.1_1: 100% 1.0/1 [00:13<00:00, 30.18s/it] \n", "libgomp 9.2.0: 100% 1.0/1 [00:00<00:00, 3.97it/s] \n", "certifi 
2019.11.28: 100% 1.0/1 [00:00<00:00, 11.58it/s]\n", "ncurses 6.1: 100% 1.0/1 [00:01<00:00, 1.22s/it] \n", "pip 20.0.2: 100% 1.0/1 [00:00<00:00, 1.83it/s] \n", "readline 7.0: 100% 1.0/1 [00:00<00:00, 4.98it/s] \n", "xz 5.2.4: 100% 1.0/1 [00:00<00:00, 5.48it/s] \n", "libffi 3.2.1: 100% 1.0/1 [00:00<00:00, 16.06it/s]\n", "zlib 1.2.11: 100% 1.0/1 [00:00<00:00, 14.34it/s]\n", "openssl 1.0.2u: 100% 1.0/1 [00:01<00:00, 2.86s/it] \n", "tk 8.6.10: 100% 1.0/1 [00:01<00:00, 1.28s/it] \n", "setuptools 45.2.0: 100% 1.0/1 [00:00<00:00, 2.78it/s] \n", "_openmp_mutex 4.5: 100% 1.0/1 [00:00<00:00, 7.34it/s] \n", "wheel 0.34.2: 100% 1.0/1 [00:00<00:00, 21.98it/s]\n", "Preparing transaction: / \b\b- \b\b\\ \b\bdone\n", "Verifying transaction: / \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\bdone\n", "Executing transaction: | \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ 
\b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\bdone\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "Ch5Ir5IQay4Q", "colab_type": "text" }, "source": [ "## Install latest pysradb" ] }, { "cell_type": "code", "metadata": { "id": "45_Z9nJPa0q4", "colab_type": "code", "outputId": "d9578939-0b66-4333-92fa-7a5ec4e8329e", "colab": { "base_uri": "https://localhost:8080/", "height": 684 } }, "source": [ "pip install git+https://github.com/saketkc/pysradb" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "Collecting git+https://github.com/saketkc/pysradb\n", " Cloning https://github.com/saketkc/pysradb to /tmp/pip-req-build-bd1zhhoz\n", " Running command git clone -q https://github.com/saketkc/pysradb /tmp/pip-req-build-bd1zhhoz\n", "Collecting pandas==0.25.3\n", " Using cached pandas-0.25.3-cp37-cp37m-manylinux1_x86_64.whl (10.4 MB)\n", "Collecting tqdm==4.41.1\n", " Using cached tqdm-4.41.1-py2.py3-none-any.whl (56 kB)\n", "Collecting requests==2.22.0\n", " Using cached requests-2.22.0-py2.py3-none-any.whl (57 kB)\n", "Collecting 
xmltodict==0.12.0\n", " Using cached xmltodict-0.12.0-py2.py3-none-any.whl (9.2 kB)\n", "Collecting python-dateutil>=2.6.1\n", " Using cached python_dateutil-2.8.1-py2.py3-none-any.whl (227 kB)\n", "Collecting numpy>=1.13.3\n", " Using cached numpy-1.18.1-cp37-cp37m-manylinux1_x86_64.whl (20.1 MB)\n", "Collecting pytz>=2017.2\n", " Using cached pytz-2019.3-py2.py3-none-any.whl (509 kB)\n", "Collecting idna<2.9,>=2.5\n", " Using cached idna-2.8-py2.py3-none-any.whl (58 kB)\n", "Collecting urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1\n", " Using cached urllib3-1.25.8-py2.py3-none-any.whl (125 kB)\n", "Collecting chardet<3.1.0,>=3.0.2\n", " Using cached chardet-3.0.4-py2.py3-none-any.whl (133 kB)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/site-packages (from requests==2.22.0->pysradb==0.10.3.dev0) (2019.11.28)\n", "Collecting six>=1.5\n", " Using cached six-1.14.0-py2.py3-none-any.whl (10 kB)\n", "Building wheels for collected packages: pysradb\n", " Building wheel for pysradb (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", " Created wheel for pysradb: filename=pysradb-0.10.3.dev0-py3-none-any.whl size=147407 sha256=b498f377cda436cca6ee34c470c8aabcbe9a75f5fe8af7a5e6c56796c1be9041\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-a6fjccpo/wheels/3f/06/98/98805e85e0909f2d0920ce73557c06d3802e4baaa2616920e8\n", "Successfully built pysradb\n", "Installing collected packages: six, python-dateutil, numpy, pytz, pandas, tqdm, idna, urllib3, chardet, requests, xmltodict, pysradb\n", "Successfully installed chardet-3.0.4 idna-2.8 numpy-1.18.1 pandas-0.25.3 pysradb-0.10.3.dev0 python-dateutil-2.8.1 pytz-2019.3 requests-2.22.0 six-1.14.0 tqdm-4.41.1 urllib3-1.25.8 xmltodict-0.12.0\n" ], "name": "stdout" }, { "output_type": "display_data", "data": { "application/vnd.colab-display-data+json": { "pip_warning": { "packages": [ "chardet", "idna", "pandas" ] } } }, "metadata": { "tags": [] } } ] }, { "cell_type": "markdown", "metadata": { "id": "IPNGntg4bOGI", "colab_type": "text" }, "source": [ "## Get metadata" ] }, { "cell_type": "code", "metadata": { "id": "jGAxadtxbPoM", "colab_type": "code", "outputId": "dd123c11-bf3e-45a1-e260-b569ab72960c", "colab": { "base_uri": "https://localhost:8080/", "height": 71 } }, "source": [ "!pysradb metadata --detailed \tSRP063852" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "study_accession experiment_accession experiment_title experiment_desc organism_taxid organism_name library_strategy library_source library_selection sample_accession sample_title instrument total_spots total_size run_accession run_total_spots run_total_bases run_alias sra_url experiment_alias source_name cell line\n", "SRP063852 SRX1254413 GSM1887643: ribosome profiling; Homo sapiens; miRNA-Seq GSM1887643: ribosome profiling; Homo sapiens; miRNA-Seq 9606 Homo sapiens miRNA-Seq TRANSCRIPTOMIC size fractionation SRS1072728 N/A Illumina HiSeq 2000 31967082 626381849 SRR2433794 31967082 916773615 GSM1887643_r1 
https://sra-download.st-va.ncbi.nlm.nih.gov/sos2/sra-pub-run-3/SRR2433794/SRR2433794.1 GSM1887643 HEK293 HEK293 \n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "7gdqjIpabSpK", "colab_type": "text" }, "source": [ "## Download data" ] }, { "cell_type": "code", "metadata": { "id": "aZOe5BSrbU3H", "colab_type": "code", "outputId": "ce236993-6074-4986-9555-cf66818abc0e", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 } }, "source": [ "!pysradb download -y -p SRP063852" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "The following files will be downloaded: \n", "\n", "study_accession experiment_accession experiment_title experiment_desc organism_taxid organism_name library_strategy library_source library_selection sample_accession sample_title instrument total_spots total_size run_accession run_total_spots run_total_bases run_alias srapath_url experiment_alias source_name cell line download_url\n", " SRP063852 SRX1254413 GSM1887643: ribosome profiling; Homo sapiens; miRNA-Seq GSM1887643: ribosome profiling; Homo sapiens; miRNA-Seq 9606 Homo sapiens miRNA-Seq TRANSCRIPTOMIC size fractionation SRS1072728 N/A Illumina HiSeq 2000 31967082 626381849 SRR2433794 31967082 916773615 GSM1887643_r1 https://sra-download.st-va.ncbi.nlm.nih.gov/sos2/sra-pub-run-3/SRR2433794/SRR2433794.1 GSM1887643 HEK293 HEK293 \n", "\n", "\n", "Total size: 626.4 MB\n", "\n", "\n", "SRP063852/SRX1254413/SRR2433794: 0% 0/1 [00:00