{ "cells": [
 { "cell_type": "markdown", "metadata": {}, "source": [ "## Install Biopython\n", "\n", "In the terminal, run the following command:\n", "\n", "```bash\n", "conda install -c https://conda.anaconda.org/anaconda biopython\n", "```" ] },
 { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# SeqIO is Biopython's sequence-file reader; it is used below to parse the FASTA file\n", "from Bio import SeqIO" ] },
 { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_1\r\n", "TGAGTTTGAATCCCCGCCTGGCCAGTCTCCGTGATTATTGACATACAACATTGCCTTTTG\r\n", "TTGTCATTAGTCTCTCAACACTGATTCAAATGGGAAAGTTATCAATTACTTGTAGGGATG\r\n", "TGTGCTGGAGTACTTGTTAACCATTCAGGGTCCCAGGAAGTAGTGGTTAAGTTAACTGCA\r\n", "ATCTCATTAAAATTATATCGACTTTAGGCTTCCATACACTACGCACAGTAGATCTGATGG\r\n", "TGTTGCATAGAACGTCACATCAGATTGCCCTTTCTGCCAGCCCATGTTTAATGTAAATGA\r\n", "TTTTCAGAAATTACAGAAGTCAGGGATGATACAGGGACTTTTTTTATTTATACATTAATG\r\n", "ACACAAGATCAGTCTGAGGTGACCTTTAATGCAATGCTGCCAGCTCTACTGTGTGTAGGG\r\n", "TATGGAAGTTGATCTGATTTTAATAAGATCGTCTGTAAGTCTTCGATGACATAAAAGTGA\r\n", "TGCTAAATATTCAAACAANNAAAAATAATCGGGCAAAATAATATTTGTTTACTTACTTGT\r\n" ] } ], "source": [ "# Preview the first lines of the FASTA file with the shell's `head`\n", "!head \"/Users/migueldelrio/mdelrio2-btae-nb/data/Geo_Female.fa\"" ] },
 { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Path of the FASTA file that is parsed below.\n", "# NOTE: this is an absolute, machine-specific path; change it to match your own copy of the data.\n", "datafile = \"/Users/migueldelrio/mdelrio2-btae-nb/data/Geo_Female.fa\"" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## Choose the contig\n", "\n", "You have to know the ID of the contig you are looking for." ] },
 { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# ID of the contig to look up; it is compared against each record's ID in the next cell\n", "search_contig = \"Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_105\"" ] },
 { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed":
false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "> Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_105\t1999\n", "AGTTAAATTAAAGTCTTCACAGAGAACTGCGGGAATAACTTGAAAGAAGGTGACGTCGGGTAGTGGAGGAAATCCATGACAGTTTAGGACGTTTTTCGAACGGTTGGAACAGTAGTGAACATCAGCAAGTAGACGCTTGCACAGATGCATGTCCTGCAACGTGCAGATACGAGCCCGGGACAGAAACCGGATGCACTGTGTAAAGTGTGCGGCGACAAAGCGTCCGGTAAACACTACGGCGTTCAGAGCTGTGACGGATGCAGGGGATTTTTCAAGCGTAGCATCCGCCGAAGTTTAGAGTATGTATGTAAGGAGAACGGGAGCTGTATTGTGGACGTGGCCCGCCGGAACCAGTGTCAGGCATGCAGGTTCAAGAAATGTCTAGATGTCAATATGAATAAAGACGCGGTCCAACATGAACGCGCGCCTCGGTGCTACCAGTATCGGAGAGAAAGCCAAGATTTCCCCAGGGCAAGTGAAGAGCGGCATTCTCCCTACCCCACGGGGGAATACTTGGACAGAGCCAGGTCAGAATTTCGCCACTTTCCGTCATATTCATCACCAGCAGGGTTCACACCGGAGTACGGATTCAACGCTGGCTTCCACTCCAATGCATTCCACATGAACGACATCCGACGAGCGAGTCAACATCAGTACCCCAGTCACATTACACACAACATCAGCCCTCTGTCAGAAACCGGCAGCTCCAGTCATTTCCACTTAAACCGATATCCAAATGACGCACCTCCGCATTTTGGAAATTTTAACGGAAGGCTGGACCGAAGTTTTGGACTGACGAACGCATTTGCGCCTGGAAGACGACCTCACATTAATCATTTCCCTGCACCACCAGCAAGCATTTCCGGCAACTTCATGAACAACATCACACAAGGCAGGCCGTTGTTATTAGAGCCCAGACACGAACCAAGTAGAAACGGAAGTCATTCGAATCATTCTACGGAGATTGATGACATGAAGACGGAGAAAGCCGACTCCGAAGATGAACTCCCCGATAAGGTTTCTGTGTCCAGATCTCCCTCACCATCAACATCGGAAGGGGCAAGCCCTAAGTCTGTAGGAGACAGCGGTATCATGGCGGACCGAAGTCCCTCCCCTGCAGCGTCTAAGTCATCCACAGAATGTAATGATGTCAAAGATTCTCCGGACAACACTTCAAGCGGCAGTCCTATTGTTTCATCAACCGTGACGTCGTCTGCTCCAGATCACGTGCCGCCCTTTGTTCCTAAATACATGCCCAGCAATAGTCTATTCAACATCGCATCACTGTATCCCAACTACAACAACCGAAACGAAAGCCAGATGATGTCTAACTCTCCACACACGCCGTTGGATTCAACATACGAAACTGCGGCTAAATTGCTGTTTATGTCCATAAAATGGGCGCGCAATATCCCATCATTCCTCTCACTTCCGTTCCGGGATCAGGCCATTCTATTGGAGGAGTCCTGGAGTGAACATTTCATATTAAGTGCTGCGCAATGGTCACTTCCGCTAGATATAAGTGCTTTAATGGCGTCAAATGGGTACTCCGCAACCGACCAATCAGATCGTGCCATGATAATCAACGCCCAGCTGAAGACTTTACAGGAAGTGATGTCACGCTTCGCCGCAATGAGAGTGGATTCTACAGAGTACGCATGTCTGAAGGCTCTTGTTCTGTTTAAGTCGGAGATAAAAGGTCTGCGAGATTATCTCCATGTTGAGATGCTGCAGGATCAAGCCCAGGTGATGTTGGGCGACTACACGTACACAAACCACCCAACCAGCAAGACCCGCTTCGGCAAGCTCCTCCTCCTCCTCCCCATCCTCAGGACCATTAGCCCCCGCTCCAT
CGAGGAGGTGTTCTTCCGGCGCACCATTGGCAGCATACCTATTGAGCGACTCCTCTGTGACATGTTCAAATCTAGTTAACAAGTCAGATACATTTGTATGTTATGTATGGTGGGTTAGCAGAGCCAACACGAACTTATAACTGACC\n" ] } ], "source": [
 "# Scan the FASTA file and print every record whose ID equals search_contig.\n",
 "found = False\n",
 "for seq_record in SeqIO.parse(datafile, \"fasta\"):\n",
 "    if seq_record.id == search_contig:\n",
 "        found = True\n",
 "        # Header line: \"> \" + record ID + TAB + sequence length, followed by the sequence\n",
 "        print(\"> \" + seq_record.id + \"\\t\" + str(len(seq_record)))\n",
 "        print(seq_record.seq)\n",
 "\n",
 "# Without this, a mistyped or absent contig ID would produce no output at all.\n",
 "if not found:\n",
 "    print(\"No record with ID \" + search_contig + \" found in \" + datafile)" ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.0" } }, "nbformat": 4, "nbformat_minor": 0 }