{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook includes efforts in trying to better understand the relationship of contigs that have similar annotations. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"Downloads___EchinoBase_1AFC08C4.png\"/" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">PMI_010847-RA\r\n", "ANIEIFLPRIEAVVLFGEPVRWETNLQLILDMLITNGQLNHTPDHVPYPNMPVLGCNVDLMWMSEAHLPRLGHGSFMVCLESLYKKITGRDLLYTVLTGKPSQITYHCAENVLSQQARAMGCQGPLRTLYAIGDNPMTDIYGANLYNQYLTAKAAKGLQKPGSRQPAAIRQGVVMSDPHDDIHIETSAQVDVSDHPVHEIDDHVCESEALMTDNFDDGYPVTMESVLVYTGLHSKPGEVGQMDEAHLELDHGHRDFEFNKALIQPTITAQHVRDAVDIIFDREGFK\r\n", ">PMI_010846-RA\r\n", "MAAAARSLQRILPRNCSHLKNCISVFNHPKRFFSIRTELHSKHPPFGILFDIDGVIVRGKRILHEAVEAFQKLSDGEGHLRVPVVFVTNAGNKLRQNKAKQLTEWLGIKIHPDQVVMSHSPLKMFPQFHDKHVLVSGQGPIKEIAKGLGFTRVTTVEEVRDMFPHLDMVDHQRRVPAPNV\r\n", ">PMI_010849-RA\r\n", "MFQTRSEYDRGVNTFSPEGRLFQVEYAIEAIKLGSTALGIQTSEGCVLAVEKRITSPLMEISSIEKIVEVDNHIGCAMSGLIADSRTLIDRARVEAQSHWFTYNEKMSIEAVTQAVSNLAMQFGDDDADSGAMIPHGSIRHIHSSMTLKEACKEALVILKQVMEEKLSSTNVEMATVTREKKFVMFTKEEIEAIVAEM\r\n", ">PMI_010848-RA\r\n", "MSYPYHMESDDSFDYLFKIVLIGDAGVGKTCVVQRFKSGTFLEKQHSTIGVDFTMKSLTVDGKKVKLQVWDTAGQERFRTITQSYYRSANGVIIAYDITKKETFNNVPRWIEDVHKYAGNSVAQVLIGNKKDLDELRQVDFEDAQAIAQHHGMLQCLETSAKDSTNVEQTFALLAQELKRRHAGGAALDGGDSLKLNYNAKRVDGWNCCG\r\n", ">PMI_010850-RA\r\n", "MQTSGGQDHLCINPEIYCKKIQGTSVRSTSSDLPQRKYQRQLSIAMATSLGARKPTSIQRPKRWSEEVEEAYRLQLAGYRDALEYHSVKGESGYIKKLQRKDGNYYYYNRSRECADKDVNKIKLYGY\r\n" ] } ], "source": [ "!head ./misc/Pm_proteins.fasta" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "29805\r\n" ] } ], "source": [ "!fgrep -c \">\" ./misc/Pm_proteins.fasta" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Creating blast database" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!makeblastdb \\\n", "-in ./misc/Pm_proteins.fasta \\\n", "-dbtype prot \\\n", "-out ./misc/Pm_proteins" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Blasting**" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!blastx \\\n", "-query ./data/Phel_transcriptome.fasta \\\n", "-db ./misc/Pm_proteins \\\n", "-out ./wd/Phel_blastx_Pm_e50.tab \\\n", "-evalue 1E-50 \\\n", "-max_target_seqs 1 \\\n", "-max_hsps 1 \\\n", "-outfmt 6 \\\n", "-num_threads 6" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Identifying the # of Phel contigs that match to the same PMI protein:" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 4 PMI_020736-RA\r\n", " 4 PMI_023204-RA\r\n", " 4 PMI_025724-RA\r\n", " 4 PMI_028024-RA\r\n", " 5 PMI_000929-RA\r\n", " 5 PMI_001602-RA\r\n", " 5 PMI_003081-RA\r\n", " 5 PMI_017390-RA\r\n", " 5 PMI_019456-RA\r\n", " 6 PMI_000110-RA\r\n", " 6 PMI_013600-RA\r\n", " 7 PMI_010001-RA\r\n", " 7 PMI_020859-RA\r\n", " 8 PMI_015266-RA\r\n", " 10 PMI_029345-RA\r\n" ] } ], "source": [ "!cut -f2 ./wd/Phel_blastx_Pm_e50.tab | sort | uniq -c | sort -n \\\n", "> ./wd/Duphits_Phel_blastx_Pm_e50.tab\n", "!tail -15 ./wd/Duphits_Phel_blastx_Pm_e50.tab \n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Identifying specific Phel contigs and where they match, knowing that the columns are `qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore`" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Phel_contig_16086\tPMI_000929-RA\t35.38\t359\t205\t9\t16\t1056\t41\t384\t2e-59\t 211\r\n", 
"Phel_contig_2292\tPMI_000929-RA\t38.33\t600\t323\t13\t5150\t3441\t1175\t1757\t1e-108\t 383\r\n", "Phel_contig_6114\tPMI_000929-RA\t59.97\t657\t214\t4\t2252\t354\t1125\t1756\t0.0\t 813\r\n", "Phel_contig_619\tPMI_000929-RA\t45.31\t651\t313\t8\t5890\t4061\t1109\t1757\t2e-169\t 570\r\n", "Phel_contig_6408\tPMI_000929-RA\t45.16\t558\t271\t10\t1586\t3\t1136\t1688\t9e-151\t 481\r\n" ] } ], "source": [ "!fgrep \"PMI_000929-RA\" ./wd/Phel_blastx_Pm_e50.tab" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "This 'fgrep' can be done repeatedly and overlaps determined manually." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }