{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Let's look at the FASTA file" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">gi|768001213|ref|XM_011527774.1| PREDICTED: Homo sapiens DNA (cytosine-5-)-methyltransferase 1 (DNMT1), transcript variant X3, mRNA\r\n", "AAAATGGACCGTGGATTCCCCCGTAGCTCCCTGGTGGCTAGAAACTAGGCGGGGTGGGGGTTCTCTTTTG\r\n", "ATCCCCAAATACAGCAAGCTTTGGGCTCAAAGATTTGGAAAGAGACAGCTTAACAGAAAAGGAATGTGTG\r\n", "AAGGAGAAATTGAATCTCTTGCACGAATTTCTGCAAACAGAAATAAAGAATCAGTTATGTGACTTGGAAA\r\n", "CCAAATTACGTAAAGAAGAATTATCCGAGGAGGGCTACCTGGCTAAAGTCAAATCCCTTTTAAATAAAGA\r\n", "TTTGTCCTTGGAGAACGGTGCTCATGCTTACAACCGGGAAGTGAATGGACGTCTAGAAAACGGGAACCAA\r\n", "GCAAGAAGTGAAGCCCGTAGAGTGGGAATGGCAGATGCCAACAGCCCCCCCAAACCCCTTTCCAAACCTC\r\n", "GCACGCCCAGGAGGAGCAAGTCCGATGGAGAGGCTAAGCCTGAACCTTCACCTAGCCCCAGGATTACAAG\r\n", "GAAAAGCACCAGGCAAACCACCATCACATCTCATTTTGCAAAGGGCCCTGCCAAACGGAAACCTCAGGAA\r\n", "GAGTCTGAAAGAGCCAAATCGGATGAGTCCATCAAGGAAGAAGACAAAGACCAGGATGAGAAGAGACGTA\r\n" ] } ], "source": [ "!head /Users/sr320/git-repos/course-btea/data/Hsapien_DNMT.fasta" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [], "source": [ "!blastn \\\n", "-query /Users/sr320/git-repos/course-btea/data/Hsapien_DNMT.fasta \\\n", "-db /Users/sr320/Desktop/big-data/db/Geo_Female \\\n", "-task blastn \\\n", "-outfmt 6 \\\n", "-out /Users/sr320/Desktop/big-data/DNMT_blastn_GeoFemale.out" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_23384\t70.89\t962\t258\t9\t3846\t4802\t1744\t800\t1e-125\t 452\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_16683\t72.09\t326\t91\t0\t3438\t3763\t69\t394\t4e-43\t 
178\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_58769\t71.83\t213\t58\t2\t1392\t1603\t2\t213\t6e-22\t 107\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_26850\t70.99\t131\t38\t0\t1242\t1372\t174\t304\t2e-09\t66.2\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_244\t81.82\t44\t4\t1\t2091\t2134\t1382\t1343\t0.021\t42.8\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_15879\t82.50\t40\t7\t0\t5079\t5118\t40\t79\t0.074\t41.0\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_14775\t96.00\t25\t1\t0\t958\t982\t2494\t2470\t0.074\t41.0\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_62777\t95.83\t24\t1\t0\t2177\t2200\t61\t38\t0.26\t39.2\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_44078\t89.29\t28\t3\t0\t489\t516\t16\t43\t0.91\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_32312\t95.65\t23\t1\t0\t175\t197\t136\t158\t0.91\t37.4\r\n" ] } ], "source": [ "!head /Users/sr320/Desktop/big-data/DNMT_blastn_GeoFemale.out" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_23384\r\n", "GAGATGAAAATGCAATAAACATTATTTCCATACCTTTATCTAGGAATTACCAATTTATCT\r\n", "TACAATCTTAATGTAAACATTTATAGATCATGCAAGTTTTACAATTAATCAGTTAGGTAG\r\n", "TATAACTTGTATTCATTTAATTTCTCAATTTCATGTACATATAATATTATATTCCATACA\r\n", "TACATTTAGTCAAATCAATTGTAACAATGATCCAAATTTGAATTGTGACAAAACGCAAAA\r\n", "TCGATCCATACCAGAAATCACTACATGTCTTTCATTCCATTCATTTTGTAAACTACAATA\r\n", "TATTTCCATATAACAGGGAAACTTTTGAACACAGTTGTGGTTTATATTCATTCTGATGTA\r\n", "TATTCATGACAAATTCCTGAAGAAGCTGGACCTAGTATGTTGATATTACCAAACTTATGA\r\n", 
"ACATGATTTCCGTCCAAACTAGGGTTTCAGGTTGTTTTATATCCTTTATTAATTACTCGT\r\n", "CTGTATAACAACCTGAATATGTATCTTGTTATGGCATTTGTGTCACGTGTGCCATTTTTA\r\n", "TGATCTACACCGCAATGTCAATAGAATCAGTCCAATGTCAGGTAAATCAAAATGCCCATT\r\n", "TCTTTTAAACACAGTCAACTTTAGTTTTCTAGTCATATTGCTCAACTGTCCTTTCTTAAC\r\n", "TTGTAGAACTTCCTGCAGTTTCCTCTACCTTCACCTGTTCAACTTCCATTTCTCCATCTT\r\n", "TCTCTTCCTTCTTTACATCATCTTTCTTCTCTTCTTCGGCTACAGTTCTGAGGTTTGTTG\r\n", "TTGTGGCAGCCAGGCACTTCTTAATCTCCAGACCAATAGCTCTGGCCATTGGTGGAGGTA\r\n", "CGGCATTGCCAACCTGTCTGTGGCGATCCAGGATGGAACCAAAGAATCTGTAAGTGTCCG\r\n", "GGAATCCTTGGGATCGGGAACACTCTCTAACACTGACCACGCGGTGTTGTTCAGGATGTA\r\n", "ATACACGACCCTGTTTTCCCATTGGTTCAGGATTAGTGACTGTCGTACTGAAGAATCCAT\r\n", "CCCACTCCAGTCTCCCGTACAGACCTGCCCAGTGGTTGTGCCGGTTCCCTGTATGTGGCA\r\n", "GACACCAGGGTACCAGGGTGTTGAACTGACGGTCCATAGGCTCGCATGATTTACCTTCAG\r\n", "CACAGGTACAAACACCTCTCAGGTTTCCATTGTCACTCTTACCATTCTTCTTGTCATGGT\r\n", "GAGTGTAACGTAATTTCTTCGACATTGTTCCGTCCGACAGTCTGACCTCCATGTTAGGAA\r\n", "GGTCTCTCCAGTCTGACCCTGGGGCCAGGGGAATATGTTGCATACGGGCATGAACAAGGG\r\n", "GGTTCATGTCTTTACAGATATGGTCCCGCAAAACGGGCTGGTGCTGTTTCCCTCTGATCA\r\n", "ATCTCTGGAAGTGCGACACCGTGTCGCTGTTGTATGATATCTCCTCACGTTTGTGTCCGT\r\n", "TTTTAATCTCTGGCAGGTCAGACATAGTGTCCCTCACAGTGATTGTTCTATATGGTGCAG\r\n", "AATCTGTACGCATGATGTTGGACACAAACTTCTTGTCATCTACCATTACAGACAGGGTCA\r\n", "TGGCCCTTGGAGCAAACACATGACAGGGTTCTGGATAATATGGTAGCTTCTCTCCTGGTG\r\n", "CCGCAGCCAGAATTATGGCCCTTCTCCTGGTCTGTGCCACGCCGTAACTACCTGCCTGCA\r\n", "GTACCCCGAATGTACACTGGTACCCCATCTTGATGAGACAACGGAGAGCCAGCTTGAGCA\r\n", "CCAT\r\n", ">Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_23385\r\n", "TTTGCAATTGCTGAGATTGATTACAAAATTTGTATACTGTTACTGAAAGTTACAAAAATT\r\n", "ATATATATTTTATCTTGAAAATAATACAGCCTCATATAAAATTGGACAGCAGAGTAGTGA\r\n", "ATATAGGATGTTGTCGGTTAAGGCTATAGGTCACAGGTTTGATCACAGGCTCGAACATTA\r\n", "TAGCCTCTCAAATATTCATCACTCATTGTGTTTCTTTTTTTTTTTTGAATGGGGTTTAAT\r\n", "GCCATTTTCAACACATTTCAGGTTATATATGGCATATCCAGTTTTACTGATGGTGGAGAC\r\n", "CTTAGA\r\n", ">Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_23386\r\n", 
"TAGTGGTCACTTCTATTAAGCATCCACTTGTCTTAAGCAGCCATAATTTCTTTTTCCCAT\r\n", "TTCTATGTTTAAAAAGCAAATAACCTGTTTTAAACAACAACCTGTCTTAAAGGGTCACTT\r\n", "TTGCAAATATCCTTGACTGGCTGCTAAAGACAGGTTCATTGTATACTCATACATAACCAA\r\n", "CATCCTCCTGCATTGCAAGTAACTGGTCAACTTTCTCACATACAATCGTCTGACCAAGTT\r\n", "TGATTCTAAAACCTGACTCCACCG\r\n", ">Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_23387\r\n", "GTTAAAAAACAATTACCTTGGAAAACATGAACAGAATTAACAAATATAGCTCATTTGGCC\r\n", "ACATGTGAAAATAGTTCTTAAATCAGGTACAAAATACAAGACAAGAATATGCACAATATA\r\n", "TAAATGTCAAAAGCACTATTGGAAGCCTCCTCTGAAACAATGTTACTCTTTAATACTAAT\r\n", "GAAGCACTTGCTAAATTGCACTCTTATATTTAATACCATATCATCTTCAATAAGCACCAA\r\n", "TTGTAGTAATTTTGTTACATGAACAATTACACACACATTACTACAATGACCGGTACAGAC\r\n", "ATAACGGTACAGACTTTTCAAACAAAACTGCAATGACCAGTACGGACATTACAGCAAATA\r\n", "CTTTCACAAACATTACTAAAATGATAGGTAGACATAAC\r\n", ">Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_23388\r\n", "TTCAAGCCATGACCCTAGAGTCAAAATTGACCACACCCCAGTGACATATCAATTAAAACT\r\n", "GTTGAAATTTTAATTAAACAATAAGCAACCATCCCAATTAATTTATATTGGTATAAATGG\r\n", "TCCATATGAGTTTCAATTCTGTATAACAGTTAAGACTGGGCTGTTTATAATGGATTTTAT\r\n", "ATCCATAAGGGACGAGTAGAGGCCCAAATAAAGTATTAAATTTGTCTGGTATTAAATATA\r\n", "TTAACTATGCCCNNNNNNNNNNNNNNNNNNNNNACCTTTTTTTTATTGGATTTTTTTGTC\r\n", "GCCCAGGAAATCTGGCAACTCTTAAGAATGGTAATTGTTTAGACTGAAAAGCCTCATGTG\r\n", "ATTGGATAATAATGTAGTTCATTATTATATGCGTTGCTTAATTATTTCCAAATCATTTCA\r\n", "AACTACCTGAGAGAAAGAATTTCATAATTTTTAATGTATTTGTTTCAT\r\n", ">Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_23389\r\n", "AGCAAGGCTTCAAAAGTGTTCATGACTCAATTGTGTGGGAGCTCTCAACAACTTATTTCA\r\n", "ACATGGCCACCCTGCTGCAGGATTATGCCCCTTTGTCATCGGTAAACAAGGGAGAGGTTG\r\n", "AGAAAGAAATTGTTACATTGATGAATAAATCACTAAAATACTGTAAAACAGAAGTGGGGA\r\n", "ATGCAAGCCAGCCAATGTACCAGTACCGTGCGGCCACAATACATCACCGACTGGCATCCT\r\n", "TGTTCCATAGCACAGTCAGAGGAAATTGTTCAGACCAGAAGAAGAAACACCTAAGACATT\r\n", "TGTGTGAGAGTCACTACAACCAGGCTGTGAAACTGTTCCGGGTGATGGAGTGTCACACAG\r\n", "AAGGTTTGCAGGTCCAGTTGGAATACGTAGCTCTGCTTGAATACTGTTTAACATTACAAG\r\n", 
"GAAATCCAGGAAGTCGTCTTAGATTGGTTCTGCAGATGTTGCATTGTCTCACAGATTGCC\r\n", "AGGAGTCTCTGAAGGGCTTTATTGTGCTGCTTGATGACAAAGAATATGAAACCAATCTAC\r\n", "AGGGAGAAGGGGAAACAATTACAAAGATAATGGAATCTAGACTACAGTTTGTGCTGTTGC\r\n", "AGTTAATGAAGGCGTATAGTATTGTAACGTCAAAGAAAAGTAAGGAGAAACTTTGCCTTG\r\n", "TTGACGAGTTGAAAGTTCTTTATGCCAAAAGTATACAACGGACTTCAGTTGGAAAGACAG\r\n", "AGGATGCTTTAAGGGACCGGTGTTTGTTCTTACAAGGGATATTAGAACAATCTTACATTT\r\n", "TTTATCAAAGA\r\n", ">Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_23390\r\n", "GTTGGCTTACTGAGTCATACACGTTTTACGTCCTAATCTCGGCCACTCCCCTGACTAAGT\r\n", "ATAATATATTTTTAATCAATGTGAAAATAATTTACAAACAGTAAAAAGTAATCATACAAA\r\n", "GTTGGCCGCAACTGAAACTGGAAACTTACACACTGTTTACAAACAACTTTATAATATAAA\r\n", "TGTAAGGTATTTCCGATACACCTGTTTGTCATTACCGATTTAAATAAATCTTGCTTCAGT\r\n", "AATTTATACACAGAATGTGACACACATACATTGGAAATACGACATCTGAGACTTGCGGGT\r\n", "ACATT\r\n", ">Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_23391\r\n", "TATTATTTTTAGTTGATAAGGTGTATGTCGATTTCCTTTGTTTCTAATTTTATAAAGTTT\r\n", "ACCAGAGCCAGTCCATTCCATCTCTTGCATATCTGCAGCTTTTTTTGACACTTTAGATTT\r\n", "TTTTGCTGACCTGTTTGACGTTGCAATGGGCTCAGTTTTGATGGCTAAAGTTGGTTGACC\r\n", "CTTCTTAGCACCTGCTTCCAATTTAACATTTTCTTTAACTTCAGTTCTCTCACTTTCATC\r\n", "TTNNCAAAGCATTTTCACCCCTTTCTTGCATGTCTTGATTTTCATCTTCAGGTTTTTTGT\r\n", "TTGCTTCTTCATCTTTATCTTTATTCTGTGGACATTCGCTGTTCTTGAGCTCTGTCTCTT\r\n", "TTCTCAGCACCTCCTCTTCACTTTTTTTCTCTACTTTATTAGGCACAGATTTTGCTGGCT\r\n", "TCCTTTTAGTATTTGGATTTGTTTCAACTTTGGAAACAGTTTCTGACGATTTGGCCATGC\r\n", "TATCAAC\r\n", ">Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_23392\r\n", "TATTGTTCCAGTAGTCAAGGTGGAAAACTACTTGTTTATTTGTTGTCTAAACACAACTAA\r\n", "TTTATTAGAAAAAGGTTACGCCTCGGATAAGACTTTCCATAAAAACGTTTTTAAAACCTT\r\n", "ACATATTTTGAAGAAAAAAAACGCAGATACACCTACAAATAAGGCCTTTAAAATGCAATA\r\n", "TCACAATGGTATTATACAATTTGTTGTATGAGACCTTTANNNNNNNNNNNNNNNNNNNNN\r\n", "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTT\r\n", "GTTGTATGAGACTTTTAAAATGCAATATCACAATTGTATTAAACAATTGATTGTATGAGA\r\n", 
"CCTTTAAAATGCAATATCACAATGGTAAAAATAGTTAATTTAAACTGATTGCAACACAAA\r\n" ] } ], "source": [ "!grep -A 100 \"Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_23384\" \\\n", "/Users/sr320/Desktop/big-data/db/Geo_Female.fa" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_23\r\n" ] } ], "source": [ "!grep -w \"Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_23\" \\\n", "/Users/sr320/Desktop/big-data/db/Geo_Female.fa" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_7892\t70.52\t1472\t394\t14\t3339\t4802\t2334\t3773\t0.0\t 672\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_7892\t65.76\t885\t271\t13\t1242\t2122\t216\t1072\t4e-50\t 201\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_7892\t75.47\t53\t10\t1\t2907\t2959\t1887\t1936\t3.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_67788\t85.00\t40\t6\t0\t1912\t1951\t129\t168\t0.002\t46.4\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_42581\t88.24\t34\t4\t0\t2110\t2143\t196\t163\t0.007\t44.6\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_497\t81.82\t44\t4\t1\t2091\t2134\t667\t706\t0.024\t42.8\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_15090\t96.00\t25\t1\t0\t958\t982\t180\t204\t0.083\t41.0\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_14952\t85.29\t34\t5\t0\t2106\t2139\t307\t340\t0.29\t39.2\r\n", 
"gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_11202\t95.83\t24\t1\t0\t2177\t2200\t627\t604\t0.29\t39.2\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_52144\t81.58\t38\t7\t0\t1334\t1371\t165\t202\t1.0\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_30714\t89.29\t28\t3\t0\t1916\t1943\t223\t196\t1.0\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_26715\t77.08\t48\t11\t0\t3799\t3846\t106\t59\t1.0\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_7137\t95.65\t23\t1\t0\t3348\t3370\t744\t722\t1.0\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_4441\t89.29\t28\t3\t0\t489\t516\t436\t409\t1.0\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_64323\t87.88\t33\t2\t2\t669\t700\t235\t266\t3.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_64033\t91.67\t24\t2\t0\t933\t956\t51\t74\t3.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_55564\t86.21\t29\t4\t0\t5140\t5168\t82\t110\t3.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_41776\t86.21\t29\t4\t0\t1890\t1918\t264\t236\t3.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_36848\t91.67\t24\t2\t0\t1327\t1350\t49\t26\t3.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_29401\t91.67\t24\t2\t0\t5133\t5156\t24\t1\t3.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_13879\t86.21\t29\t4\t0\t951\t979\t399\t427\t3.5\t35.6\r\n", 
"gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_7592\t88.89\t27\t3\t0\t584\t610\t2647\t2673\t3.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_5389\t86.21\t29\t4\t0\t5085\t5113\t508\t480\t3.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_5095\t80.00\t40\t5\t1\t943\t979\t78\t39\t3.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_3718\t100.00\t19\t0\t0\t4583\t4601\t503\t485\t3.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_3213\t91.67\t24\t2\t0\t2177\t2200\t1666\t1643\t3.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_544\t81.08\t37\t7\t0\t5082\t5118\t662\t626\t3.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tGeo_Pool_M_CTTGTA_L006_R1_001_val_1_(paired)_contig_421\t78.43\t51\t9\t2\t599\t647\t383\t433\t3.5\t35.6\r\n" ] } ], "source": [ "!blastn \\\n", "-query /Users/sr320/git-repos/course-btea/data/Hsapien_DNMT.fasta \\\n", "-db /Users/sr320/Desktop/big-data/db/Geo_Male \\\n", "-task blastn \\\n", "-outfmt 6 \\\n", "-out /Users/sr320/Desktop/big-data/DNMT_blastn_GMale.out\n", "!cat /Users/sr320/Desktop/big-data/DNMT_blastn_GMale.out" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_9003\t69.95\t1764\t476\t15\t3071\t4801\t2838\t4580\t0.0\t 773\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_9003\t65.66\t926\t275\t18\t1208\t2122\t954\t1847\t1e-47\t 194\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_9003\t70.73\t123\t30\t2\t2358\t2477\t2104\t2223\t2e-06\t57.2\r\n", 
"gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_24892\t96.15\t26\t1\t0\t779\t804\t1272\t1247\t0.049\t42.8\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_6739\t80.43\t46\t9\t0\t951\t996\t1668\t1713\t0.049\t42.8\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_71311\t87.50\t32\t4\t0\t831\t862\t355\t386\t0.17\t41.0\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_684\t90.00\t30\t3\t0\t950\t979\t3323\t3294\t0.17\t41.0\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_46559\t100.00\t21\t0\t0\t498\t518\t196\t176\t0.60\t39.2\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_28543\t100.00\t21\t0\t0\t777\t797\t798\t818\t0.60\t39.2\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_21846\t95.83\t24\t1\t0\t5093\t5116\t1160\t1137\t0.60\t39.2\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_16355\t83.33\t36\t6\t0\t2168\t2203\t1110\t1075\t0.60\t39.2\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_6110\t84.21\t38\t5\t1\t948\t984\t4613\t4576\t0.60\t39.2\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_5264\t90.32\t31\t2\t1\t5148\t5178\t112\t141\t0.60\t39.2\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_4740\t82.05\t39\t7\t0\t831\t869\t1857\t1819\t0.60\t39.2\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_97293\t89.66\t29\t1\t1\t5043\t5071\t199\t173\t2.1\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_86982\t100.00\t20\t0\t0\t4842\t4861\t30\t11\t2.1\t37.4\r\n", 
"gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_53456\t81.58\t38\t7\t0\t5076\t5113\t528\t565\t2.1\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_28676\t82.50\t40\t6\t1\t3333\t3371\t991\t1030\t2.1\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_20096\t86.67\t30\t4\t0\t2000\t2029\t1120\t1091\t2.1\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_19663\t82.86\t35\t6\t0\t831\t865\t1619\t1585\t2.1\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_16365\t100.00\t20\t0\t0\t777\t796\t1808\t1789\t2.1\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_11130\t92.00\t25\t2\t0\t5151\t5175\t83\t59\t2.1\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_4991\t89.29\t28\t3\t0\t3520\t3547\t2117\t2144\t2.1\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_4668\t92.00\t25\t2\t0\t4145\t4169\t91\t115\t2.1\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_4054\t92.00\t25\t2\t0\t848\t872\t826\t802\t2.1\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_2720\t92.00\t25\t2\t0\t2110\t2134\t378\t402\t2.1\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_2346\t92.00\t25\t2\t0\t1676\t1700\t1938\t1962\t2.1\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_1880\t85.71\t35\t4\t1\t843\t876\t2888\t2854\t2.1\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_1768\t92.00\t25\t2\t0\t2176\t2200\t912\t936\t2.1\t37.4\r\n", 
"gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_81111\t75.47\t53\t10\t1\t725\t774\t217\t165\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_65809\t100.00\t19\t0\t0\t5164\t5182\t566\t548\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_55860\t88.89\t27\t3\t0\t3339\t3365\t97\t71\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_52223\t75.47\t53\t10\t1\t725\t774\t208\t156\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_50895\t95.45\t22\t1\t0\t172\t193\t182\t203\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_38484\t91.67\t24\t2\t0\t2177\t2200\t867\t844\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_34942\t86.21\t29\t4\t0\t2107\t2135\t1631\t1603\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_33359\t91.67\t24\t2\t0\t2177\t2200\t1815\t1838\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_28107\t91.67\t24\t2\t0\t4861\t4884\t408\t385\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_27001\t87.10\t31\t2\t1\t778\t808\t329\t357\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_25431\t91.67\t24\t2\t0\t3589\t3612\t848\t825\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_24742\t100.00\t19\t0\t0\t498\t516\t1251\t1233\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_16310\t87.88\t33\t1\t2\t179\t211\t1517\t1488\t7.3\t35.6\r\n", 
"gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_14315\t79.49\t39\t8\t0\t834\t872\t385\t423\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_10940\t87.10\t31\t3\t1\t3433\t3462\t928\t958\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_8377\t91.67\t24\t2\t0\t2177\t2200\t1585\t1562\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_8335\t86.21\t29\t4\t0\t954\t982\t4634\t4606\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_5225\t91.67\t24\t2\t0\t2177\t2200\t535\t558\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_2824\t88.89\t27\t3\t0\t5136\t5162\t547\t521\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_2813\t91.67\t24\t2\t0\t1925\t1948\t1171\t1148\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_1512\t81.08\t37\t7\t0\t950\t986\t1127\t1091\t7.3\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_106A_Male_Mix_TAGCTT_L004_R1_(paired)_contig_80\t100.00\t19\t0\t0\t681\t699\t2732\t2750\t7.3\t35.6\r\n" ] } ], "source": [ "!blastn \\\n", "-query /Users/sr320/git-repos/course-btea/data/Hsapien_DNMT.fasta \\\n", "-db /Users/sr320/Desktop/big-data/db/Oly_Male \\\n", "-task blastn \\\n", "-outfmt 6 \\\n", "-out /Users/sr320/Desktop/big-data/DNMT_blastn_OMale.out\n", "!cat /Users/sr320/Desktop/big-data/DNMT_blastn_OMale.out" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_8192\t70.01\t1764\t475\t15\t3071\t4801\t2775\t4517\t0.0\t 778\r\n", 
"gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_8192\t66.45\t611\t172\t13\t1518\t2122\t1201\t1784\t4e-34\t 149\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_8192\t71.54\t123\t29\t2\t2358\t2477\t2041\t2160\t2e-07\t60.8\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_8192\t67.38\t141\t46\t0\t1208\t1348\t945\t1085\t0.001\t48.2\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_35411\t77.94\t68\t9\t3\t710\t774\t380\t444\t0.001\t48.2\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_2928\t88.24\t34\t4\t0\t3339\t3372\t291\t258\t0.013\t44.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_20201\t96.15\t26\t1\t0\t779\t804\t1230\t1205\t0.044\t42.8\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_4794\t80.43\t46\t9\t0\t951\t996\t1637\t1682\t0.044\t42.8\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_4543\t90.00\t30\t3\t0\t950\t979\t1196\t1225\t0.15\t41.0\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_27900\t100.00\t21\t0\t0\t777\t797\t1804\t1784\t0.54\t39.2\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_16964\t90.32\t31\t2\t1\t5148\t5178\t1116\t1087\t0.54\t39.2\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_13874\t100.00\t21\t0\t0\t498\t518\t778\t798\t0.54\t39.2\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_11565\t100.00\t21\t0\t0\t777\t797\t2348\t2368\t0.54\t39.2\r\n", 
"gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_10032\t82.05\t39\t7\t0\t831\t869\t1988\t1950\t0.54\t39.2\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_1090\t95.83\t24\t1\t0\t5093\t5116\t1987\t1964\t0.54\t39.2\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_569\t84.21\t38\t5\t1\t948\t984\t1584\t1621\t0.54\t39.2\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_73900\t82.50\t40\t6\t1\t3333\t3371\t299\t260\t1.9\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_28891\t82.86\t35\t6\t0\t831\t865\t1617\t1583\t1.9\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_28589\t92.00\t25\t2\t0\t848\t872\t363\t387\t1.9\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_24728\t85.71\t35\t4\t1\t843\t876\t481\t515\t1.9\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_22409\t92.00\t25\t2\t0\t4145\t4169\t91\t115\t1.9\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_16884\t81.58\t38\t7\t0\t5076\t5113\t536\t573\t1.9\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_16322\t86.67\t30\t4\t0\t2000\t2029\t776\t805\t1.9\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_11843\t92.00\t25\t2\t0\t2110\t2134\t373\t397\t1.9\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_11566\t100.00\t20\t0\t0\t777\t796\t2408\t2427\t1.9\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_11506\t92.00\t25\t2\t0\t2176\t2200\t924\t948\t1.9\t37.4\r\n", 
"gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_4604\t92.00\t25\t2\t0\t5151\t5175\t2283\t2307\t1.9\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_1117\t92.00\t25\t2\t0\t1676\t1700\t5970\t5946\t1.9\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_214\t89.29\t28\t3\t0\t3520\t3547\t2833\t2806\t1.9\t37.4\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_81724\t87.88\t33\t2\t2\t5139\t5169\t209\t177\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_71446\t88.89\t27\t3\t0\t3339\t3365\t96\t70\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_39104\t95.45\t22\t1\t0\t4919\t4940\t106\t127\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_37837\t87.10\t31\t3\t1\t3433\t3462\t866\t836\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_36308\t91.67\t24\t2\t0\t1925\t1948\t349\t326\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_26430\t87.88\t33\t1\t2\t179\t211\t3179\t3208\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_20785\t91.67\t24\t2\t0\t3589\t3612\t1549\t1572\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_18108\t91.67\t24\t2\t0\t4861\t4884\t731\t708\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_16998\t91.67\t24\t2\t0\t2177\t2200\t438\t461\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_15109\t95.45\t22\t1\t0\t172\t193\t355\t334\t6.5\t35.6\r\n", 
"gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_13875\t100.00\t19\t0\t0\t498\t516\t702\t720\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_12560\t86.21\t29\t4\t0\t2107\t2135\t8\t36\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_12560\t86.21\t29\t4\t0\t2107\t2135\t71\t99\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_11939\t86.21\t29\t4\t0\t954\t982\t1525\t1553\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_10085\t100.00\t19\t0\t0\t5164\t5182\t1336\t1318\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_9546\t81.08\t37\t7\t0\t950\t986\t46\t82\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_9065\t79.49\t39\t8\t0\t834\t872\t272\t234\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_7593\t91.67\t24\t2\t0\t2177\t2200\t196\t219\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_5567\t100.00\t19\t0\t0\t681\t699\t6814\t6796\t6.5\t35.6\r\n", "gi|768001213|ref|XM_011527774.1|\tfiltered_108A_Female_Mix_GGCTAC_L004_R1_(paired)_contig_3022\t88.89\t27\t3\t0\t5136\t5162\t7307\t7333\t6.5\t35.6\r\n" ] } ], "source": [ "!blastn \\\n", "-query /Users/sr320/git-repos/course-btea/data/Hsapien_DNMT.fasta \\\n", "-db /Users/sr320/Desktop/big-data/db/Oly_Female \\\n", "-task blastn \\\n", "-outfmt 6 \\\n", "-out /Users/sr320/Desktop/big-data/DNMT_blastn_OFemale.out\n", "!cat /Users/sr320/Desktop/big-data/DNMT_blastn_OFemale.out" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", 
"execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\r\n", "Converted 64711 FASTA records in 640388 lines to tabular format\r\n", "Total sequence length: 32662304\r\n", "\r\n" ] } ], "source": [ "!perl -e '$count=0; $len=0; while(<>) {s/\\r?\\n//; s/\\t/ /g; if (s/^>//) { if ($. != 1) {print \"\\n\"} s/ |$/\\t/; $count++; $_ .= \"\\t\";} else {s/ //g; $len += length($_)} print $_;} print \"\\n\"; warn \"\\nConverted $count FASTA records in $. lines to tabular format\\nTotal sequence length: $len\\n\\n\";' \\\n", "/Users/sr320/Desktop/big-data/db/Geo_Female.fa > /Users/sr320/Desktop/big-data/db/Geo_Female.tab\n" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_1\t\tTGAGTTTGAATCCCCGCCTGGCCAGTCTCCGTGATTATTGACATACAACATTGCCTTTTGTTGTCATTAGTCTCTCAACACTGATTCAAATGGGAAAGTTATCAATTACTTGTAGGGATGTGTGCTGGAGTACTTGTTAACCATTCAGGGTCCCAGGAAGTAGTGGTTAAGTTAACTGCAATCTCATTAAAATTATATCGACTTTAGGCTTCCATACACTACGCACAGTAGATCTGATGGTGTTGCATAGAACGTCACATCAGATTGCCCTTTCTGCCAGCCCATGTTTAATGTAAATGATTTTCAGAAATTACAGAAGTCAGGGATGATACAGGGACTTTTTTTATTTATACATTAATGACACAAGATCAGTCTGAGGTGACCTTTAATGCAATGCTGCCAGCTCTACTGTGTGTAGGGTATGGAAGTTGATCTGATTTTAATAAGATCGTCTGTAAGTCTTCGATGACATAAAAGTGATGCTAAATATTCAAACAANNAAAAATAATCGGGCAAAATAATATTTGTTTACTTACTTGTGGCGTTGTAATATATTTTGATGTAATCAGGCATGTATAATTGCCCTGTTTACTCTATTGTTTAACCAATCAAAACTGTCGGTAATA\r\n", 
"Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_2\t\tCAGTATACAGATTTTGTAATCCAAGTACTTCTAAAAATCTAACACAATTTAGCCCGTGTGGTTCCGGCTTAGAATAGAATCACTGCCCATCTCTGCAGAGGGGTAGGAATCCTCTGAATGGGTCTCCTGATCATTTTATCATTTTTGTCATGTTCATTATTGTGGTTAAATTAACAGCCCCTGCGTGGTTCCAGGAAGCCATATGAGTGATGGTTTGAATAGAATCACTGTGGCTTGAACATGTTGTAAGAAGCGACAATTTGAGAAACACAATGTCGTAAGAAGCGACAATTTGAGTAATACGATAGAATTTGCTGAAACTCTGCATTTCTGTAGGTATCATGGGTCTATGTTCATTTTGCATATGACCGCGGCATAGTCTAGTGGTAGAGCGTCCACCTTGCATGTGGAAGGTCGTGGGTTCAATCCC\r\n", "Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_3\t\tGTAAATGCCTGAATAGCTGCTTTTCCTGCCAATTTATGGTCGTTTCGGACTAACAATGCTTAATATATTTTAATTACATCTGATTTTATGACAGAAATGAACATATCAATTTGTATTTTAAGATATTTTCAAATATTTCCAAGACTGGTATTACGTCACTAATGAAGAAGCAGACGGTAATATTATGAGGGTAGTATCTTCATATGACG\r\n", "Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_4\t\tTCCATCTCATTCATATTTTATTTCAAAATCATACAGAACAGTTTGAAAAAAAACACAACTGAAATTGCATACAATTCTGTAAATATGAACTTTGTTGCTGATAATTCCATCTCATTCATATTTTATTTCCGTGCAACTCTGAACAATAATTTCAATTTTATTTTTTGTTTAAATCTACTAAATATATAAACATTTAACATCCACATGTACTTCATGCACATCTCGACAAAATTCCAGTTGTACTCGTCCAAACTTCACTGACCATTCAGTTTCACTAGCATATAAGTACTGTTCACAAACAAGTACATCATGGATTATTTAGAACACAATCTTCAACTTAAACTGAAACATGGGAGTGCTGCTGCATTTCAATAAATGTCTGATCCATGATTCATTTAATAAATAACACAAGTGCAAATCGTATTAGCTGGTCTCTTTGGGCAAGTCCATAATCTGCTTAAAGTATCAAGATTTTGTGTCCATGGCTTCTTTGGGTGCCTCCTCCTTTGGAGCATCTTCTTTCTTGTCATGTCCATTCACTTTCTTATCACTGTCACCATCCCCAGACTCATCCTGTTTCAACTTCTTAACTTTCTCCTCATCTACGTCTTCACCAGACTTCCTCTTTCTCTTGATGAGATGACTAATGTCCAAGGTCTTCACTTCTTCCTTTGATGATGATGCTTCCATTGGTGCCGTCTTTTCTGTTGTGGAACCAGACGTTAATCCACCCATTGCTTCCTTGGCCAGATTTTTAAGATCTTCCATATTTTTCTTCTCATCTTCACAGTCTTCAATCTTGGCTTTAATGTCGGGTAGGATCTCCTTCAGGTCTTCAACCTCTTTGTTTGCTTTGTAGACAGGTGTGTCAAAGTCCTTACTCTCTTTCTCCTCCTTAGTCGCCGCATCCAGAACCTTAGTTAAGTTGGAGATTCTATCCTCAATCACCTTAATGGCTGCTCTGTAACTATCTACAGACTCTGTAAATTGTTTGTTGAAACAGTATGCCAAACCAAGCTGATAATGTGTTTCAGCTAGTAATCTGTTGTCAGCCTCCAAATTTGCCTTTTGTATATCATAACATGTATTGAAATCTTTGATAGCCTCTTCATATTGTTCTGTTTCTAAACTGACTTCACCCAATTTGAGATGCGACTCTGCAGTCCTAAGTTT
GGCTTCTTTCGATTCGTCTTTGATATAAATAAGTTTGGCCAGTTCCAGCATCTCCCAGGCCAGCTGTAGGTTGGAAACATCATCTGGATTCTCCCCATCTTGTGAGCCATCCTGTGATGTATTGTCAGCTGTATCTTCTCCCTCCGCAGCGGCATCTCCCGCTTCTCCTTCCTCTCCCTCCTCCTCAGTTCCTTCCTCACCTTCTGTACCTTCATCCCCCTCCTCACTACTACCCTCCTTCTCTTCCTCATCTTCATCTTTTTTTTCTTTCATTTCTATGTCTTTATCATCTTTCTTCTTTTCCTCTTTCTCTACTTTTTTCTCTTCAAGTTCCTTTTCCCCATCTTTCTCTTCAGTTTTGTCC\r\n", "Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_5\t\tGCAATATTTATGTTCCTTGCCTGCGTGATATACCCAAGTGGATGGGACAACGACGAGCTCCAAAAGATCTGCGGAGAGGAGTCTGGCAAATATCGTATGGGGCAATGCAACATCCGGTGGGCCTACATTCTCGCAATTCTTGGCATTTTCGACGCAGCAATCCTTGCTATTCTTGCGTTCTTCCTTGCTTCTAAACGAGCAAAGATTGAAATGTACACCGCATCAGGAGCTGTCACAAAGTCTGAGCTGAACGGATTTGGTGCCGAAACCATGTCCAAGAGGTCGATGCCAATTCAGCCAGCCATGGTCAGTGTTCCAAACCCTGATGGAGGACGAGAAAGCTACTCTGAATATTCCCAGGTGCAAAAGCGCGGTGGCTCATCAAACTTTAACCTTTAAAGCAAAAAAAATGTTAACATGATTTAGCATCTATAGGGAAAGAACACTTAAGTAAGGTTGCCAAGCTACATCTTAAATATACACATCAAGCAACATTATATATTTTTTACTAAAAAAAACTACAGTTAACAGGAAAGGACTTTACTTTGTCAGAGAAGTTTTCAGAAATAAAGACAAGACCCTTTGTATCTATTCCTACTAGAAATTGAGTGAATAACATCCGAGGGTCCAGGGACATGCATCTCCGAGAGCCCGACATTCGGAGTTNNNNNNNNNNNNNNNNGTAAAATTGTCTCGGGATTGTAATTCTCCCAAAAATTAGAATGTCCCCACCTAGTATATTGCATTCCTTGATCATTCTTTTATAAAGTCTCCGATTTGTACATCATTTTTTGTGGTTAGTAATAAAGTAAACTTCTTTTTCGATATAATTGCATTTAAAATTTAATAATACTACAGGAGATCATTAAGTATTATGATTGTGCGCGAAACATTTAATGATCAAATTCTTAATACATGCTTCGTTTCATTTCGTTCATCGGCAATCAGTGTATATAAATGCTTCTTTTCTACATTTGTATTGTATAAAAAGTTATCACAAAACCTGTACAGATGCGATGTACTGATTTAAATAGAAAGTAAATGGTTTTAAGGTAATTGCGTTTCTTTGTTAATATTTGATATTATTCTTTAGAGTATCTATTTTTATATAAAGTTTATTCACATGTACCAGTGTTCACCTGTTGAAAGTTACGTAAATAGGATATAAACAAATACATATATACATACATTATATGCTTGGACTTTTTTTTNNNNGACAAAATGTGTTCTATGCATTTAGGAATGTTTTCTAAACCACTCTCGAGTCATGGTGAAATTGTAAATAGCAAGACACAACGAGACTTCTTAAGTTTCATATATCATATTAACTAAACTGTACAACGAATGGCACTCGTNNAAAAAAAAAAGTAAACAAGTTACAGCAAATGTCAAGAATACAATAAGATTATGACATGTATATTCTTTTCAAGCATAGCTTATATCATGACTGCTTTATGTATATAGTATCGAAATCCCTATAAAGAGAAAATTACTTTGGCCTAATATTTGGAACTTTCAAATTTACACCAAATCAAAATGTTACGGCATACGTATTCATAACTGGATATAAT
TCTGTATTTAGCAAGTATTGAAACCACTGTTATTACCACTTCTGTATTAAAAAGCCTAGTTTGAAAAGTTGAAAAGAGTTGCTAACTTGTTTAAAAATGCTGAAAACCTTTTTTTAAGAAATATATAAACTCAAGTGGTCAGGTTAGTGGTCCAGCTAGTGGGTGCAGCTAGTGGTACAGTTTGTGGTCTGTTAGCTTCTTTCGATCTAAAGTCAATACCCTTTCATTTCTAAATAAACTTTCCAATTATATAATTATCTTTACTATTAATAAAATAACCAACCTTCTTCCAGGAGTAGCACATTTATTTGTAAGAAATTTCAATAATTTTCTGTCTCAGCTTCTGTATTAAATATAAGTGGATTTATCAATGTTTCAGTGTAAAATGTTTGTATACCAATCAACAAATAAAAATTCTGTGAAAGCTGTTAATTCATAAAGGGAGCAAATAAAAAGACTACTAAAATAGTTGTATTTATTTACAAAATGAGATTACATGTGTGCTTAATGCCAGTATATGTTATAATCAGAATAATGTATGTTTCAGATTTTACAAAAAAAAGTTTACTGTTTTGGAAAATATTTTAGGACAACAGAATAGCCTTGTTTATTTCACTGCAGGAATGGTTTTAGACACAAATCTTTCATATATACAAATGTTTATATATCTTGTAACTGTTCTTAACAATAAACAATTAGGCCAAAAAAAAAAAAAAA\r\n", "Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_6\t\tGAGGGTGAAATACTACTGAAGTAACAGCAATGCAATGTAGGTTCTTGTTTATGTGTCTGATTTACGAATAAACTCGAAAGAACAAAACACATAACGAGTACACTATTCATTTTATGTTTATTTTGAGTAAAGGTAACACGTGTTAACTGGTTATAATATTATGNNATGTTAATAGCGTACATTTTGTACCATAGCTACTGCCTTAATTAATACTATGAAGATAGTTCATTTATGGCTTTTATACGAATTTTCTACATTTTACATAATCTCATATCTCGTGTTTGTTTGTGTGTTTTATAACATTGTATAGCCCTGCTTTCTATATTAGTTTGCGTACTGTATGTAATATTTGTTAGGTTATTTCTTTTTTGTGGGTGGGATGCGAGGGTAGTGGCATTTATTGTTTTTGTCGCCTTTTACGTACATGTTAATATTATAATTTTGTAATACTATTTTATAAAAGCAC\r\n", 
"Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_7\t\tGACCTTGTTTGATCATTTCATCCTCCTTTTTCAAGACCGATCAGTGGCCAGGTCGCAGGAAGAATACCTTGTTGGATTTTCCTTTATTCTTCGCCTGATAAAACAATAAATTACTCAAGATGGCAAAGAGGCAAGATATCAGTTTTGGTGAGAATTTCGCCCTTAGCGGTGCTGCTGCCATTATCTCAAAAACAGCTGCTGCTCCCATTGAAAGGGTCAAGCTCCTTGTCCAGAACCAAGATGAAATGATCAAACAAGGTCGTCTTGACAAGCCATACAAGGGTGTGATTGATTGCACAACAAGGACATTTAGAAGTGAAGGTGTTTTACCATTTTGGCGTGGTAACTTGGCTAACTGCATTCGGTACTTCCCCACACAGGCTCTCAACTTCGCCTTCAAGGATCAGATTAAGGCTATGTTTAAATCCAGCAAAAATGAAGGTTATTTTCTGAAATTCTCAAAGAATGTCGCGTCTGGTGGTGCAGCTGGAGCCATGTCCCTCGTCTTTGTGTACTCCCTTGATTACTGCAGAACCCGTTTAGCCAATGACGCCAAGGCCGGAAAGAAGGGTGGTGAGCGTCAGTTTAATGGTATGGTTGATGTATACAAGAAGACCATTGCTTCAGATGGTATCCAGGGTCTTTACAGAGGTTTTGTAATTTCCTGCGTTGGAATCATTGTATACAGAGGTTGCTACTTTGGCTTCTACGATACCTTGAAACCGATAATCCTTGGGGACAGCAAGAGCTTCATGGCATCATTTGCTCTTGGTTACATCGTCACAATTAGTGCAGGTCTGGTATCTTATCCAATCGACACAATTCGTCGAAGAATGATGATGACATCAGGACAGGCTGTCAAGTACAAGGGATCAATTGATTGCACACTCCAGATCGTCAAGGGTGAAGGATTCATGTCCTTGATGAAGGGAGCTGGTGCCAATATTCTCAGAGGTGTGGCTGGTGCTGGTGTGTTGTCCGGTTTTGACAAGTTCAAGGAAGTTTACCTTGCATGGCGTCTTGGCAACTAATGATAATGCAACTTAGGTGACAATTTATATAGGGGGAAGGAATATGGATTGATTCCATTATTAACTTTCACGTGTGAGACTGAGTGTCATAACATAAAAGGAAGAACTGGGTCATGTAGCAGCAAATGTGATACTTTGTTATTTTTAAACGTAGATCATGTTGCAGCTATATATAAATTGGTACAGGATTTTGACTGTACAAACACCATGTAAAAAAAACAAATTTTGTGGAATATATAGAAATGATTTATAGATATTGTCTATTGTTTTCTTTTGAAATTTATTAGTCATTTGAGTCGAGATGTTGGTCAAGGGTTGACATTCAACTTTTTAGCTCGCCGGAACGAAGTCCAAGGAGAGCTATTGCCATGACTCCGGCAGCATCATCGGCGTTGTCTGTGGACACCAAGGTTAAACTTTTGTGTTCAAGTTTAAGATATCTATGCAACCACTCGGATGTGAATGAAACTTTGCATGTGTGTTCACCATCATCAATGGAACACACACAGACGAGAACCATAACTCTGGATTGCATTTTGGCACAGTTATGACCCTTTTTGGACTTAGACACCCAGAGGCGAGTTTGTTGTCTTTTGACAACTCTTGTTCATAAGTGTATTCTGTATCATACAGTATGAAAAGTTGCTTGCCCATTTTAAGCTTGCCGGAACTAAGTCCAAGGAGAGCTATTGTCATGTCGTCGTCTGTAAACACCCAGGTTAAACTTTTACATTTAAGTTTCAGATTTCTATGCAAGCATTCTTTGGATGTGATTGAAACTTGCATGTGTGTTCACCATCATCAATGGAACACACAGAGACAAGAACCGTAACTCGGGGTTGCATTTTGTCACAGGTATGGCCCTTTTTTGAATTGGTCACCCAGGTTGAACTTTTGCGTTTAGTTGCGTT
AAGTTTAAGATATCTATGGAACCACTCATTGGATATGAATGAAACTTCCCATGTGTGTTGGAGCACACAGAGACAAGAACCGTAACTCGGGGTTGCATTTTGGCAGAGTTATGGTCCTTTTGGACTTAATTTTTTGTCTGTAGACACCCAGGTTAAACTTTGGTTTGTATTTGCGTGGGTGGTGATTTTTATCCCAGCGNNGTCTTCGGACAACTCTTGTCGATTGCTGTTGTTAGCAAAAAATCTAAAATTTCAATATTTCTGATTATTTGCTAACTTATGAAATGCTTAATTAAAGGGACCATATTGGCAAGTAACACAAATACGCTGTTAATCCAGACTTCCTTTGTGCAGTGTGCACAAAATAATGCTTATGTACTGCATATTACCTTTATTTACTTGAAAGTTATCAAATTAACAGTAATTGAATATTGGTGAAAACTTCAATACGTCTCGCAATAAAGTAATGCGCAATGTTTACATTGAACTC\r\n", "Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_8\t\tCCATGAAAATGCTTTAATTGGAATTATTCAGTTATGATGAGTAAGAAGTGACGAGGATGATGATTAATGATGATTGATGATGATTGGTGATGATCGATCATATTGATTACAAACTTAATTATGTATGTTGAACAAAATAACATATCTCTAAATATTGTGCAAACATCATCACTAGGCAAACTGTGCCAACATCATCACCAGGCAAACTGTTAATTATCAAAACGAATCTAACAATTGGAAAGATAGTGTGAGTAATAAAACACAAAAACATGGTCTGATCACAAATACATCATCATGTATCAAACTGAGACTCTTAATATAAATGTTTAACACAAATGATTTTCAAATCAAGATAACACTTGCTTAATTTAAAATCTAATCTTGGCTCAAGTGAAAAAAGCATTTGAATTAATTGGCTCTGAATCATTTTCCTCAGGCACTCCTTGAAGTAAAACAACTGGTAATTTTAAATGAGGATTTAAAAATGAAAAAAAAAATATATATATATTATTACTGCAAAGTCAAATTCAGACATAAAGTTTGGAATATTCAATATTGAATCCTGTAAAATGTGTAGTAACATAGCTTCTGCACTATGATAAATATGGATCAAAGGTCCCCTTTTGCACACCTTCGCAAGTCTTAACATTTTTCTATGAGCTTTAAGGACAGCAAAAGGCATTTACCTCATTTAGTTTAAACTTTAATGATTTTCTCATCAATTCTTTAATAACTCCAATCTTACCCTGCATTACTTTATATGTTTAAGAATTGAACAAATTTAGTACAAAGGTTAGTATATAAATCCAATTTCCCTTTCCTCATATTCAAATTATCAAACTGGTGGGAGAGTTGATTTATGATTCTTATTCTTAATAATAGACAAATGTCATAATTGTTTCTTGGTAAAACAGCCATATTTAGACTTTCATGAAGAATCTTTTTACTTCCAATATCTGGTTTATTAAATTAAGTTCCATTTTCCTTTTCCTGATTGTTTTATCAAATATGCCTTTTTTATAACTACAGTTAAACCTGTCTTAAGCAGCCAGTCGACGGAGGAGCATAATTTGCCTTTTAAGACAGGTCATTCATTGCTTAAGACAGGTAAATATGCTCTGAAAGCATGCGAATGGGAAATAGCAATTATGGCTGATTAAGACTAGTGGCTGCTTAATAGAGGTAACCTCTTACACAGATTTTACTGTATTTTTACAAATGCACGATCGTGATACACTATCCCCTTTGTTTAATCATGATCTAAGAATGTCACTGTCCCATGTTCATATTTTACTATCTGTACTCCAAATAAGAGAGTAATTTATCTGTTTGTTTGTCACAGAAGTATTTGTTTAGTTTACACACTGACTGAGACGTAATACACAAACACCAGATATCAGATAGCGAGGACATCACTCTGTATTTCATGGCTGAGTTGGGTTTGT
AGATCACTAAGTTTCTTCTTGAGGTTTGACAGGGCAGAGAACACGATGTTTTCTGGTCTGAGTGATCCACATGACTCCACGTTGTAATAGTATTTGTTAGGAAAGCCAGTTGGGTCAAACGGAGCTTGATGTTCATCTTCTTCCAACTCTGAAAATTCACTTTTTGGCCATTCTTCCGGTTTTGGGTATGTTGTGTGACGCAGTGCATTATCTGGGTCATATTCAAAAGACACTCCACAAGTGGGATTCCATTTAGCATGTTCCTTGGCAAACCCCTTACGAGCAAATGCCTTCACCTTCAGCTCCTGGCCCTTACGCAACTTAACAATCAGAATGTCATCTGTGTCATCATATTCACTGGGTTCTTGAACGTTTCTAGATGTCACCGGTACAATTTTTGGGTTATTGGAGATGAGATCTCTACTTGTAATATGTCGTGTCACGTCATCCTGGCACTTTACTTCCAGTGTAAATTCTACGGAACAGTCAGGACAAAACTCATCACAGGTACAATCCCTGCTGTACTGCATCCTGTCCACAATATCGTCACTGGTAAGTGGAATCAAACCTATACGATGGGAGATGAACTCGTCAAACAGTACTGTGGAGTTCGACTCTATCTGGACCCAGTCTATAGCCATGGTTGGAACCTCTGCTATGCATACTCTCCGCAATGCATTGGCAACACTCAGGTCAGTGTCCTCTAATATAAATTTTATATTTTCATCCGAAACTTGTGTTATTTGGACAGTAGGCTGATTTGCGTAAGGCATTTTTAATTAAACTCTGTCGTAATTTCTTTTTCTTTTATTTTAAGCTGTTCGG\r\n", "Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_9\t\tCATCTTTTATTTTTTTTAAATAAAGAACACAAATGAACTTCGATTCAACGACGACATTTATTCCACAAATATACTAAAATAACATTCAACATGAGAAAATTTGGCTGCAGTGATTAGCTTAAATATGAGAAAAACAACTCTCAACAAAATGAGATGAATTAATGTCAATGATATGATACAACACCCAAAATCAAATATTTATGCATCTTCAGTTTTCTCCTCGGACGTTTCTTCAGCTTTTGATTCTTCCGTTTTGGTCTTTTTCTTCGGACTGACTTCCTCGGAGTCTGCTTCTTCCTCATCCAATGTTCGTTTTTCAGATTCTGTTGATTCTGCTGCATTTTGGTTCACATTCTCTGCTTCTTCCTTTTCTTGACTAGTTCCATTCTCCTCTTTTGACTCTTCTTCTTTTTTTTCTTCTTCTCCAACATCTTCTTTGGCCTTTTCTGCTTCTTCAACTGCCCGTTTTACATCTTCGACTGTTTTCGTCTTTATTTCAGGTGTTTCAGTCGTCTCAGTGACAGTCTGGACTTCTTCTTTTTCGGAAGGCATTTTGTGTGTTTAATTAGATTTTTTTCTCTCACAACGTGCGCTTTCGTTTTTCCCGTGAATTCAGGTGTTTCAGTCGTCTCAGT\r\n", 
"Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_10\t\tTCGAGATGACGTCATATCCGCTGGCGTTTTTAAGCGTGCGCACATGCTTCCCAATATACGTCAGAAGTCACCCAGGACTCGTTCGGTTAGGACACATCGACAGATCTTAGAAAAAGGCTCATTTTTTTATTCAAGTTCACATCAAACAAAAATCATGGCAAAGGCCCCTGCTGTAGGTATTGATTTAGGGACCACCTACTCCTGTGTGGGAGTGTTCCAACACGGCAAAGTAGAAATCATCGCAAATGACCAGGGAAACAGAACCACTCCCAGCTATGTAGCGTTTACAGACTCCGAAAGATTGATTGGTGATGCAGCCAAAAACCAGGTTGCAATGAATCCCAGTAACACAATATTTGATGCCAAACGATTGATTGGTAGGAAGTTTGACGATGCCGCTGTGCAGTCCGATATGAAGTTTTGGCCCTTTGAAGTTGCAAGTGATGGTGGCAAACCAAAATTGAACGTAGAATATAAGGGTGAACAGAAGTCATTCTTCCCAGAAGAAGTCTCCTCAATGGTTCTAACAAAAATGAAGGAGACAGCTGAAGCTTACCTAGGAAAGACTGTTACAAACGCTGTTGTCACAGTGCCTGCTTACTTCAACGATTCACAACGACAGGCAACCAAGGATGCTGGTACCATATCGGGTCTAAATGTTTTGCGTATAATTAATGAACCCACAGCTGCTGCTATTGCTTACGGTCTGGACAAAAAGGTGGGTGGCGAGAGAAATGTTCTAATTTTTGACTTGGGCGGTGGTACCTTTGATGTCTCCATTCTAACTATTGAAGATGGAATCTTTGAAGTAAAATCAACTTCTGGGGATACACATTTAGGTGGAGAAGATTTTGACAACAGAATGGTGAACCACTTCACACAAGAATTCAAGCGTAAGCACAAGAAAGACATGAGTAGTAATAAGAGAGCTGTCAGACGTCTCAGAACAGCATGTGAACGTGCAAAGAGAACTCTGTCATCAAGTACACAGGCTAGTATTGAAATTGACTCGCTCTTTGAAGGTGTTGATTTCTATACCAGCATCACCAGAGCTCGATTTGAAGAGTTGAATGCAGATTTATTCAGAGGCACCCTTGAGCCAGTTGAGAAATCCATCAGAGATGCCAAAATGGACAAATCAGCGATTAATGAAATTGTTCTTGTTGGTGGTTCCACTCGTATCCCAAAGATCCAGAAACTTTTACAAGACTTCTTCAATGGCAAGGAACTGAACAAAAGTATTAACCCAGATGAAGCTGTTGCTTATGGTGCAGCTGTACAGGCAGCCATTCTGCATGGAGACAAGTCTGAGGAGGTTCAGGATCTCTTGTTGTTGGATGTTACTCCTCTCTCTCTGGGTATTGAAACTGCTGGTGGTGTGATGACATCCCTCATCAAACGTAACACAACTATCCCTACCAAGCAAACACAGACATTTACCACTTACTCGGACAACCAACCTGGTGTACTTATCCAAGTATATGAGGGTGAGAGAGCTATGACCAAAGACAACAATCTCCTTGGAAAGTTCGAGCTTACTGGTATTCCACCAGCTCCTCGTGGTGTCCCACAAATTGAAGTTACCTTTGATATAGATGCTAACGGTATCCTGAATGTATCCGCAACAGACAAGAGCACAGGGAAGGAGAACAAGATCACCATTACCAACGACAAAGGTCGACTAAGCAAGGAAGATATTGACCGCATGGTAAACGATGCAGAAAAGTACAAGGATGAAGATGAACAGCAAAAGGATCGTATCCAATCCAAGAATGGTTTGGAGAGTTATGCTTTCAATATGAAGTCGACGGTGGACGATGAGAAATTGAAAGAGAAGATCTCCGAAGAGGACAAGAAGCAAATTACTGACAAGTGCACAGAAGTTATTAGCTGGTTGGATGCTAACCAACTTGCCGAAAAGGACGAGTTTGAAGACAAACA
GAAGGAGTTGGAAAAAATTTGCAACCCAATCGTTACCAAACTATATGCAGGCGCTGGTGGAGCTCCGGGTGGTATGCCAGGTGGTATGCCTGGTGATGCAGGCGCTGGTGGTGCTGCCCCAGGTGGCGGTTCCAGTGGTGGCCCAACCATTGAGGAGGTTGATTAATTAAGTGTATGGTTAACTGAACATTAGAGATCCTGGGGTATTTAAAAATGTGGTTTTTGTTAACAAACTGAACAAAATTAAGATCATAACTTCATAAACATGACATTGTGCAAATTGTTATTAATACCAATCTTGTAATATTCCATTTAATTTCATCTTTTGATAATTTTATTGTGGACTAAAATAGTACTCATTCCATCCATTCAACATGTTATTTATATTGTTGTTAATTGAGTTTTGTGAGAACAAAAAAAAAGTTCTTGTGATCATGAATAAAAGAGAAATTGAGCCTGAAAAAAAAAAAAAAAAAAAA\r\n" ] } ], "source": [ "!head /Users/sr320/Desktop/big-data/db/Geo_Female.tab\n" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_23384\t\tGAGATGAAAATGCAATAAACATTATTTCCATACCTTTATCTAGGAATTACCAATTTATCTTACAATCTTAATGTAAACATTTATAGATCATGCAAGTTTTACAATTAATCAGTTAGGTAGTATAACTTGTATTCATTTAATTTCTCAATTTCATGTACATATAATATTATATTCCATACATACATTTAGTCAAATCAATTGTAACAATGATCCAAATTTGAATTGTGACAAAACGCAAAATCGATCCATACCAGAAATCACTACATGTCTTTCATTCCATTCATTTTGTAAACTACAATATATTTCCATATAACAGGGAAACTTTTGAACACAGTTGTGGTTTATATTCATTCTGATGTATATTCATGACAAATTCCTGAAGAAGCTGGACCTAGTATGTTGATATTACCAAACTTATGAACATGATTTCCGTCCAAACTAGGGTTTCAGGTTGTTTTATATCCTTTATTAATTACTCGTCTGTATAACAACCTGAATATGTATCTTGTTATGGCATTTGTGTCACGTGTGCCATTTTTATGATCTACACCGCAATGTCAATAGAATCAGTCCAATGTCAGGTAAATCAAAATGCCCATTTCTTTTAAACACAGTCAACTTTAGTTTTCTAGTCATATTGCTCAACTGTCCTTTCTTAACTTGTAGAACTTCCTGCAGTTTCCTCTACCTTCACCTGTTCAACTTCCATTTCTCCATCTTTCTCTTCCTTCTTTACATCATCTTTCTTCTCTTCTTCGGCTACAGTTCTGAGGTTTGTTGTTGTGGCAGCCAGGCACTTCTTAATCTCCAGACCAATAGCTCTGGCCATTGGTGGAGGTACGGCATTGCCAACCTGTCTGTGGCGATCCAGGATGGAACCAAAGAATCTGTAAGTGTCCGGGAATCCTTGGGATCGGGAACACTCTCTAACACTGACCACGCGGTGTTGTTCAGGATGTAATACACGACCCTGTTTTCCCATTGGTTCAGGATTAGTGACTGTCGTACTGAAGAATCCATCCCACTCCAGTCTCCCGTACAGACCTGCCCAGTGGTTGTGCCGGTTCCCTGTATGTGGCAGACACCAGGGTACCAGGGTGTTGAACTGACGGTCCATAGGCTCGCATGATTTACCTTCAGCACAGGTACAAACACCTCTCAGGTTTCCATTGTCACTCTTACCATTCTTCTTGTCATGGTGAGTGTAACGTAATTTCTTCG
ACATTGTTCCGTCCGACAGTCTGACCTCCATGTTAGGAAGGTCTCTCCAGTCTGACCCTGGGGCCAGGGGAATATGTTGCATACGGGCATGAACAAGGGGGTTCATGTCTTTACAGATATGGTCCCGCAAAACGGGCTGGTGCTGTTTCCCTCTGATCAATCTCTGGAAGTGCGACACCGTGTCGCTGTTGTATGATATCTCCTCACGTTTGTGTCCGTTTTTAATCTCTGGCAGGTCAGACATAGTGTCCCTCACAGTGATTGTTCTATATGGTGCAGAATCTGTACGCATGATGTTGGACACAAACTTCTTGTCATCTACCATTACAGACAGGGTCATGGCCCTTGGAGCAAACACATGACAGGGTTCTGGATAATATGGTAGCTTCTCTCCTGGTGCCGCAGCCAGAATTATGGCCCTTCTCCTGGTCTGTGCCACGCCGTAACTACCTGCCTGCAGTACCCCGAATGTACACTGGTACCCCATCTTGATGAGACAACGGAGAGCCAGCTTGAGCACCAT\r\n" ] } ], "source": [ "!grep \"Geo_Pool_F_GGCTAC_L006_R1_001_val_1_(paired)_contig_23384\" /Users/sr320/Desktop/big-data/db/Geo_Female.tab" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.10" } }, "nbformat": 4, "nbformat_minor": 0 }