{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Calculating CpG ratio for *Pocillopora damicornis* transcriptome" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This workflow calculates the CpG ratio, also written CpG O/E (the observed/expected frequency of CpG dinucleotides), for contigs in the *Pocillopora damicornis* [transcriptome](http://2ei.univ-perp.fr/telechargement/transcriptomes/blast2go_fasta_Pdamv2.zip). The CpG ratio is used as a proxy for germline DNA methylation: methylated cytosines mutate readily to thymine, so sequences that have been methylated in the germline over evolutionary time are depleted in CpG dinucleotides and show a low CpG O/E.\n", "\n", "This workflow is an extension of another IPython notebook workflow, `Pdam_blast_anno.ipynb`, which generates an annotation of the same transcriptome. This workflow assumes that you have already created the directories and files specified in the annotation workflow; in particular, the code cells below change into `../data/Pdam` and read `blast2go_fasta_Pdamv2.fasta` from that directory.\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/jd/Documents/Projects/Coral-CpG-ratio-MS/data/Pdam\n" ] } ], "source": [ "cd ../data/Pdam" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">Locus_1685_Transcript_1/2_Confidence_1.000_Length_7457_transcripts_v2_1|spectrin alpha chain\n", 
"tatacgattttatgccgtggaggtgttttcttgcagaagtttcaaattatgtcctaattgtagtgtagaacggactattggacataatttgaaacttctgcaagaaaacacctccacggcataaaatcgtatatggcgatacatgaaccactgcttcaggaacactgtcttccttttgatgtttaaaccatgcactggcccacatttttgtttgctaatataacagaacttttccgctatccctaaccaagtaacaactcatcactctaaacataataacatgcactgaagaaacattacattagtaattctcttgaacactgtgatcagttaatctgttgatgatagcttcagtaatacctagcgagctgctggggtattgtcatcttccagttacattaaaccgtgaggtgtttcgtttcttcagaaataaggaacataccacaattactgcaaatctatgccggactgactaatttagctagaagagaagagttcctcgcagaaagtcttgtagtcgtaagctcccggaacttctcggcccttgtcgtcaacgtaaggattcatacgatcaatacagaagtcggcctgctccttggtgagagactgatagagctcagcctcagtaacatagagacgttttccgccctcagtgagcgccttgaaggcgttgatgacttcttgactagagccgacattttctgtttcacgactgatcatgaaggccatgtactctcccatcgacaccacaccgtcaccgttgggatccaccgttctgaggatgctttggaactccggatcttcttcgccctcttctacaatggagagatcgtagccaagagaacggaggcatgacttgaattcttgatgatccaggtaaccagtcttatccttgtcaaagtgcttgaacatgatagtgaattctttgagtgtatcctcagaaacgccagtggtattgcgggcctggatttgctgttcaagattgtgtttcatgcgcatggcaagttgatccagctgatcccactgctgagcaaggtccactgtgctatgttcggtatacttgttgtccaggataagagcctcttccatttgagccccaagatcctccaggatcctaaggtcctctttcctgtctgctatctcagcactcttcttcttgacctcggccagttgatcttcaaggtccccttgaccgtcaaccattgcaacccttgtatcagagagccaagcgtggaacgagttggcagcttgtgcaaattcctgcctgaggttgtcattatactcctgacgctgggcttctttgcgcaaatcatcctcgcgttcctcaataatcttctgcagattttcccacgtgtcttccaatgcttccatagtaaaccaagtataagggttgatggagacgttatagctcttgatctgacgatcaagcttcctcaacatcataatgtcgtcttccgcttggtccagagaagcgcggaactgggtgtggccatcttgcaaagcacgtatctcctcaacagagttgcagcgcactggatcggtcagatcttcctccgcattctcgaaccaactgttaaaagcagaggctttcttggcaaacaacaggaacaagtcctccaccttcttgtattgatcctgggcatgttgaagtcgttcttttcgagtcttagagtcttccagcagctgctcccacctcctaataaggtcatcatgacgtttgatgatggcaggcgactgctcgtgttgtgattggacaagctcgtctttcagcgcagtcacacgtgcaatgccttcgttttcaaatgcttggagacctgagtcaaaggtttcctgtttggtgaaaagtgtttgcaccgaagacaaatcgcgccccagatcatctgaccgagcaattccttccttgtcaccaatccaggattccactacatcggccttccaattgaactgcaggaaagctgagttgtcattgagcttggacttgcgataggacgccattctc
tccaactcggtaagtttactcttaatggatgcaatcctgtggtcaatcaactcagattggtggttacccttttcaatgagcttgttgccagcatcttcaatgtcctgaactctttcccgatgaacctcaagatctgtctcgaatgcttcgtgcttctttaaaagaccttggactgcagccagtgtatcaccatagtcgtcacttccaacaagagtgttcttctcattgatccacgattcctcttcaccaacattagcgctgaactgttgatattccagagactcatctagcttgtgctgcctctgctcagccaggtttttcagttcgtcccagttttcctgcagctgatcacagcgagctttgatttcatcagagctactgtgtccttcgtcaataaatttacggccacacagtagtacagcctgaattctggcctcatgggtgttcaattcagcttccaaacgctgatgtttcttacgtaggttttgtacaccagtcaagtcctttccataatcttcagagctggtcaatagctttttctcctttatccaggactcttcgtcgtccacatctcggtaaaactggtgcaaagcgttggactcgtccaacttcttatggcgatctgcggccatgtcttttactttgtcgtagcgttctgcaataatgcgggatttgtcttgcagtgagtcagcatcaaaatggccaacctcagcgaagtgctgggtttgggcttgaagatcagtgatacgatcctcgtgagcagcaatatctgcttcaacaagttggtgcttcttgatcagattctgaacactggccagatctcgaccatgatcatcgtgagacaaggaagcttcaacctctcccagccagaaatccagttccttaacatttgtattaaattgctgttgttgattagattccttcagatgttgactcttctcattggacttctgtactaggtattcccactgctgctgaagtttggtgattctctctttcacaatctcctcacttccggcacacttgcgctgctcaatgagaccttcagctaggttgatggtctccaagactctctcctgattggctgagacttcagcttcaaacgcctgatgcttttggaacttggactgtaagttggttggatccttgtaagattcatccagcactgtctgtaacttttcactgacccatgcttcaatgtcctcagcatcacgactgaactgctgaatggttttggactcacccagttttgatcggcgttcaaccaaggctgccttcagagtagcccatctcgctaagactgcagaaatcctctctgcaatagctggagagtcataatgattgttatcaatcagccgatcagcattgtctttcagagcattgatcttcacgtcctgcactgcaagtgtctttgtgaagtcttcatgtttcttgatcagagcttcagcaccctccgctgcttctccgacatcttcgctctgaatgatggcctcacgtgtcgccatccactgctccagttgttcggcatctcgattgaataactgcaactccaaacattcatcgagtctttgcttgcgagaagcccaggccttttccagttcttctcgttctgtagccatggtctccagtttttcacggatgtcgggactagcataatgttctttgtccaataatttcttgccgaaatcttcaaacgactggaactcgctgtcacgcgcatcaatttctgcgcgatgttcctgatgcctgtccaacagagcttctgcactggccacatcctttgcaagttcatcagaagttaccaatgccataataccattgatccatgaagtaagatctctgaattcgttgaggaaatggaagtaatctgacgagtcactcagtttgccccttcttgcagccgcctgttctctgagattagcccacgcttcttctagctcgtgtctcttccattcaatgtcttgtgctgcattcggatgactggatgttaacttagcagc
ttccacgttgagttcacgaaccttctcttccaatgcggccaagtctctctccaaaacttcatgctttctctgcaaggcctggacactagccaaatcccgaccataatctgaggtggaaagagcagtgtctttctctgaaatccaagccttggtgtcatcagcatccctgtggaatttctgaatttcctgagcattatccaaattttccttcctctgctctgacatggtcttaagattggtccatttctggttcagcctctcaatcatctctcggatcatctcatattctctgtagtgttcaatctttagtttctcagccagtaatgtcagttctctgatacgaacctctttagaaatcatatccttttcaaagtcatcaaacttcttctgctgagcttccacatgttcgtagtcctttccgatttcctctgaggtgacaattgcttccttgtcattgatccaagactccagttcgtgagcatcccgcaacacactgtatctttgaatggaatcttccaatttcttcttacgttcttcgcctttgtccatcaggtcttggtatttatcatccaaagcctgctgtctgttagcaacagtgtccacttcaggagcctgcgacagaaggtcctgtgaggctttgtgggagtcaatcctcttgacatatgccgcaggaacaaaaccttgtctgtcattagtttctactttccaccaatccttgttgctagaattgagtagggtcagaatgtcacccttctgcatggacacctctcttgcagttttctcctgatagtcataaagagcaactacacattccttgtcagagatatctgtgacatgagccgctggcttgcagtgttgactttgttctctcagcccatctacaacagttccatatgctctcaagtctgacatgatggcatcatgtttggtaagcaatgcctgtgcactgtcttcatcttttccatagtcatcactggtaacaattggttctttctccttcaaccaggattctgcttcagcaacatcagctagatactgatgagcttgaagagagtcatccagatgtccttttcgcacatgtgccttatccttgaattccagccatttctggtcaaggtcatcaatcttctctttgatctcatcagcggcaaagtgcccattatctatcatttgaacaccattatcgcaaacagctctgacacgtggttcatgaccagcaatttcagtcattaaagcctggtgtttctttgacagattctgggctcctgtcaaatctcggccagtgtttgtggatgaagcaacaggttccttctcccttatccatgcctcctcatcttcaacatcatgaaggaatcgtttaagtctctcagcatcctggagctttgctttgcgagctaggagaggagcctgtagctgttggtatcggctatttaaaacctccttcttttctttgatggaaggagcatcaaagtggtcagcctccgcaaataaattagcttgtgcattaacaacctcaatcttttctgcacgcgctatgacatcagcctctatcatggcatgtttcttttggaggttttgcacacttgtcagatcctttcctacatcctcaagagcaagcaagttttcaacttcagtgaaccacaactcaacatcttcggcacctcggttgaactgctgttgctgtgctgcttctttcaatttcaaccctttatcatttgatctctcaaacagataggcccacaacttatgcagttcatcaagtctctctctgatttgatctgaggcatagtgttcatctccaatcagttgttcaccagtgttgtcaactgcatcaagccggctttgattggcattcaactcagcctcaaatgcttggtgcttctggatctttccttgcagattggttggatccttgtaagattcatcactggcaatcttcagcttttctgtaatccagctttttacttcatcacagtctctctcaaactgctgcagtttac
gagattcctcaagtttcaacctgcgcgccttggacaactccccaatgttattacgcctttccattataccatctcttctctcacgcacctcatctgaggcatagtgattgctgtcaacaaggcggtttgcatactcgtcaatactattgatcttctctgcttgggcagcaaaagatttgtcaaagtcttcatgcttcctgatcagagcttctactccatcgagagaatcaccaaggttatcatcagcaaggaaggcctcttgtttagacatagtggcatctgcatgctcacaatcccgattaaaaagctgcagctccatacactgctcaaactgcacacgtctcctctcccacagctccaacagttccattttctcagtctccagactggcaagcttttctttgatctcatcagtagcataatgattggaattgacaagttcttctccatcatcggcaaacttcttaaatccatcctctgatgcatcaatataacccttatgttcctgatgacgttccagaagactttctgcactagctacatcttttgccagttcatcactttggatcaagactttcatttcatttataaaagaaatgtggtccctgtagtcactgatgaacctttgcagacgataagaatcctccaaccttgcttttcgcaccccagacttctccttaagatttccccaggcagttacaatttcatcttgcttagcagctatctcatctgcactttctggatatgcctcttgcagttgtgcagattctgaacccagggcagtgaccttatcttccactgcagccaaatctctttctagagcttcatgtttgcgtaacagagcattgacactggccagatcttttccataatcatctgatgacagaactttgtctttctcattgatccaattcttggtctcatcagcatcacgatagaagctgtgtatttgctgggcaccagccagtctcttctgacgtttgagtgccagcatcttcagcctttcccaagcttcattaacttccgcttgctttgtactgatcagttctatatctggatgaccctcatctccaagttgatgtgcaagctcattgatgtatgtgacccttgattcatttgcctgaatatccttcaagaagtcctcaaatttcttctgaagaacttccacatgttccaaatctcttcctacttcttcagaagtggcaattgcttctttttctaaaatccatgacataacttcctctgtttcatgcaagaagtggactcttttctgagtaaagagaagcatgcggcctttctctgctgatttggaaagcagcaattcccataacttgatgagtgagtcaagacgttccctgataagttcagaggcatagtgggactcactgatcatgccttcaccattttcttggagttcaataatggcattgctatgagctgaaatctctgcttcaaacg\n", "\n", "number of seqs =\n", "72890\n" ] } ], "source": [ "#fasta file\n", "!head -2 blast2go_fasta_Pdamv2.fasta\n", "!echo \n", "!echo number of seqs =\n", "!fgrep -c \">\" blast2go_fasta_Pdamv2.fasta" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">Locus_1685_Transcript_1/2_Confidence_1.000_Length_7457_transcripts_v2_1tspectrin\n", 
"tatacgattttatgccgtggaggtgttttcttgcagaagtttcaaattatgtcctaattgtagtgtagaacggactattggacataatttgaaacttctgcaagaaaacacctccacggcataaaatcgtatatggcgatacatgaaccactgcttcaggaacactgtcttccttttgatgtttaaaccatgcactggcccacatttttgtttgctaatataacagaacttttccgctatccctaaccaagtaacaactcatcactctaaacataataacatgcactgaagaaacattacattagtaattctcttgaacactgtgatcagttaatctgttgatgatagcttcagtaatacctagcgagctgctggggtattgtcatcttccagttacattaaaccgtgaggtgtttcgtttcttcagaaataaggaacataccacaattactgcaaatctatgccggactgactaatttagctagaagagaagagttcctcgcagaaagtcttgtagtcgtaagctcccggaacttctcggcccttgtcgtcaacgtaaggattcatacgatcaatacagaagtcggcctgctccttggtgagagactgatagagctcagcctcagtaacatagagacgttttccgccctcagtgagcgccttgaaggcgttgatgacttcttgactagagccgacattttctgtttcacgactgatcatgaaggccatgtactctcccatcgacaccacaccgtcaccgttgggatccaccgttctgaggatgctttggaactccggatcttcttcgccctcttctacaatggagagatcgtagccaagagaacggaggcatgacttgaattcttgatgatccaggtaaccagtcttatccttgtcaaagtgcttgaacatgatagtgaattctttgagtgtatcctcagaaacgccagtggtattgcgggcctggatttgctgttcaagattgtgtttcatgcgcatggcaagttgatccagctgatcccactgctgagcaaggtccactgtgctatgttcggtatacttgttgtccaggataagagcctcttccatttgagccccaagatcctccaggatcctaaggtcctctttcctgtctgctatctcagcactcttcttcttgacctcggccagttgatcttcaaggtccccttgaccgtcaaccattgcaacccttgtatcagagagccaagcgtggaacgagttggcagcttgtgcaaattcctgcctgaggttgtcattatactcctgacgctgggcttctttgcgcaaatcatcctcgcgttcctcaataatcttctgcagattttcccacgtgtcttccaatgcttccatagtaaaccaagtataagggttgatggagacgttatagctcttgatctgacgatcaagcttcctcaacatcataatgtcgtcttccgcttggtccagagaagcgcggaactgggtgtggccatcttgcaaagcacgtatctcctcaacagagttgcagcgcactggatcggtcagatcttcctccgcattctcgaaccaactgttaaaagcagaggctttcttggcaaacaacaggaacaagtcctccaccttcttgtattgatcctgggcatgttgaagtcgttcttttcgagtcttagagtcttccagcagctgctcccacctcctaataaggtcatcatgacgtttgatgatggcaggcgactgctcgtgttgtgattggacaagctcgtctttcagcgcagtcacacgtgcaatgccttcgttttcaaatgcttggagacctgagtcaaaggtttcctgtttggtgaaaagtgtttgcaccgaagacaaatcgcgccccagatcatctgaccgagcaattccttccttgtcaccaatccaggattccactacatcggccttccaattgaactgcaggaaagctgagttgtcattgagcttggacttgcgataggacgccattctc
tccaactcggtaagtttactcttaatggatgcaatcctgtggtcaatcaactcagattggtggttacccttttcaatgagcttgttgccagcatcttcaatgtcctgaactctttcccgatgaacctcaagatctgtctcgaatgcttcgtgcttctttaaaagaccttggactgcagccagtgtatcaccatagtcgtcacttccaacaagagtgttcttctcattgatccacgattcctcttcaccaacattagcgctgaactgttgatattccagagactcatctagcttgtgctgcctctgctcagccaggtttttcagttcgtcccagttttcctgcagctgatcacagcgagctttgatttcatcagagctactgtgtccttcgtcaataaatttacggccacacagtagtacagcctgaattctggcctcatgggtgttcaattcagcttccaaacgctgatgtttcttacgtaggttttgtacaccagtcaagtcctttccataatcttcagagctggtcaatagctttttctcctttatccaggactcttcgtcgtccacatctcggtaaaactggtgcaaagcgttggactcgtccaacttcttatggcgatctgcggccatgtcttttactttgtcgtagcgttctgcaataatgcgggatttgtcttgcagtgagtcagcatcaaaatggccaacctcagcgaagtgctgggtttgggcttgaagatcagtgatacgatcctcgtgagcagcaatatctgcttcaacaagttggtgcttcttgatcagattctgaacactggccagatctcgaccatgatcatcgtgagacaaggaagcttcaacctctcccagccagaaatccagttccttaacatttgtattaaattgctgttgttgattagattccttcagatgttgactcttctcattggacttctgtactaggtattcccactgctgctgaagtttggtgattctctctttcacaatctcctcacttccggcacacttgcgctgctcaatgagaccttcagctaggttgatggtctccaagactctctcctgattggctgagacttcagcttcaaacgcctgatgcttttggaacttggactgtaagttggttggatccttgtaagattcatccagcactgtctgtaacttttcactgacccatgcttcaatgtcctcagcatcacgactgaactgctgaatggttttggactcacccagttttgatcggcgttcaaccaaggctgccttcagagtagcccatctcgctaagactgcagaaatcctctctgcaatagctggagagtcataatgattgttatcaatcagccgatcagcattgtctttcagagcattgatcttcacgtcctgcactgcaagtgtctttgtgaagtcttcatgtttcttgatcagagcttcagcaccctccgctgcttctccgacatcttcgctctgaatgatggcctcacgtgtcgccatccactgctccagttgttcggcatctcgattgaataactgcaactccaaacattcatcgagtctttgcttgcgagaagcccaggccttttccagttcttctcgttctgtagccatggtctccagtttttcacggatgtcgggactagcataatgttctttgtccaataatttcttgccgaaatcttcaaacgactggaactcgctgtcacgcgcatcaatttctgcgcgatgttcctgatgcctgtccaacagagcttctgcactggccacatcctttgcaagttcatcagaagttaccaatgccataataccattgatccatgaagtaagatctctgaattcgttgaggaaatggaagtaatctgacgagtcactcagtttgccccttcttgcagccgcctgttctctgagattagcccacgcttcttctagctcgtgtctcttccattcaatgtcttgtgctgcattcggatgactggatgttaacttagcagc
ttccacgttgagttcacgaaccttctcttccaatgcggccaagtctctctccaaaacttcatgctttctctgcaaggcctggacactagccaaatcccgaccataatctgaggtggaaagagcagtgtctttctctgaaatccaagccttggtgtcatcagcatccctgtggaatttctgaatttcctgagcattatccaaattttccttcctctgctctgacatggtcttaagattggtccatttctggttcagcctctcaatcatctctcggatcatctcatattctctgtagtgttcaatctttagtttctcagccagtaatgtcagttctctgatacgaacctctttagaaatcatatccttttcaaagtcatcaaacttcttctgctgagcttccacatgttcgtagtcctttccgatttcctctgaggtgacaattgcttccttgtcattgatccaagactccagttcgtgagcatcccgcaacacactgtatctttgaatggaatcttccaatttcttcttacgttcttcgcctttgtccatcaggtcttggtatttatcatccaaagcctgctgtctgttagcaacagtgtccacttcaggagcctgcgacagaaggtcctgtgaggctttgtgggagtcaatcctcttgacatatgccgcaggaacaaaaccttgtctgtcattagtttctactttccaccaatccttgttgctagaattgagtagggtcagaatgtcacccttctgcatggacacctctcttgcagttttctcctgatagtcataaagagcaactacacattccttgtcagagatatctgtgacatgagccgctggcttgcagtgttgactttgttctctcagcccatctacaacagttccatatgctctcaagtctgacatgatggcatcatgtttggtaagcaatgcctgtgcactgtcttcatcttttccatagtcatcactggtaacaattggttctttctccttcaaccaggattctgcttcagcaacatcagctagatactgatgagcttgaagagagtcatccagatgtccttttcgcacatgtgccttatccttgaattccagccatttctggtcaaggtcatcaatcttctctttgatctcatcagcggcaaagtgcccattatctatcatttgaacaccattatcgcaaacagctctgacacgtggttcatgaccagcaatttcagtcattaaagcctggtgtttctttgacagattctgggctcctgtcaaatctcggccagtgtttgtggatgaagcaacaggttccttctcccttatccatgcctcctcatcttcaacatcatgaaggaatcgtttaagtctctcagcatcctggagctttgctttgcgagctaggagaggagcctgtagctgttggtatcggctatttaaaacctccttcttttctttgatggaaggagcatcaaagtggtcagcctccgcaaataaattagcttgtgcattaacaacctcaatcttttctgcacgcgctatgacatcagcctctatcatggcatgtttcttttggaggttttgcacacttgtcagatcctttcctacatcctcaagagcaagcaagttttcaacttcagtgaaccacaactcaacatcttcggcacctcggttgaactgctgttgctgtgctgcttctttcaatttcaaccctttatcatttgatctctcaaacagataggcccacaacttatgcagttcatcaagtctctctctgatttgatctgaggcatagtgttcatctccaatcagttgttcaccagtgttgtcaactgcatcaagccggctttgattggcattcaactcagcctcaaatgcttggtgcttctggatctttccttgcagattggttggatccttgtaagattcatcactggcaatcttcagcttttctgtaatccagctttttacttcatcacagtctctctcaaactgctgcagtttac
gagattcctcaagtttcaacctgcgcgccttggacaactccccaatgttattacgcctttccattataccatctcttctctcacgcacctcatctgaggcatagtgattgctgtcaacaaggcggtttgcatactcgtcaatactattgatcttctctgcttgggcagcaaaagatttgtcaaagtcttcatgcttcctgatcagagcttctactccatcgagagaatcaccaaggttatcatcagcaaggaaggcctcttgtttagacatagtggcatctgcatgctcacaatcccgattaaaaagctgcagctccatacactgctcaaactgcacacgtctcctctcccacagctccaacagttccattttctcagtctccagactggcaagcttttctttgatctcatcagtagcataatgattggaattgacaagttcttctccatcatcggcaaacttcttaaatccatcctctgatgcatcaatataacccttatgttcctgatgacgttccagaagactttctgcactagctacatcttttgccagttcatcactttggatcaagactttcatttcatttataaaagaaatgtggtccctgtagtcactgatgaacctttgcagacgataagaatcctccaaccttgcttttcgcaccccagacttctccttaagatttccccaggcagttacaatttcatcttgcttagcagctatctcatctgcactttctggatatgcctcttgcagttgtgcagattctgaacccagggcagtgaccttatcttccactgcagccaaatctctttctagagcttcatgtttgcgtaacagagcattgacactggccagatcttttccataatcatctgatgacagaactttgtctttctcattgatccaattcttggtctcatcagcatcacgatagaagctgtgtatttgctgggcaccagccagtctcttctgacgtttgagtgccagcatcttcagcctttcccaagcttcattaacttccgcttgctttgtactgatcagttctatatctggatgaccctcatctccaagttgatgtgcaagctcattgatgtatgtgacccttgattcatttgcctgaatatccttcaagaagtcctcaaatttcttctgaagaacttccacatgttccaaatctcttcctacttcttcagaagtggcaattgcttctttttctaaaatccatgacataacttcctctgtttcatgcaagaagtggactcttttctgagtaaagagaagcatgcggcctttctctgctgatttggaaagcagcaattcccataacttgatgagtgagtcaagacgttccctgataagttcagaggcatagtgggactcactgatcatgccttcaccattttcttggagttcaataatggcattgctatgagctgaaatctctgcttcaaacg\n", ">Locus_1685_Transcript_2/2_Confidence_1.000_Length_7457_transcripts_v2_2tspectrin\n", 
"tatacgattttatgccgtggaggtgttttcttgcagaagtttcaaattatgtcctaattgtagtgtagaacggactattggacataatttgaaacttctgcaagaaaacacctccacggcataaaatcgtatatggcgatacatgaaccactgcttcaggaacactgtcttccttttgatgtttaaaccatgcactggcccacatttttgtttgctaatataacagaacttttccgctatccctaaccaagtaacaactcatcactctaaacataataacatgcactgaagaaacattacattagtaattctcttgaacactgtgatcagttaatctgttgatgatagcttcagtaatacctagcgagctgctggggtattgtcatcttccagttacattaaaccgtgaggtgtttcgtttcttcagaaataaggaacataccacaattactgcaaatctatgccggactgactaatttagctagaagagaagagttcctcgcagaaagtcttgtagtcgtaagctcccggaacttctcggcccttgtcgtcaacgtaaggattcatacgatcaatacagaagtcggcctgctccttagtgagagactgatagagctcagcctcagtaacatagagacgttttccaccctcagtgagcgccttgaaggcgttgatgacttcctggctagagccaacattttctgtttcacgactgatcatgaaggccatgtactctcccatcgacaccacaccgtcaccgttgggatccaccgttctgaggatgctttggaactccggatcttcttcgccctcttctacaatggagagatcgtagccaagagaacggaggcatgacttgaattcttgatgatccaggtaaccagtcttatccttgtcaaagtgcttgaacatgatagtgaattctttgagtgtatcctcagaaacgccagtggtattgcgggcctggatttgctgttcaagattgtgtttcatgcgcatggcaagttgatccagctgatcccactgctgagcaaggtccactgtgctatgttcggtatacttgttgtccaggataagagcctcttccatttgagccccaagatcctccaggatcctaaggtcctctttcctgtctgctatctcagcactcttcttcttgacctcggccagttgatcttcaaggtccccttgaccgtcaaccattgcaacccttgtatcagagagccaagcgtggaacgagttggcagcttgtgcaaattcctgcctgaggttgtcattatactcctgacgctgggcttctttgcgcaaatcatcctcgcgttcctcaataatcttctgcagattttcccacgtgtcttccaatgcttccatagtaaaccaagtataagggttgatggagacgttatagctcttgatctgacgatcaagcttcctcaacatcataatgtcgtcttccgcttggtccagagaagcgcggaactgggtgtggccatcttgcaaagcacgtatctcctcaacagagttgcagcgcactggatcggtcagatcttcctccgcattctcgaaccaactgttaaaagcagaggctttcttggcaaacaacaggaacaagtcctccaccttcttgtattgatcctgggcatgttgaagtcgttcttttcgagtcttagagtcttccagcagctgctcccacctcctaataaggtcatcatgacgtttgatgatggcaggcgactgctcgtgttgtgattggacaagctcgtctttcagcgcagtcacacgtgcaatgccttcgttttcaaatgcttggagacctgagtcaaaggtttcctgtttggtgaaaagtgtttgcaccgaagacaaatcgcgccccagatcatctgaccgagcaattccttccttgtcaccaatccaggattccactacatcggccttccaattgaactgcaggaaagctgagttgtcattgagcttggacttgcgataggacgccattctc
tccaactcggtaagtttactcttaatggatgcaatcctgtggtcaatcaactcagattggtggttacccttttcaatgagcttgttgccagcatcttcaatgtcctgaactctttcccgatgaacctcaagatctgtctcgaatgcttcgtgcttctttaaaagaccttggactgcagccagtgtatcaccatagtcgtcacttccaacaagagtgttcttctcattgatccacgattcctcttcaccaacattagcgctgaactgttgatattccagagactcatctagcttgtgctgcctctgctcagccaggtttttcagttcgtcccagttttcctgcagctgatcacagcgagctttgatttcatcagagctactgtgtccttcgtcaataaatttacggccacacagtagtacagcctgaattctggcctcatgggtgttcaattcagcttccaaacgctgatgtttcttacgtaggttttgtacaccagtcaagtcctttccataatcttcagagctggtcaatagctttttctcctttatccaggactcttcgtcgtccacatctcggtaaaactggtgcaaagcgttggactcgtccaacttcttatggcgatctgcggccatgtcttttactttgtcgtagcgttctgcaataatgcgggatttgtcttgcagtgagtcagcatcaaaatggccaacctcagcgaagtgctgggtttgggcttgaagatcagtgatacgatcctcgtgagcagcaatatctgcttcaacaagttggtgcttcttgatcagattctgaacactggccagatctcgaccatgatcatcgtgagacaaggaagcttcaacctctcccagccagaaatccagttccttaacatttgtattaaattgctgttgttgattagattccttcagatgttgactcttctcattggacttctgtactaggtattcccactgctgctgaagtttggtgattctctctttcacaatctcctcacttccggcacacttgcgctgctcaatgagaccttcagctaggttgatggtctccaagactctctcctgattggctgagacttcagcttcaaacgcctgatgcttttggaacttggactgtaagttggttggatccttgtaagattcatccagcactgtctgtaacttttcactgacccatgcttcaatgtcctcagcatcacgactgaactgctgaatggttttggactcacccagttttgatcggcgttcaaccaaggctgccttcagagtagcccatctcgctaagactgcagaaatcctctctgcaatagctggagagtcataatgattgttatcaatcagccgatcagcattgtctttcagagcattgatcttcacgtcctgcactgcaagtgtctttgtgaagtcttcatgtttcttgatcagagcttcagcaccctccgctgcttctccgacatcttcgctctgaatgatggcctcacgtgtcgccatccactgctccagttgttcggcatctcgattgaataactgcaactccaaacattcatcgagtctttgcttgcgagaagcccaggccttttccagttcttctcgttctgtagccatggtctccagtttttcacggatgtcgggactagcataatgttctttgtccaataatttcttgccgaaatcttcaaacgactggaactcgctgtcacgcgcatcaatttctgcgcgatgttcctgatgcctgtccaacagagcttctgcactggccacatcctttgcaagttcatcagaagttaccaatgccataataccattgatccatgaagtaagatctctgaattcgttgaggaaatggaagtaatctgacgagtcactcagtttgccccttcttgcagccgcctgttctctgagattagcccacgcttcttctagctcgtgtctcttccattcaatgtcttgtgctgcattcggatgactggatgttaacttagcagc
ttccacgttgagttcacgaaccttctcttccaatgcggccaagtctctctccaaaacttcatgctttctctgcaaggcctggacactagccaaatcccgaccataatctgaggtggaaagagcagtgtctttctctgaaatccaagccttggtgtcatcagcatccctgtggaatttctgaatttcctgagcattatccaaattttccttcctctgctctgacatggtcttaagattggtccatttctggttcagcctctcaatcatctctcggatcatctcatattctctgtagtgttcaatctttagtttctcagccagtaatgtcagttctctgatacgaacctctttagaaatcatatccttttcaaagtcatcaaacttcttctgctgagcttccacatgttcgtagtcctttccgatttcctctgaggtgacaattgcttccttgtcattgatccaagactccagttcgtgagcatcccgcaacacactgtatctttgaatggaatcttccaatttcttcttacgttcttcgcctttgtccatcaggtcttggtatttatcatccaaagcctgctgtctgttagcaacagtgtccacttcaggagcctgcgacagaaggtcctgtgaggctttgtgggagtcaatcctcttgacatatgccgcaggaacaaaaccttgtctgtcattagtttctactttccaccaatccttgttgctagaattgagtagggtcagaatgtcacccttctgcatggacacctctcttgcagttttctcctgatagtcataaagagcaactacacattccttgtcagagatatctgtgacatgagccgctggcttgcagtgttgactttgttctctcagcccatctacaacagttccatatgctctcaagtctgacatgatggcatcatgtttggtaagcaatgcctgtgcactgtcttcatcttttccatagtcatcactggtaacaattggttctttctccttcaaccaggattctgcttcagcaacatcagctagatactgatgagcttgaagagagtcatccagatgtccttttcgcacatgtgccttatccttgaattccagccatttctggtcaaggtcatcaatcttctctttgatctcatcagcggcaaagtgcccattatctatcatttgaacaccattatcgcaaacagctctgacacgtggttcatgaccagcaatttcagtcattaaagcctggtgtttctttgacagattctgggctcctgtcaaatctcggccagtgtttgtggatgaagcaacaggttccttctcccttatccatgcctcctcatcttcaacatcatgaaggaatcgtttaagtctctcagcatcctggagctttgctttgcgagctaggagaggagcctgtagctgttggtatcggctatttaaaacctccttcttttctttgatggaaggagcatcaaagtggtcagcctccgcaaataaattagcttgtgcattaacaacctcaatcttttctgcacgcgctatgacatcagcctctatcatggcatgtttcttttggaggttttgcacacttgtcagatcctttcctacatcctcaagagcaagcaagttttcaacttcagtgaaccacaactcaacatcttcggcacctcggttgaactgctgttgctgtgctgcttctttcaatttcaaccctttatcatttgatctctcaaacagataggcccacaacttatgcagttcatcaagtctctctctgatttgatctgaggcatagtgttcatctccaatcagttgttcaccagtgttgtcaactgcatcaagccggctttgattggcattcaactcagcctcaaatgcttggtgcttctggatctttccttgcagattggttggatccttgtaagattcatcactggcaatcttcagcttttctgtaatccagctttttacttcatcacagtctctctcaaactgctgcagtttac
gagattcctcaagtttcaacctgcgcgccttggacaactccccaatgttattacgcctttccattataccatctcttctctcacgcacctcatctgaggcatagtgattgctgtcaacaaggcggtttgcatactcgtcaatactattgatcttctctgcttgggcagcaaaagatttgtcaaagtcttcatgcttcctgatcagagcttctactccatcgagagaatcaccaaggttatcatcagcaaggaaggcctcttgtttagacatagtggcatctgcatgctcacaatcccgattaaaaagctgcagctccatacactgctcaaactgcacacgtctcctctcccacagctccaacagttccattttctcagtctccagactggcaagcttttctttgatctcatcagtagcataatgattggaattgacaagttcttctccatcatcggcaaacttcttaaatccatcctctgatgcatcaatataacccttatgttcctgatgacgttccagaagactttctgcactagctacatcttttgccagttcatcactttggatcaagactttcatttcatttataaaagaaatgtggtccctgtagtcactgatgaacctttgcagacgataagaatcctccaaccttgcttttcgcaccccagacttctccttaagatttccccaggcagttacaatttcatcttgcttagcagctatctcatctgcactttctggatatgcctcttgcagttgtgcagattctgaacccagggcagtgaccttatcttccactgcagccaaatctctttctagagcttcatgtttgcgtaacagagcattgacactggccagatcttttccataatcatctgatgacagaactttgtctttctcattgatccaattcttggtctcatcagcatcacgatagaagctgtgtatttgctgggcaccagccagtctcttctgacgtttgagtgccagcatcttcagcctttcccaagcttcattaacttccgcttgctttgtactgatcagttctatatctggatgaccctcatctccaagttgatgtgcaagctcattgatgtatgtgacccttgattcatttgcctgaatatccttcaagaagtcctcaaatttcttctgaagaacttccacatgttccaaatctcttcctacttcttcagaagtggcaattgcttctttttctaaaatccatgacataacttcctctgtttcatgcaagaagtggactcttttctgagtaaagagaagcatgcggcctttctctgctgatttggaaagcagcaattcccataacttgatgagtgagtcaagacgttccctgataagttcagaggcatagtgggactcactgatcatgccttcaccattttcttggagttcaataatggcattgctatgagctgaaatctctgcttcaaacg\n", ">Locus_177_Transcript_12/12_Confidence_0.500_Length_6585_transcripts_v2_3tvitellogenin\n", 
"gctcgagtttgtgtgatgcctctttacagagagcacagattccttggtttggctcggcctcaaacaaagcttgcttcccacaggtgtaacaaagacgattttcatatgacgcggatgcagtccgtcactcttctcagcattatgcgaaggcgaattcaagggcaagtcactgatatagcgccatttctcctcatatcccttctgtggttcaatatcttcgatcatgtgttggaataacgtgacttgatccgtgttcatagggacggttgcgcgggtccattccacagggttcttggatgcatctttgtgaatatacacaactgtcgcgaggcgaacatgacggcgaccattcacgaatgtgatcttgaagagcttggggtagcgattatcatgcacagtctctaaacacccttgggaatttggcctcaccttaacgaatccctttccggtgacaaaaagtttcacaaattgatgatttccgtcgataaagaagactttgaagacagtaggtagctcgcttaagacgagttcattgtgctcattcacagacaggtattttccatttcctgccttcagagtccagttggcataacgatgatgatgatgtcccctggcaatctcagtaacttctccgatctcaaatccgtccagctctgtgctcctgtagattgaagaccaactcttggaacctgtgacaggtttcctgtacatcagccaggtgttgctcgccttgttagttgataaaatctcctgtctctcttgtggcaagtccatttctgtggtgtagctttgatttctgatgttgtactggaatctgccacctaatccagtagtgatattggtgtggatcgttagcttgctaccagtctccaactgaggcagagatactttcatttttccccaaagagaatgaagaacactaatgttcaattttccactaagagtgacttgatcgggaagaggaatgaagaatcggggttgaacctcagctttgaccgtggtgttcaaacgcaggaacaaaacgctgtccacagagaaatccaggggcagaccggcacacgagggctggcgtatcttcacttcaattggacggaggattttggagatattccattttttacctttcttcagcaggtccttcagatcgttgctaactctgatgactccatcttccattaactctttgatgtcatcataggtgaaataattatagaacagttcttttccaaaaactttgaagtagaaggaacctttggctttctcctcttcatgttcagcggttggcagctggtgttggatttccttgatttctttcccattcagactgttgtctaagcttcgtttcggtctgagcaagccaaacagactcttatcttcgctgtagtgtccccttggtcccatgacacgttggagcagctcttggattccctcaccacggatgccagtctcgaatagattcagagattttccaagcataccggtgtgtagtttgactttagagttgatagctctcgggatgaagcttgatggcgtggaaatcatgcgaagatcaactgcacttcccatctggagaagatcggagaagtcgccaaagtgaagccatttggagttcactgggtttatcttgtatctcttaagcgctcgaagggcgattcggagagcgattcgtgcggtttgagccattttttcgtcacatgggtatctagagaaagccatccccttgatgtaagacaccacaaaagatcgcacttgattggtgcgctcgtaatagagctgcttgactaccaagttgaacaccgctggaccaggtttgcagtcgcagatgatcacaaaacaagccatgcgcagttcggcgtcgttgtttggttgacggaagatctctaacaacactggcaataccttttgtgaaattttcggagcgattcttcgtagagcatacacagctgtgacgcgtaattccagagagttcctgttgtccttgataatttccaaaagaggt
tggtaaaaatctctgtgaccgaaattaccgatgcccttcaagatgaagatcttttcacggtaactggcatactgcaaacgactcttaatctcagccacttggttggcaaggcaagtgacgtccttgtctttgcagaagtcatgcatcaaggcaccggcagtaaggtaacaggttttcttcagagtctcgtctgcttggacaacagaaccagaacagagatccacaacggtcaagcacatctttcctgttgggtttggagttaaggctagaccacgaataagtgagactgccctcccagagtccagttccaatgtcttgattttttctgagatgagttcaaagacctcctccttgtacacataaggcagagcttccagcagccattgcctgcgtttctcgtcattgtagcagttgtcccagatcttgataagagtttttttgcaggtcctgcgtaaggtttccaccaaacgactgaactgccaagacatcaaattattctcctccacgctcttgatcaggtcgtcgatcaattttctcaccatgtttgcagtctgggtacgttgttcctcagtctctgctttgtcctcatcctcatccacagtcatttccaatgtcctagctgtaacaccactaacggtatagctttgggattcagacctcacttccttgaatttcaggtattgcttggtcaaagttctggcatttcctttcatctgtgttggtgcaaaaatgtatcttcccatcgttctacaatccttcaccacaaattgttcctcggttccgaaaagatcacatgatgctgacacagagatctgtaggggctcatcaatgacgctgcagtcctcgctaagacagcgcactcctggtacgtttgaaaacacgcggggttttccaatgcagttgtgcaggtttaaggtctttgtaatttgcaattccttgattgtgttggtctccagtgtgttgtgacgcacatcatacaggttttcacacattccatgagtgctgagctcccagttcttgtacagtcttggttcacggtgatgtaatgaagactcaggcttggtgaagttgtggtggatcatggaaagaactcctcgtttgatgttcaaaatgtactcaggctcatcgtcattgcagaagatctgaccaacttttccatctttgtactcaaacttgattggtcgttccagcaacggcttcaattcattggagactggggagacagcgtgtgacacagacttctacttcgaaacagacactttggcttatttgaagataagaaacccaattctgtacgaagttaacgtaacatccttcgaccaatcagagtcacacactgtctccccagtctccaatgaattgaagccgttgctggaacgaccaatcaagtttgagtacaaagatggaaaagttggtcagatcttctgcaatgacgatgagcctgagtacattttgaacatcaaacgaggagttctttccatgatccaccacaacttcaccaagcctgagtcttcattacatcaccgtgaaccaagactgtacaagaactgggagctcagcactcatggaatgtgtgaaaacctgtatgatgtgcgtcacaacacactggagaccaacacaatcaaggaattgcaaattacaaagaccttaaacctgcacaactgcattggaaaaccccgcgtgttttcaaacgtaccaggagtgcgctgtcttagcgaggactgcagcgtcattgatgagcccctacagatctctgtgtcagcatcatgtgatcttttcggaaccgaggaacaatttgtggtgaaggattgtagaacgatgggaagatacatttttgcaccaacacagatgaaaggaaatgccagaactttgaccaagcaatacctgaaattcaaggaagtgaggtctgaatcccaaagctataccgttagtggtgttacagctaggacattggaaatgactgtggatgaggatgaggacaaagcagagactgaggaaca
acgtacccagactgcaaacatggtgagaaaattgatcgacgacctgatcaagagcgtggaggagaataatttgatgtcttggcagttcagtcgtttggtggaaaccttacgcaggacctgcaaaaaaactcttatcaagatctgggacaactgctacaatgacgagaaacgcaggcaatggctgctggaagctctgccttatgtgtacaaggaggaggtctttgaactcatctcagaaaaaatcaagacattggaactggactctgggagggcagtctcacttattcgtggtctagccttaactccaaacccaacaggaaagatgtgcttgaccgttgtggatctctgttctggttctgttgtccaagcagacgagactctgaagaaaacctgttaccttactgccggtgccttgatgcatgacttctgcaaagacaaggacgtcacttgccttgccaaccaagtggctgagattaagagtcgtttgcagtatgccagttaccgtgaaaagatcttcatcttgaagggcatcggtaatttcggtcacagagatttttaccaacctcttttggaaattatcaaggacaacaggaactctctggaattacgcgtcacagctgtgtatgctctacgaagaatcgctccgaaaatttcacaaaaggtattgccagtgttgttagagatcttccgtcaaccaaacaacgacgccgaactgcgcatggcttgttttgtgatcatctgcgactgcaaacctggtccagcggtgttcaacttggtagtcaagcagctctattacgagcgcaccaatcaagtgcgatcttttgtggtgtcttacatcaaggggatggctttctctagatacccatgtgacgaaaaaatggctcaaaccgcacgaatcgctctccgaatcgcccttcgagcgcttaagagatacaagataaacccagtgaactccaaatggcttcactttggcgacttctccgatcttctccagatgggaagtgcagttgatcttcgcatgatttccacgccatcaagcttcatcccgagagctatcaactctaaagtcaaactacacaccggtatgcttggaaaatctctgaatctattcgagactggcatccgtggtgagggaatccaagagctgctccaacgtgtcatgggaccaaggggacactacagcgaagataagagtctgtttggcttgctcagaccgaaacgaagcttagacaacagtctgaatgggaaagaaatcaaggaaatccaacaccagctgccaaccgctgaacatgaagaggagaaagccaaaggttccttctacttcaaagtttttggaaaagaactgttctataattatttcacctatgatgacatcaaagagttaatggaagatggagtcatcagagttagcaacgatctgaaggacctgctgaagaaaggtaaaaaatggaatatctccaaaatcctccgtccaattgaagtgaagatacgccagccctcgtgtgccggtctgcccctggatttctctgtggacagcgttttgttcctgcgtttgaacaccacggtcaaagctgaggttcaaccccgattcttcattcctcttcccgatcaagtcactcttagtggaaaattgaacattagtgttcttcattctctttggggaaaaatgaaagtatctctgcctcagttggagactggtagcaagctaacgatccacaccaatatcactactggattaggtggcagattccagtacaacatcagaaatcaaagctacaccacagaaatggacttgccacaagagagacaggagattttatcaactaacaaggcgagcaacacctggctgatgtacaggaaacctgtcacaggttccaagagttggtcttcaatctacaggagcacagagctggacggatttgagatcggagaagttactgagattgccaggggacatcatcatcatcgttatgccaactggactctgaaggcaggaaatg
gaaaatacctgtctgtgaatgagcacaatgaactcgtcttaagcgagctacctactgtcttcaaagtcttctttatcgacggaaatcatcaatttgtgaaactttttgtcaccggaaagggattcgttaaggtgaggccaaattcccaagggtgtttagagactgtgcatgataatcgctaccccaagctcttcaagatcacattcgtgaatggtcgccgtcatgttcgcctcgcgacagttgtgtatattcacaaagatgcatccaagaaccctgtggaatggacccgcgcaaccgtccctatgaacacggatcaagtcacgttattccaacacatgatcgaagatattgaaccacagaagggatatgaggagaaatggcgctatatcagtgacttgcccttgaattcgccttcgcataatgctgagaagagtgacggactgcatccgcgtcatatgaaaatcgtctttgttacacctgtgggaagcaagctttgtttgaggccgagccaaaccaaggaatctgtgctctctgtaaagaggcatcacacaaactcgagcgaactgtgttatggcaaacaaatgct\n", ">transcripts_v2_5t---NA---\n", "gtgaagatgaggaatctaaggaatggcctgtttgcaaattgagttactggatttctctggcaagtagttttggatcaatttttcatactatttttcttgtgtgataagcaaatgattgcattccctgagacagttttttccaggacggtgaaatcagtgaaacaagagcctgtatgtcactagtgactctgagtaaggccttcatttgagcccagcattgcagtggcacttgtttaagttcttggagaagtgactgtaatcaactgatgacctctgcacagcagcaagaactagatgcaggcaaaagttttaaagtccctcagctttgctctgtagcatgctacctaaagttttcagatagtgagaattcatctgatggaagaagctaaatctttggattactatgagagtcagaaagcagtgtgtaacatgggtattactcataatttcaaagtatgtcaaggatgtcattaaggtagtgaacattttcttgagcttttgagtcttttttttctcttgtttattctgaattcggttttcagcagttttgtggaggaatctgtcggcaatgatcagtggtgattttctgccatagtggtggtgttaaatttcaattggcaacttcttgaggtggcattccagcttttagcaaggtgcaagttccactatcacttttagatgttaaatcagcaggcaaacccaaccaatttggacatttcaccattcacgtggctttcagaaaaaggcttgcagcatatcagtaggactggtctgtatgggtctggcctcaggtagctgaatttaactgagataatcctgcagaaaaaaagaggacagaagtacacgtacaatgactgttggtggaacttcctcagaagtggtaatgttaggcaaccaaattcattcttccattgttttaaagaaagaatcagaaacactttgcaccatttcaattgatatgagcagtcctaattatctggtaaggaaatatttcaaacaagtttatgcatggactcagttattttgggtatattttttggatctcttttcaagtgtctttcagaatgtatacagtctgtgtacagtagcagtttcttggaaaagcatttttctcaattttgaacaagacttcttagcatccagcagctttccctggtccttgttaaaaaccttaaatgtgcatcaagcatgaaggaaggccctactaacccaaaggtgtgaacttaatgggtattaaacgttctccgccttacaatgggtgcagtttgtgtggaaactttttgccaggttttcagccccagcgtgtggttcacaattcactctattggtggaattagtgcattaggacaagggcaattttcagagtaatcctaggg
tcaggaggcacattttcagggtcaaatgtaagtgcgtgctgtaattcttctcaagtgagttttgtcctattggctaccctgtcagttttttttgtgagttgtgtgtgcatggtttttaatgaatttgttaggaatatgtatgcagagttagcaacgatggttttttaggactgtattgtacccaattcaagtccaaaatggaattgtcagaccataaagtggtttttcagaatttggcaagcacctcggaggcaaattcagtggtacacaaaagactgtattgtactcaattcaagtccaaaatggaattgtcagaccaccaagaaatttttcagaatttggcaagcacctcagaggcaaattcagtggtacacaaaagaagtgaatataccacatcccggagaacttaaaccttcttcagaaatgctgagtaaagttaaccactctgaagtccttcatcattttggaccaaagcaacgcttttacctgtttgatattaatatttacaatagtgcagaccatctggtaagggaaaccttcaaatcaaccatatatgagtatgtatttcctgtggatttcttttttaaggagtcttttttcaggggataagcctagttatttcagcagcactttaccaaaccaactgaaattatggagcagctttgaaaattatccaagtccttttgtgccaagttagcatcactgtggtaatatccagtccagttaagtggttaaaagttaacctcctagatcctcgaaagagttttgccatacctgtccaagttaagggccatgtgctgtcactcaagtgtagctgaacatgctgtgtcgtagagaaccacatccctcttgtgtatttatgtgccagtgcattgtctataccagtttcaggactgcgcaatgtaccgtgtattttgttgcaacaaatttatcagaacttgtcactctggggtaaaacaaccagcatattgggtgcattttcgacctattcctaaagagcaaattcatctacacacatcttgtattccatgtgttgtgccacctgccttttagatgtcaagtgtgtattgcttcatggaaagtgtctgtctgttcaacaaaaaacagcccactcatcaacaaacttttaagatgaacatttcaaaaaatctggttatgttactccttcaaagtctgtgattgtcttgctcccatgggcagtcccaagtcctagtcactgattctctaagtaggctgatcagaaatttaccagtacattcatggaggtgcaattcacatctgaatccttacttggtggttaaagattcttgtagaaggttaaattaaaggttggttttatgttttggttaatttgtagtatttgtaattagacaattcatatttctcaattaagttatccagggatgaggtaatataaagaccatactgtctctcaagaattgttcctatgtacttgattaagtgacttaattaataagatatagaattaagtttaacctagaagtatcatcaaaaccagtacatactttgtcagaccttacagatattgagtatcaccaagtaatggaagttttttggcttcggcaagtatttgttttggaatgttgccttttttgttgttaatatgaaaagtatgctttgtttgtctgtaattgcgtgtttttcatgtcatgagaatcagtggaagggtttactgtgaaataccatctactaagatttatttctcttagaaatgcatcattaactattgttaacatattagctccatacctatttatgcttgtgtataaagtcaaaactgtgaagagctttgtctgtgattggggggatatttttgttctcttttgattgttaccaggcaaatttgcattttttggatggaatgtcaagaatatgaaaaaggcaagcttaactttctgatctttagtttcattatagattactctttttttcattttgtgatctaacatgcattttgatagtggtagtttgtaattaac
ctattccacacgtttttttttttaaatttatttatttactgtaactgaaaatcttagttaggttggaatacttcattatgtaatctaggatttattatgggcaaacagtgcatatttttgtcaacctctaaagcaactaatgtgatagatgaagctcagtttcactcatttggtctgtgagcaaaagcagctgggaatactttttacaggataacaatcaaggcttaatcaaaaatttctgctctgaactgcaagttgtcatcatgcagcaatcatttggttgatgcaggtattgattttaactgttgccttttgaattgttaggtctttgaaatattttgcatactctgattaattcacagcgcttttttctggatcaacaattgttgccaggtcatctttgcccaaaatggttatggccttgacttgctgtttgctgtcggtcagttcagtagtttgtcttgttcaaaatttttatgcagaaatctgtgcatatatcctctctacacagtgttgtaatcttgttggaaatgaagggctttcaaatgcatcctcttcatccagttgatacaggtatcgatgtgaccatttggcatctgctttaattgtttctctgagatgcattacatactttgatcaattcatagtgatttctgttggaatatactagaattcaacttaattttgaagttttcttggcatcgactttcaaggctgagttggtcagtgattgaagttcaaaattgtcgcccagaatcatttcatttcatctcttcaagcagatttgtattttcactgggtgttgtcacccttccaagtgctgcctcgtcatttagtttacacaggtattcgcttgaccttttgccacctggagcattttttctttgaaatatattgcatactctgatctttttacagcattttttattgaatgtgctactgttgacagctcaacttggcctaagattttatggccttcactttccggcatgcataggccagttccatgatgtggcagttgaaatcgatgtgtagaaatctgtgtacatttcctctatatacagcttcgttaatcgattggatgcatatgtccttcaaatgcgatcttgtcattgtgttgattcaggtattcacttaacgttttgccttctgaactgtttttcttcgaaatgtatgacatactctggtcaattcacagcaatttctgtttgaatgtggaacggtggacagcccaacttggtcttggaacataatggccgtgacttcttttgtccatgaacgttagttttacgatttgtctagtttcaaactgttatgcagaaatccgtgcattatatcctctttacacagctttgaacatctcctctaagctagttggaatttgtgagcattttggacaaagccagatttttccattccatcggcacgctttcagaagccaattaccagtaaatcttgagatttgtcaagggaaaaaattctaagcaaactggatccgaaaagcagaggtacttcttcttgtcatctttttcaacttcagtgtttctctcgagaccatgtatgtttcgttagtatcttcaatagtttgtccagccaagcgcagcacaattaaggttgaggattaggtaccaacttttgaggtaattgcaatatgaattaccatatcattatttcaaattagtggcaatgtggtgcatgctttgttcagtcttgacagaacttttgttttttggtttgaacttgctataagcataactacacttgggcagctcaactccaatcatgaaayggtttccttgtacaaggtatttccagcatgaattatcacttcaaattattggcaatgcagtggatactttgcgaagtcgacagcatttttgtttctagttcattcatgtgttagtgattccttggttacctttatcatttgagatgtcgtcaaactttggctaaatcttggaaggctttcaagatatcaggtaccaaaatcaagtatcttt
caaattccaatttgagtagatgtggtgtatgctctgttcacttttcacagcagtttaatgtgccagatggaatttacattaattgaacttggtcaccttcaagctaaggcctggtgtgaacatcgtggtgtaagtcattacttgccaatctatggacagtcgacaagcactttctcattggtgaatgcaaattttttggacattgtacttgtcagttgtggcatttattccaaaattttttttgtatgtgcaaatgttggatattttgtcaggttccacagtcttctggtaatttttctgtcccactccgaaataaggtggattgtgtgcctgcagtatcgaaagcatcaaacatttcctcttgacagtcagctgtttgtagtctttctaatcacatttgttcagataaactctggtgaactggatacctaaaattcaggtattgaatcttattttttgccactattatggtttgatttcaacatgcatgtcaacactatgtgtgttccatcagtatcttgagcagtttctgctgtggcaagctccacttcaactcggatatacaaagtaagacatgattttagtctcatttgccatcacattcagtttgaaatggttgtctgggaaacagtgtatactttgtaagtctcggcagcgttgtgaacaactcctacaggatgaaggttagttttcagatatgtatttgtttctggtcgcaaaggttttgtatatttctttttcaagtgtagtgaagcattggatgaagg\n", ">Locus_180_Transcript_15/16_Confidence_0.327_Length_6143_transcripts_v2_6t---NA---\n", "gtgaagatgaggaatctaaggaatggcctgtttgcaaattgagttactggatttctctggcaagtagttttggatcaatttttcatactatttttcttgtgtgataagcaaatgattgcattccctgagacagttttttccaggacggtgaaatcagtgaaacaagagcctgtatgtcactagtgactctgagtaaggccttcatttgagcccagcattgcagtggcacttgtttaagttcttggagaagtgactgtaatcaactgatgacctctgcacagcagcaagaactagatgcaggcaaaagttttaaagtccctcagctttgctctgtagcatgctacctaaagttttcagatagtgagaattcatctgatggaagaagctaaatctttggattactatgagagtcagaaagcagtgtgtaacatgggtattactcataatttcaaagtatgtcaaggatgtcattaaggtagtgaacattttcttgagcttttgagtcttttttttctcttgtttattctgaattcggttttcagcagttttgtggaggaatctgtcggcaatgatcagtggtgattttctgccatagtggtggtgttaaatttcaattggcaacttcttgaggtggcattccagcttttagcaaggtgcaagttccactatcacttttagatgttaaatcagcaggcaaacccaaccaatttggacatttcaccattcacgtggctttcagaaaaaggcttgcagcatatcagtaggactggtctgtatgggtctggcctcaggtagctgaatttaactgagataatcctgcagaaaaaaagaggacagaagtacacgtacaatgactgttggtggaacttcctcagaagtggtaatgttaggcaaccaaattcattcttccattgttttaaagaaagaatcagaaacactttgcaccatttcaattgatatgagcagtcctaattatctggtaaggaaatatttcaaacaagtttatgcatggactcagttattttgggtatattttttggatctcttttcaagtgtctttcagaatgtatacagtctgtgtacagtagcagtttcttggaaaagcatttttctcaattttgaacaagacttcttagcat
ccagcagctttccctggtccttgttaaaaaccttaaatgtgcatcaagcatgaaggaaggccctactaacccaaaggtgtgaacttaatgggtattaaacgttctccgccttacaatgggtgcagtttgtgtggaaactttttgccaggttttcagccccagcgtgtggttcacaattcactctattggtggaattagtgcattaggacaagggcaattttcagagtaatcctagggtcaggaggcacattttcagggtcaaatgtaagtgcgtgctgtaattcttctcaagtgagttttgtcctattggctaccctgtcagttttttttgtgagttgtgtgtgcatggtttttaatgaatttgttaggaatatgtatgcagagttagcaacgatggttttttaggactgtattgtacccaattcaagtccaaaatggaattgtcagaccataaagtggtttttcagaatttggcaagcacctcggaggcaaattcagtggtacacaaaagactgtattgtactcaattcaagtccaaaatggaattgtcagaccaccaagaaatttttcagaatttggcaagcacctcagaggcaaattcagtggtacacaaaagaagtgaatataccacatcccggagaacttaaaccttcttcagaaatgctgagtaaagttaaccactctgaagtccttcatcattttggaccaaagcaacgcttttacctgtttgatattaatatttacaatagtgcagaccatctggtaagggaaaccttcaaatcaaccatatatgagtatgtatttcctgtggatttcttttttaaggagtcttttttcaggggataagcctagttatttcagcagcactttaccaaaccaactgaaattatggagcagctttgaaaattatccaagtccttttgtgccaagttagcatcactgtggtaatatccagtccagttaagtggttaaaagttaacctcctagatcctcgaaagagttttgccatacctgtccaagttaagggccatgtgctgtcactcaagtgtagctgaacatgctgtgtcgtagagaaccacatccctcttgtgtatttatgtgccagtgcattgtctataccagtttcaggactgcgcaatgtaccgtgtattttgttgcaacaaatttatcagaacttgtcactctggggtaaaacaaccagcatattgggtgcattttcgacctattcctaaagagcaaattcatctacacacatcttgtattccatgtgttgtgccacctgccttttagatgtcaagtgtgtattgcttcatggaaagtgtctgtctgttcaacaaaaaacagcccactcatcaacaaacttttaagatgaacatttcaaaaaatctggttatgttactccttcaaagtctgtgattgtcttgctcccatgggcagtcccaagtcctagtcactgattctctaagtaggctgatcagaaatttaccagtacattcatggaggtgcaattcacatctgaatccttacttggtggttaaagattcttgtagaaggttaaattaaaggttggttttatgttttggttaatttgtagtatttgtaattagacaattcatatttctcaattaagttatccagggatgaggtaatataaagaccatactgtctctcaagaattgttcctatgtacttgattaagtgacttaattaataagatatagaattaagtttaacctagaagtatcatcaaaaccagtacatactttgtcagaccttacagatattgagtatcaccaagtaatggaagttttttggcttcggcaagtatttgttttggaatgttgccttttttgttgttaatatgaaaagtatgctttgtttgtctgtaattgcgtgtttttcatgtcatgagaatcagtggaagggtttactgtgaaataccatctactaagatttatttctcttagaaatgcatcattaactattgttaacatattagc
tccatacctatttatgcttgtgtataaagtcaaaactgtgaagagctttgtctgtgattggggggatatttttgttctcttttgattgttaccaggcaaatttgcattttttggatggaatgtcaagaatatgaaaaaggcaagcttaactttctgatctttagtttcattatagattactctttttttcattttgtgatctaacatgcattttgatagtggtagtttgtaattaacctattccacacgtttttttttttaaatttatttatttactgtaactgaaaatcttagttaggttggaatacttcattatgtaatctaggatttattatgggcaaacagtgcatatttttgtcaacctctaaagcaactaatgtgatagatgaagctcagtttcactcatttggtctgtgagcaaaagcagctgggaatactttttacaggataacaatcaaggcttaatcaaaaatttctgctctgaactgcaagttgtcatcatgcagcaatcatttggttgatgcaggtattgattttaactgttgccttttgaattgttaggtctttgaaatattttgcatactctgattaattcacagcgcttttttctggatcaacaattgttgccaggtcatctttgcccaaaatggttatggccttgacttgctgtttgctgtcggtcagttcagtagtttgtcttgttcaaaatttttatgcagaaatctgtgcatatatcctctctacacagtgttgtaatcttgttggaaatgaagggctttcaaatgcatcctcttcatccagttgatacaggtatcgatgtgaccatttggcatctgctttaattgtttctctgagatgcattacatactttgatcaattcatagtgatttctgttggaatatactagaattcaacttaattttgaagttttcttggcatcgactttcaaggctgagttggtcagtgattgaagttcaaaattgtcgcccagaatcatttcatttcatctcttcaagcagatttgtattttcactgggtgttgtcacccttccaagtgctgcctcgtcatttagtttacacaggtattcgcttgaccttttgccacctggagcattttttctttgaaatatattgcatactctgatctttttacagcattttttattgaatgtgctactgttgacagctcaacttggcctaagattttatggccttcactttccggcatgcataggccagttccatgatgtggcagttgaaatcgatgtgtagaaatctgtgtacatttcctctatatacagcttcgttaatcgattggatgcatatgtccttcaaatgcgatcttgtcattgtgttgattcaggtattcacttaacgttttgccttctgaactgtttttcttcgaaatgtatgacatactctggtcaattcacagcaatttctgtttgaatgtggaacggtggacagcccaacttggtcttggaacataatggccgtgacttcttttgtccatgaacgttagttttacgatttgtctagtttcaaactgttatgcagaaatccgtgcattatatcctctttacacagctttgaacatctcctctaagctagttggaatttgtgagcattttggacaaagccagatttttccattccatcggcacgctttcagaagccaattaccagtaaatcttgagatttgtcaagggaaaaaattctaagcaaactggatccgaaaagcagaggtacttcttcttgtcatctttttcaacttcagtgtttctctcgagaccatgtatgtttcgttagtatcttcaatagtttgtccagccaagcgcagcacaattaaggttgaggattaggtaccaacttttgaggtaattgcaatatgaattaccatatcattatttcaaattagtggcaatgtggtgcatgctttgttcagtcttgacagaacttttgttttttggtttgaacttgctataagcataactacactt
gggcagctcaactccaatcatgaaacggtttccttgtacaaggtatttccagcatgaattatcacttcaaattattggcaatgcagtggatactttgcgaagtcgacagcatttttgtttctagttcattcatgtgttagtgattccttggttacctttatcatttgagatgtcgtcaaactttggctaaatcttggaaggctttcaagatatcaggtaccaaaatcaagtatctttcaaattccaatttgagtagatgtggtgtatgctctgttcacttttcacagcagtttaatgtgccagatggaatttacattaattgaacttggtcaccttcaagctaaggcctggtgtgaacatcgtggcgtaagtcattacttgccaatctatggacagtcgacaagctctttctcgttggtgaatgcaaattttttggacattgtacttgtcagttgtggcattcattccaaaattttttttgtatgtgaaaatgttggatattttgtcaggttccacagtcttctggtaatttttctgtcccactccgaaataaggtggattgtgtgcctgcagtatcgaaagcatcaaacatttcctcttgacagtcagctgtttgtagtctttctaatcacatttgttcagataaactctggtgaactggatacctaaaattcaggtattgaatcttattttttgccactattatggtttgatttcaacatgcatgtcaacactatgtgtgttccatcagtatcttgagcagtttctgctgtggcaagctccacttcaactcggatatacaaagtaagacatgattttagtctcatttgccatcacattcagtttgaaatggttgtctgggaaacagtgtatactttgtaagtctcggcagcgttgtgaacaactcctacaggatgaaggttagttttcagatatgtatttgtttctggtcgcaaaggttttgtatatttctttttcaagtgtagtgaagcattggatgaagg\n", ">Locus_9682_Transcript_1/1_Confidence_1.000_Length_116_transcripts_v2_72920t---NA---\n", "ataaaaataccaggaattgaggaagcaagcagcggcatgccggagcagacaggcaaaaaggaaaaccagaatacgaaaagaaaggaagacgggaatctgcaagacgctttggtgga\n", ">Locus_9570_Transcript_1/1_Confidence_1.000_Length_112_transcripts_v2_72921t---NA---\n", "gctagtttcaggtgtgcattcattgaataaatgtatttgtatttagtacgagtgtataataaagcagtaaatacaaatacatttattcaatgaatgcacacctgaaactagc\n", ">Locus_9787_Transcript_1/1_Confidence_0.714_Length_111_transcripts_v2_72922t---NA---\n", "tgcgtagctcggtggatgtatagagaatgggaattcagtttcagattaggtatgagaccatggatatttgtagnnnnnnnnnnncagcactctcagcacctgttgtagcag\n", ">transcripts_v2_72923t---NA---\n", "ggacgatgaggannnnnnnnnnnnnnctgatgacagtaacgatgatgatcttgatgatgatagcgttgacgagaacgacgaggatgaagactatgaagtga\n", ">Locus_9072_Transcript_1/1_Confidence_0.667_Length_101_transcripts_v2_72924t---NA---\n", "ttcttgaagatttttttaagacaatcgtgttcagttgtaataatttttacataagtaatctaaatattattttttnnnnnnnnnnnnnnnnnagtcaaggg\n" ] } ], "source": [ 
"#Replacing pipes in the FASTA headers with tabs, then keeping only the first whitespace-delimited field of each line (the contig name on header lines, the whole sequence on sequence lines) and previewing the result\n", "#tr is used instead of sed because BSD/macOS sed does not expand \\t in the replacement text and inserts a literal 't' (which glued the annotation onto the contig name)\n", "!tr '|' '\\t' < blast2go_fasta_Pdamv2.fasta | awk '{print $1}' > Pdam.fasta\n", "!head -10 Pdam.fasta\n", "!tail -10 Pdam.fasta" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\r\n", "Converted 72890 FASTA records in 145780 lines to tabular format\r\n", "Total sequence length: 28141387\r\n", "\r\n" ] } ], "source": [ "#Converting FASTA to tabular format and placing output file in analyses directory\n", "!perl -e '$count=0; $len=0; while(<>) {s/\\r?\\n//; s/\\t/ /g; if (s/^>//) { if ($. != 1) {print \"\\n\"} s/ |$/\\t/; $count++; $_ .= \"\\t\";} else {s/ //g; $len += length($_)} print $_;} print \"\\n\"; warn \"\\nConverted $count FASTA records in $. lines to tabular format\\nTotal sequence length: $len\\n\\n\";' \\\n", "Pdam.fasta > ../../analyses/Pdam/fasta2tab" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/jd/Documents/Projects/Coral-CpG-ratio-MS/analyses/Pdam\n" ] } ], "source": [ "cd ../../analyses/Pdam" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"Locus_1685_Transcript_1/2_Confidence_1.000_Length_7457_transcripts_v2_1tspectrin\t\ttatacgattttatgccgtggaggtgttttcttgcagaagtttcaaattatgtcctaattgtagtgtagaacggactattggacataatttgaaacttctgcaagaaaacacctccacggcataaaatcgtatatggcgatacatgaaccactgcttcaggaacactgtcttccttttgatgtttaaaccatgcactggcccacatttttgtttgctaatataacagaacttttccgctatccctaaccaagtaacaactcatcactctaaacataataacatgcactgaagaaacattacattagtaattctcttgaacactgtgatcagttaatctgttgatgatagcttcagtaatacctagcgagctgctggggtattgtcatcttccagttacattaaaccgtgaggtgtttcgtttcttcagaaataaggaacataccacaattactgcaaatctatgccggactgactaatttagctagaagagaagagttcctcgcagaaagtcttgtagtcgtaagctcccggaacttctcggcccttgtcgtcaacgtaaggattcatacgatcaatacagaagtcggcctgctccttggtgagagactgatagagctcagcctcagtaacatagagacgttttccgccctcagtgagcgccttgaaggcgttgatgacttcttgactagagccgacattttctgtttcacgactgatcatgaaggccatgtactctcccatcgacaccacaccgtcaccgttgggatccaccgttctgaggatgctttggaactccggatcttcttcgccctcttctacaatggagagatcgtagccaagagaacggaggcatgacttgaattcttgatgatccaggtaaccagtcttatccttgtcaaagtgcttgaacatgatagtgaattctttgagtgtatcctcagaaacgccagtggtattgcgggcctggatttgctgttcaagattgtgtttcatgcgcatggcaagttgatccagctgatcccactgctgagcaaggtccactgtgctatgttcggtatacttgttgtccaggataagagcctcttccatttgagccccaagatcctccaggatcctaaggtcctctttcctgtctgctatctcagcactcttcttcttgacctcggccagttgatcttcaaggtccccttgaccgtcaaccattgcaacccttgtatcagagagccaagcgtggaacgagttggcagcttgtgcaaattcctgcctgaggttgtcattatactcctgacgctgggcttctttgcgcaaatcatcctcgcgttcctcaataatcttctgcagattttcccacgtgtcttccaatgcttccatagtaaaccaagtataagggttgatggagacgttatagctcttgatctgacgatcaagcttcctcaacatcataatgtcgtcttccgcttggtccagagaagcgcggaactgggtgtggccatcttgcaaagcacgtatctcctcaacagagttgcagcgcactggatcggtcagatcttcctccgcattctcgaaccaactgttaaaagcagaggctttcttggcaaacaacaggaacaagtcctccaccttcttgtattgatcctgggcatgttgaagtcgttcttttcgagtcttagagtcttccagcagctgctcccacctcctaataaggtcatcatgacgtttgatgatggcaggcgactgctcgtgttgtgattggacaagctcgtctttcagcgcagtcacacgtgcaatgccttcgttttcaaatgcttggagacctgagtcaaaggtttcctgtttggtgaaaagtgtttgcaccgaagacaaatcgcgccccagatcatctgaccgagcaattccttccttgtcaccaatccag
gattccactacatcggccttccaattgaactgcaggaaagctgagttgtcattgagcttggacttgcgataggacgccattctctccaactcggtaagtttactcttaatggatgcaatcctgtggtcaatcaactcagattggtggttacccttttcaatgagcttgttgccagcatcttcaatgtcctgaactctttcccgatgaacctcaagatctgtctcgaatgcttcgtgcttctttaaaagaccttggactgcagccagtgtatcaccatagtcgtcacttccaacaagagtgttcttctcattgatccacgattcctcttcaccaacattagcgctgaactgttgatattccagagactcatctagcttgtgctgcctctgctcagccaggtttttcagttcgtcccagttttcctgcagctgatcacagcgagctttgatttcatcagagctactgtgtccttcgtcaataaatttacggccacacagtagtacagcctgaattctggcctcatgggtgttcaattcagcttccaaacgctgatgtttcttacgtaggttttgtacaccagtcaagtcctttccataatcttcagagctggtcaatagctttttctcctttatccaggactcttcgtcgtccacatctcggtaaaactggtgcaaagcgttggactcgtccaacttcttatggcgatctgcggccatgtcttttactttgtcgtagcgttctgcaataatgcgggatttgtcttgcagtgagtcagcatcaaaatggccaacctcagcgaagtgctgggtttgggcttgaagatcagtgatacgatcctcgtgagcagcaatatctgcttcaacaagttggtgcttcttgatcagattctgaacactggccagatctcgaccatgatcatcgtgagacaaggaagcttcaacctctcccagccagaaatccagttccttaacatttgtattaaattgctgttgttgattagattccttcagatgttgactcttctcattggacttctgtactaggtattcccactgctgctgaagtttggtgattctctctttcacaatctcctcacttccggcacacttgcgctgctcaatgagaccttcagctaggttgatggtctccaagactctctcctgattggctgagacttcagcttcaaacgcctgatgcttttggaacttggactgtaagttggttggatccttgtaagattcatccagcactgtctgtaacttttcactgacccatgcttcaatgtcctcagcatcacgactgaactgctgaatggttttggactcacccagttttgatcggcgttcaaccaaggctgccttcagagtagcccatctcgctaagactgcagaaatcctctctgcaatagctggagagtcataatgattgttatcaatcagccgatcagcattgtctttcagagcattgatcttcacgtcctgcactgcaagtgtctttgtgaagtcttcatgtttcttgatcagagcttcagcaccctccgctgcttctccgacatcttcgctctgaatgatggcctcacgtgtcgccatccactgctccagttgttcggcatctcgattgaataactgcaactccaaacattcatcgagtctttgcttgcgagaagcccaggccttttccagttcttctcgttctgtagccatggtctccagtttttcacggatgtcgggactagcataatgttctttgtccaataatttcttgccgaaatcttcaaacgactggaactcgctgtcacgcgcatcaatttctgcgcgatgttcctgatgcctgtccaacagagcttctgcactggccacatcctttgcaagttcatcagaagttaccaatgccataataccattgatccatgaagtaagatctctgaattcgttgaggaaatggaagtaatctgacgagtcactcagtttgccccttcttgcagccgcctgttctctgag
attagcccacgcttcttctagctcgtgtctcttccattcaatgtcttgtgctgcattcggatgactggatgttaacttagcagcttccacgttgagttcacgaaccttctcttccaatgcggccaagtctctctccaaaacttcatgctttctctgcaaggcctggacactagccaaatcccgaccataatctgaggtggaaagagcagtgtctttctctgaaatccaagccttggtgtcatcagcatccctgtggaatttctgaatttcctgagcattatccaaattttccttcctctgctctgacatggtcttaagattggtccatttctggttcagcctctcaatcatctctcggatcatctcatattctctgtagtgttcaatctttagtttctcagccagtaatgtcagttctctgatacgaacctctttagaaatcatatccttttcaaagtcatcaaacttcttctgctgagcttccacatgttcgtagtcctttccgatttcctctgaggtgacaattgcttccttgtcattgatccaagactccagttcgtgagcatcccgcaacacactgtatctttgaatggaatcttccaatttcttcttacgttcttcgcctttgtccatcaggtcttggtatttatcatccaaagcctgctgtctgttagcaacagtgtccacttcaggagcctgcgacagaaggtcctgtgaggctttgtgggagtcaatcctcttgacatatgccgcaggaacaaaaccttgtctgtcattagtttctactttccaccaatccttgttgctagaattgagtagggtcagaatgtcacccttctgcatggacacctctcttgcagttttctcctgatagtcataaagagcaactacacattccttgtcagagatatctgtgacatgagccgctggcttgcagtgttgactttgttctctcagcccatctacaacagttccatatgctctcaagtctgacatgatggcatcatgtttggtaagcaatgcctgtgcactgtcttcatcttttccatagtcatcactggtaacaattggttctttctccttcaaccaggattctgcttcagcaacatcagctagatactgatgagcttgaagagagtcatccagatgtccttttcgcacatgtgccttatccttgaattccagccatttctggtcaaggtcatcaatcttctctttgatctcatcagcggcaaagtgcccattatctatcatttgaacaccattatcgcaaacagctctgacacgtggttcatgaccagcaatttcagtcattaaagcctggtgtttctttgacagattctgggctcctgtcaaatctcggccagtgtttgtggatgaagcaacaggttccttctcccttatccatgcctcctcatcttcaacatcatgaaggaatcgtttaagtctctcagcatcctggagctttgctttgcgagctaggagaggagcctgtagctgttggtatcggctatttaaaacctccttcttttctttgatggaaggagcatcaaagtggtcagcctccgcaaataaattagcttgtgcattaacaacctcaatcttttctgcacgcgctatgacatcagcctctatcatggcatgtttcttttggaggttttgcacacttgtcagatcctttcctacatcctcaagagcaagcaagttttcaacttcagtgaaccacaactcaacatcttcggcacctcggttgaactgctgttgctgtgctgcttctttcaatttcaaccctttatcatttgatctctcaaacagataggcccacaacttatgcagttcatcaagtctctctctgatttgatctgaggcatagtgttcatctccaatcagttgttcaccagtgttgtcaactgcatcaagccggctttgattggcattcaactcagcctcaaatgcttggtgcttctggatctttccttgcagattggttggatccttgt
aagattcatcactggcaatcttcagcttttctgtaatccagctttttacttcatcacagtctctctcaaactgctgcagtttacgagattcctcaagtttcaacctgcgcgccttggacaactccccaatgttattacgcctttccattataccatctcttctctcacgcacctcatctgaggcatagtgattgctgtcaacaaggcggtttgcatactcgtcaatactattgatcttctctgcttgggcagcaaaagatttgtcaaagtcttcatgcttcctgatcagagcttctactccatcgagagaatcaccaaggttatcatcagcaaggaaggcctcttgtttagacatagtggcatctgcatgctcacaatcccgattaaaaagctgcagctccatacactgctcaaactgcacacgtctcctctcccacagctccaacagttccattttctcagtctccagactggcaagcttttctttgatctcatcagtagcataatgattggaattgacaagttcttctccatcatcggcaaacttcttaaatccatcctctgatgcatcaatataacccttatgttcctgatgacgttccagaagactttctgcactagctacatcttttgccagttcatcactttggatcaagactttcatttcatttataaaagaaatgtggtccctgtagtcactgatgaacctttgcagacgataagaatcctccaaccttgcttttcgcaccccagacttctccttaagatttccccaggcagttacaatttcatcttgcttagcagctatctcatctgcactttctggatatgcctcttgcagttgtgcagattctgaacccagggcagtgaccttatcttccactgcagccaaatctctttctagagcttcatgtttgcgtaacagagcattgacactggccagatcttttccataatcatctgatgacagaactttgtctttctcattgatccaattcttggtctcatcagcatcacgatagaagctgtgtatttgctgggcaccagccagtctcttctgacgtttgagtgccagcatcttcagcctttcccaagcttcattaacttccgcttgctttgtactgatcagttctatatctggatgaccctcatctccaagttgatgtgcaagctcattgatgtatgtgacccttgattcatttgcctgaatatccttcaagaagtcctcaaatttcttctgaagaacttccacatgttccaaatctcttcctacttcttcagaagtggcaattgcttctttttctaaaatccatgacataacttcctctgtttcatgcaagaagtggactcttttctgagtaaagagaagcatgcggcctttctctgctgatttggaaagcagcaattcccataacttgatgagtgagtcaagacgttccctgataagttcagaggcatagtgggactcactgatcatgccttcaccattttcttggagttcaataatggcattgctatgagctgaaatctctgcttcaaacg\r\n", 
"Locus_1685_Transcript_2/2_Confidence_1.000_Length_7457_transcripts_v2_2tspectrin\t\ttatacgattttatgccgtggaggtgttttcttgcagaagtttcaaattatgtcctaattgtagtgtagaacggactattggacataatttgaaacttctgcaagaaaacacctccacggcataaaatcgtatatggcgatacatgaaccactgcttcaggaacactgtcttccttttgatgtttaaaccatgcactggcccacatttttgtttgctaatataacagaacttttccgctatccctaaccaagtaacaactcatcactctaaacataataacatgcactgaagaaacattacattagtaattctcttgaacactgtgatcagttaatctgttgatgatagcttcagtaatacctagcgagctgctggggtattgtcatcttccagttacattaaaccgtgaggtgtttcgtttcttcagaaataaggaacataccacaattactgcaaatctatgccggactgactaatttagctagaagagaagagttcctcgcagaaagtcttgtagtcgtaagctcccggaacttctcggcccttgtcgtcaacgtaaggattcatacgatcaatacagaagtcggcctgctccttagtgagagactgatagagctcagcctcagtaacatagagacgttttccaccctcagtgagcgccttgaaggcgttgatgacttcctggctagagccaacattttctgtttcacgactgatcatgaaggccatgtactctcccatcgacaccacaccgtcaccgttgggatccaccgttctgaggatgctttggaactccggatcttcttcgccctcttctacaatggagagatcgtagccaagagaacggaggcatgacttgaattcttgatgatccaggtaaccagtcttatccttgtcaaagtgcttgaacatgatagtgaattctttgagtgtatcctcagaaacgccagtggtattgcgggcctggatttgctgttcaagattgtgtttcatgcgcatggcaagttgatccagctgatcccactgctgagcaaggtccactgtgctatgttcggtatacttgttgtccaggataagagcctcttccatttgagccccaagatcctccaggatcctaaggtcctctttcctgtctgctatctcagcactcttcttcttgacctcggccagttgatcttcaaggtccccttgaccgtcaaccattgcaacccttgtatcagagagccaagcgtggaacgagttggcagcttgtgcaaattcctgcctgaggttgtcattatactcctgacgctgggcttctttgcgcaaatcatcctcgcgttcctcaataatcttctgcagattttcccacgtgtcttccaatgcttccatagtaaaccaagtataagggttgatggagacgttatagctcttgatctgacgatcaagcttcctcaacatcataatgtcgtcttccgcttggtccagagaagcgcggaactgggtgtggccatcttgcaaagcacgtatctcctcaacagagttgcagcgcactggatcggtcagatcttcctccgcattctcgaaccaactgttaaaagcagaggctttcttggcaaacaacaggaacaagtcctccaccttcttgtattgatcctgggcatgttgaagtcgttcttttcgagtcttagagtcttccagcagctgctcccacctcctaataaggtcatcatgacgtttgatgatggcaggcgactgctcgtgttgtgattggacaagctcgtctttcagcgcagtcacacgtgcaatgccttcgttttcaaatgcttggagacctgagtcaaaggtttcctgtttggtgaaaagtgtttgcaccgaagacaaatcgcgccccagatcatctgaccgagcaattccttccttgtcaccaatccag
gattccactacatcggccttccaattgaactgcaggaaagctgagttgtcattgagcttggacttgcgataggacgccattctctccaactcggtaagtttactcttaatggatgcaatcctgtggtcaatcaactcagattggtggttacccttttcaatgagcttgttgccagcatcttcaatgtcctgaactctttcccgatgaacctcaagatctgtctcgaatgcttcgtgcttctttaaaagaccttggactgcagccagtgtatcaccatagtcgtcacttccaacaagagtgttcttctcattgatccacgattcctcttcaccaacattagcgctgaactgttgatattccagagactcatctagcttgtgctgcctctgctcagccaggtttttcagttcgtcccagttttcctgcagctgatcacagcgagctttgatttcatcagagctactgtgtccttcgtcaataaatttacggccacacagtagtacagcctgaattctggcctcatgggtgttcaattcagcttccaaacgctgatgtttcttacgtaggttttgtacaccagtcaagtcctttccataatcttcagagctggtcaatagctttttctcctttatccaggactcttcgtcgtccacatctcggtaaaactggtgcaaagcgttggactcgtccaacttcttatggcgatctgcggccatgtcttttactttgtcgtagcgttctgcaataatgcgggatttgtcttgcagtgagtcagcatcaaaatggccaacctcagcgaagtgctgggtttgggcttgaagatcagtgatacgatcctcgtgagcagcaatatctgcttcaacaagttggtgcttcttgatcagattctgaacactggccagatctcgaccatgatcatcgtgagacaaggaagcttcaacctctcccagccagaaatccagttccttaacatttgtattaaattgctgttgttgattagattccttcagatgttgactcttctcattggacttctgtactaggtattcccactgctgctgaagtttggtgattctctctttcacaatctcctcacttccggcacacttgcgctgctcaatgagaccttcagctaggttgatggtctccaagactctctcctgattggctgagacttcagcttcaaacgcctgatgcttttggaacttggactgtaagttggttggatccttgtaagattcatccagcactgtctgtaacttttcactgacccatgcttcaatgtcctcagcatcacgactgaactgctgaatggttttggactcacccagttttgatcggcgttcaaccaaggctgccttcagagtagcccatctcgctaagactgcagaaatcctctctgcaatagctggagagtcataatgattgttatcaatcagccgatcagcattgtctttcagagcattgatcttcacgtcctgcactgcaagtgtctttgtgaagtcttcatgtttcttgatcagagcttcagcaccctccgctgcttctccgacatcttcgctctgaatgatggcctcacgtgtcgccatccactgctccagttgttcggcatctcgattgaataactgcaactccaaacattcatcgagtctttgcttgcgagaagcccaggccttttccagttcttctcgttctgtagccatggtctccagtttttcacggatgtcgggactagcataatgttctttgtccaataatttcttgccgaaatcttcaaacgactggaactcgctgtcacgcgcatcaatttctgcgcgatgttcctgatgcctgtccaacagagcttctgcactggccacatcctttgcaagttcatcagaagttaccaatgccataataccattgatccatgaagtaagatctctgaattcgttgaggaaatggaagtaatctgacgagtcactcagtttgccccttcttgcagccgcctgttctctgag
attagcccacgcttcttctagctcgtgtctcttccattcaatgtcttgtgctgcattcggatgactggatgttaacttagcagcttccacgttgagttcacgaaccttctcttccaatgcggccaagtctctctccaaaacttcatgctttctctgcaaggcctggacactagccaaatcccgaccataatctgaggtggaaagagcagtgtctttctctgaaatccaagccttggtgtcatcagcatccctgtggaatttctgaatttcctgagcattatccaaattttccttcctctgctctgacatggtcttaagattggtccatttctggttcagcctctcaatcatctctcggatcatctcatattctctgtagtgttcaatctttagtttctcagccagtaatgtcagttctctgatacgaacctctttagaaatcatatccttttcaaagtcatcaaacttcttctgctgagcttccacatgttcgtagtcctttccgatttcctctgaggtgacaattgcttccttgtcattgatccaagactccagttcgtgagcatcccgcaacacactgtatctttgaatggaatcttccaatttcttcttacgttcttcgcctttgtccatcaggtcttggtatttatcatccaaagcctgctgtctgttagcaacagtgtccacttcaggagcctgcgacagaaggtcctgtgaggctttgtgggagtcaatcctcttgacatatgccgcaggaacaaaaccttgtctgtcattagtttctactttccaccaatccttgttgctagaattgagtagggtcagaatgtcacccttctgcatggacacctctcttgcagttttctcctgatagtcataaagagcaactacacattccttgtcagagatatctgtgacatgagccgctggcttgcagtgttgactttgttctctcagcccatctacaacagttccatatgctctcaagtctgacatgatggcatcatgtttggtaagcaatgcctgtgcactgtcttcatcttttccatagtcatcactggtaacaattggttctttctccttcaaccaggattctgcttcagcaacatcagctagatactgatgagcttgaagagagtcatccagatgtccttttcgcacatgtgccttatccttgaattccagccatttctggtcaaggtcatcaatcttctctttgatctcatcagcggcaaagtgcccattatctatcatttgaacaccattatcgcaaacagctctgacacgtggttcatgaccagcaatttcagtcattaaagcctggtgtttctttgacagattctgggctcctgtcaaatctcggccagtgtttgtggatgaagcaacaggttccttctcccttatccatgcctcctcatcttcaacatcatgaaggaatcgtttaagtctctcagcatcctggagctttgctttgcgagctaggagaggagcctgtagctgttggtatcggctatttaaaacctccttcttttctttgatggaaggagcatcaaagtggtcagcctccgcaaataaattagcttgtgcattaacaacctcaatcttttctgcacgcgctatgacatcagcctctatcatggcatgtttcttttggaggttttgcacacttgtcagatcctttcctacatcctcaagagcaagcaagttttcaacttcagtgaaccacaactcaacatcttcggcacctcggttgaactgctgttgctgtgctgcttctttcaatttcaaccctttatcatttgatctctcaaacagataggcccacaacttatgcagttcatcaagtctctctctgatttgatctgaggcatagtgttcatctccaatcagttgttcaccagtgttgtcaactgcatcaagccggctttgattggcattcaactcagcctcaaatgcttggtgcttctggatctttccttgcagattggttggatccttgt
aagattcatcactggcaatcttcagcttttctgtaatccagctttttacttcatcacagtctctctcaaactgctgcagtttacgagattcctcaagtttcaacctgcgcgccttggacaactccccaatgttattacgcctttccattataccatctcttctctcacgcacctcatctgaggcatagtgattgctgtcaacaaggcggtttgcatactcgtcaatactattgatcttctctgcttgggcagcaaaagatttgtcaaagtcttcatgcttcctgatcagagcttctactccatcgagagaatcaccaaggttatcatcagcaaggaaggcctcttgtttagacatagtggcatctgcatgctcacaatcccgattaaaaagctgcagctccatacactgctcaaactgcacacgtctcctctcccacagctccaacagttccattttctcagtctccagactggcaagcttttctttgatctcatcagtagcataatgattggaattgacaagttcttctccatcatcggcaaacttcttaaatccatcctctgatgcatcaatataacccttatgttcctgatgacgttccagaagactttctgcactagctacatcttttgccagttcatcactttggatcaagactttcatttcatttataaaagaaatgtggtccctgtagtcactgatgaacctttgcagacgataagaatcctccaaccttgcttttcgcaccccagacttctccttaagatttccccaggcagttacaatttcatcttgcttagcagctatctcatctgcactttctggatatgcctcttgcagttgtgcagattctgaacccagggcagtgaccttatcttccactgcagccaaatctctttctagagcttcatgtttgcgtaacagagcattgacactggccagatcttttccataatcatctgatgacagaactttgtctttctcattgatccaattcttggtctcatcagcatcacgatagaagctgtgtatttgctgggcaccagccagtctcttctgacgtttgagtgccagcatcttcagcctttcccaagcttcattaacttccgcttgctttgtactgatcagttctatatctggatgaccctcatctccaagttgatgtgcaagctcattgatgtatgtgacccttgattcatttgcctgaatatccttcaagaagtcctcaaatttcttctgaagaacttccacatgttccaaatctcttcctacttcttcagaagtggcaattgcttctttttctaaaatccatgacataacttcctctgtttcatgcaagaagtggactcttttctgagtaaagagaagcatgcggcctttctctgctgatttggaaagcagcaattcccataacttgatgagtgagtcaagacgttccctgataagttcagaggcatagtgggactcactgatcatgccttcaccattttcttggagttcaataatggcattgctatgagctgaaatctctgcttcaaacg\r\n" ] } ], "source": [ "#Checking header on new tabular format file\n", "!head -2 fasta2tab" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\r\n", "Added column with length of column 2 for 72890 lines.\r\n", "\r\n" ] } ], "source": [ "#Add column with length of sequence\n", "!perl -e '$col = 2;' -e 'while (<>) { s/\\r?\\n//; @F = split /\\t/, 
$_; $len = length($F[$col]); print \"$_\\t$len\\n\" } warn \"\\nAdded column with length of column $col for $. lines.\\n\\n\";' \\\n", "fasta2tab > tab_1" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 72890 218670 34198048 tab_1\r\n" ] } ], "source": [ "!wc tab_1" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#File used to count Cs and Gs will only include the sequence (second whitespace-delimited field of tab_1)\n", "!awk '{print $2}' tab_1 > tab_2" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#Counts CG dinucleotides per sequence, either case: splitting each line on CG gives NF fields, so NF-1 is the number of matches\n", "!awk -F'[Cc][Gg]' '{print NF-1}' tab_2 > CG" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#Counts Cs per sequence, either case (NF-1 = number of separator matches)\n", "!awk -F'[Cc]' '{print NF-1}' tab_2 > C" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#Counts Gs per sequence, either case (NF-1 = number of separator matches)\n", "!awk -F'[Gg]' '{print NF-1}' tab_2 > G" ] }, { "cell_type": "code", "execution_count": 146, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"Locus_1685_Transcript_1/2_Confidence_1.000_Length_7457_transcripts_v2_1tspectrin\t\ttatacgattttatgccgtggaggtgttttcttgcagaagtttcaaattatgtcctaattgtagtgtagaacggactattggacataatttgaaacttctgcaagaaaacacctccacggcataaaatcgtatatggcgatacatgaaccactgcttcaggaacactgtcttccttttgatgtttaaaccatgcactggcccacatttttgtttgctaatataacagaacttttccgctatccctaaccaagtaacaactcatcactctaaacataataacatgcactgaagaaacattacattagtaattctcttgaacactgtgatcagttaatctgttgatgatagcttcagtaatacctagcgagctgctggggtattgtcatcttccagttacattaaaccgtgaggtgtttcgtttcttcagaaataaggaacataccacaattactgcaaatctatgccggactgactaatttagctagaagagaagagttcctcgcagaaagtcttgtagtcgtaagctcccggaacttctcggcccttgtcgtcaacgtaaggattcatacgatcaatacagaagtcggcctgctccttggtgagagactgatagagctcagcctcagtaacatagagacgttttccgccctcagtgagcgccttgaaggcgttgatgacttcttgactagagccgacattttctgtttcacgactgatcatgaaggccatgtactctcccatcgacaccacaccgtcaccgttgggatccaccgttctgaggatgctttggaactccggatcttcttcgccctcttctacaatggagagatcgtagccaagagaacggaggcatgacttgaattcttgatgatccaggtaaccagtcttatccttgtcaaagtgcttgaacatgatagtgaattctttgagtgtatcctcagaaacgccagtggtattgcgggcctggatttgctgttcaagattgtgtttcatgcgcatggcaagttgatccagctgatcccactgctgagcaaggtccactgtgctatgttcggtatacttgttgtccaggataagagcctcttccatttgagccccaagatcctccaggatcctaaggtcctctttcctgtctgctatctcagcactcttcttcttgacctcggccagttgatcttcaaggtccccttgaccgtcaaccattgcaacccttgtatcagagagccaagcgtggaacgagttggcagcttgtgcaaattcctgcctgaggttgtcattatactcctgacgctgggcttctttgcgcaaatcatcctcgcgttcctcaataatcttctgcagattttcccacgtgtcttccaatgcttccatagtaaaccaagtataagggttgatggagacgttatagctcttgatctgacgatcaagcttcctcaacatcataatgtcgtcttccgcttggtccagagaagcgcggaactgggtgtggccatcttgcaaagcacgtatctcctcaacagagttgcagcgcactggatcggtcagatcttcctccgcattctcgaaccaactgttaaaagcagaggctttcttggcaaacaacaggaacaagtcctccaccttcttgtattgatcctgggcatgttgaagtcgttcttttcgagtcttagagtcttccagcagctgctcccacctcctaataaggtcatcatgacgtttgatgatggcaggcgactgctcgtgttgtgattggacaagctcgtctttcagcgcagtcacacgtgcaatgccttcgttttcaaatgcttggagacctgagtcaaaggtttcctgtttggtgaaaagtgtttgcaccgaagacaaatcgcgccccagatcatctgaccgagcaattccttccttgtcaccaatccag
gattccactacatcggccttccaattgaactgcaggaaagctgagttgtcattgagcttggacttgcgataggacgccattctctccaactcggtaagtttactcttaatggatgcaatcctgtggtcaatcaactcagattggtggttacccttttcaatgagcttgttgccagcatcttcaatgtcctgaactctttcccgatgaacctcaagatctgtctcgaatgcttcgtgcttctttaaaagaccttggactgcagccagtgtatcaccatagtcgtcacttccaacaagagtgttcttctcattgatccacgattcctcttcaccaacattagcgctgaactgttgatattccagagactcatctagcttgtgctgcctctgctcagccaggtttttcagttcgtcccagttttcctgcagctgatcacagcgagctttgatttcatcagagctactgtgtccttcgtcaataaatttacggccacacagtagtacagcctgaattctggcctcatgggtgttcaattcagcttccaaacgctgatgtttcttacgtaggttttgtacaccagtcaagtcctttccataatcttcagagctggtcaatagctttttctcctttatccaggactcttcgtcgtccacatctcggtaaaactggtgcaaagcgttggactcgtccaacttcttatggcgatctgcggccatgtcttttactttgtcgtagcgttctgcaataatgcgggatttgtcttgcagtgagtcagcatcaaaatggccaacctcagcgaagtgctgggtttgggcttgaagatcagtgatacgatcctcgtgagcagcaatatctgcttcaacaagttggtgcttcttgatcagattctgaacactggccagatctcgaccatgatcatcgtgagacaaggaagcttcaacctctcccagccagaaatccagttccttaacatttgtattaaattgctgttgttgattagattccttcagatgttgactcttctcattggacttctgtactaggtattcccactgctgctgaagtttggtgattctctctttcacaatctcctcacttccggcacacttgcgctgctcaatgagaccttcagctaggttgatggtctccaagactctctcctgattggctgagacttcagcttcaaacgcctgatgcttttggaacttggactgtaagttggttggatccttgtaagattcatccagcactgtctgtaacttttcactgacccatgcttcaatgtcctcagcatcacgactgaactgctgaatggttttggactcacccagttttgatcggcgttcaaccaaggctgccttcagagtagcccatctcgctaagactgcagaaatcctctctgcaatagctggagagtcataatgattgttatcaatcagccgatcagcattgtctttcagagcattgatcttcacgtcctgcactgcaagtgtctttgtgaagtcttcatgtttcttgatcagagcttcagcaccctccgctgcttctccgacatcttcgctctgaatgatggcctcacgtgtcgccatccactgctccagttgttcggcatctcgattgaataactgcaactccaaacattcatcgagtctttgcttgcgagaagcccaggccttttccagttcttctcgttctgtagccatggtctccagtttttcacggatgtcgggactagcataatgttctttgtccaataatttcttgccgaaatcttcaaacgactggaactcgctgtcacgcgcatcaatttctgcgcgatgttcctgatgcctgtccaacagagcttctgcactggccacatcctttgcaagttcatcagaagttaccaatgccataataccattgatccatgaagtaagatctctgaattcgttgaggaaatggaagtaatctgacgagtcactcagtttgccccttcttgcagccgcctgttctctgag
attagcccacgcttcttctagctcgtgtctcttccattcaatgtcttgtgctgcattcggatgactggatgttaacttagcagcttccacgttgagttcacgaaccttctcttccaatgcggccaagtctctctccaaaacttcatgctttctctgcaaggcctggacactagccaaatcccgaccataatctgaggtggaaagagcagtgtctttctctgaaatccaagccttggtgtcatcagcatccctgtggaatttctgaatttcctgagcattatccaaattttccttcctctgctctgacatggtcttaagattggtccatttctggttcagcctctcaatcatctctcggatcatctcatattctctgtagtgttcaatctttagtttctcagccagtaatgtcagttctctgatacgaacctctttagaaatcatatccttttcaaagtcatcaaacttcttctgctgagcttccacatgttcgtagtcctttccgatttcctctgaggtgacaattgcttccttgtcattgatccaagactccagttcgtgagcatcccgcaacacactgtatctttgaatggaatcttccaatttcttcttacgttcttcgcctttgtccatcaggtcttggtatttatcatccaaagcctgctgtctgttagcaacagtgtccacttcaggagcctgcgacagaaggtcctgtgaggctttgtgggagtcaatcctcttgacatatgccgcaggaacaaaaccttgtctgtcattagtttctactttccaccaatccttgttgctagaattgagtagggtcagaatgtcacccttctgcatggacacctctcttgcagttttctcctgatagtcataaagagcaactacacattccttgtcagagatatctgtgacatgagccgctggcttgcagtgttgactttgttctctcagcccatctacaacagttccatatgctctcaagtctgacatgatggcatcatgtttggtaagcaatgcctgtgcactgtcttcatcttttccatagtcatcactggtaacaattggttctttctccttcaaccaggattctgcttcagcaacatcagctagatactgatgagcttgaagagagtcatccagatgtccttttcgcacatgtgccttatccttgaattccagccatttctggtcaaggtcatcaatcttctctttgatctcatcagcggcaaagtgcccattatctatcatttgaacaccattatcgcaaacagctctgacacgtggttcatgaccagcaatttcagtcattaaagcctggtgtttctttgacagattctgggctcctgtcaaatctcggccagtgtttgtggatgaagcaacaggttccttctcccttatccatgcctcctcatcttcaacatcatgaaggaatcgtttaagtctctcagcatcctggagctttgctttgcgagctaggagaggagcctgtagctgttggtatcggctatttaaaacctccttcttttctttgatggaaggagcatcaaagtggtcagcctccgcaaataaattagcttgtgcattaacaacctcaatcttttctgcacgcgctatgacatcagcctctatcatggcatgtttcttttggaggttttgcacacttgtcagatcctttcctacatcctcaagagcaagcaagttttcaacttcagtgaaccacaactcaacatcttcggcacctcggttgaactgctgttgctgtgctgcttctttcaatttcaaccctttatcatttgatctctcaaacagataggcccacaacttatgcagttcatcaagtctctctctgatttgatctgaggcatagtgttcatctccaatcagttgttcaccagtgttgtcaactgcatcaagccggctttgattggcattcaactcagcctcaaatgcttggtgcttctggatctttccttgcagattggttggatccttgt
aagattcatcactggcaatcttcagcttttctgtaatccagctttttacttcatcacagtctctctcaaactgctgcagtttacgagattcctcaagtttcaacctgcgcgccttggacaactccccaatgttattacgcctttccattataccatctcttctctcacgcacctcatctgaggcatagtgattgctgtcaacaaggcggtttgcatactcgtcaatactattgatcttctctgcttgggcagcaaaagatttgtcaaagtcttcatgcttcctgatcagagcttctactccatcgagagaatcaccaaggttatcatcagcaaggaaggcctcttgtttagacatagtggcatctgcatgctcacaatcccgattaaaaagctgcagctccatacactgctcaaactgcacacgtctcctctcccacagctccaacagttccattttctcagtctccagactggcaagcttttctttgatctcatcagtagcataatgattggaattgacaagttcttctccatcatcggcaaacttcttaaatccatcctctgatgcatcaatataacccttatgttcctgatgacgttccagaagactttctgcactagctacatcttttgccagttcatcactttggatcaagactttcatttcatttataaaagaaatgtggtccctgtagtcactgatgaacctttgcagacgataagaatcctccaaccttgcttttcgcaccccagacttctccttaagatttccccaggcagttacaatttcatcttgcttagcagctatctcatctgcactttctggatatgcctcttgcagttgtgcagattctgaacccagggcagtgaccttatcttccactgcagccaaatctctttctagagcttcatgtttgcgtaacagagcattgacactggccagatcttttccataatcatctgatgacagaactttgtctttctcattgatccaattcttggtctcatcagcatcacgatagaagctgtgtatttgctgggcaccagccagtctcttctgacgtttgagtgccagcatcttcagcctttcccaagcttcattaacttccgcttgctttgtactgatcagttctatatctggatgaccctcatctccaagttgatgtgcaagctcattgatgtatgtgacccttgattcatttgcctgaatatccttcaagaagtcctcaaatttcttctgaagaacttccacatgttccaaatctcttcctacttcttcagaagtggcaattgcttctttttctaaaatccatgacataacttcctctgtttcatgcaagaagtggactcttttctgagtaaagagaagcatgcggcctttctctgctgatttggaaagcagcaattcccataacttgatgagtgagtcaagacgttccctgataagttcagaggcatagtgggactcactgatcatgccttcaccattttcttggagttcaataatggcattgctatgagctgaaatctctgcttcaaacg\t7457\t185\t1914\t1444\n", "Locus_9072_Transcript_1/1_Confidence_0.667_Length_101_transcripts_v2_72924t---NA---\t\tttcttgaagatttttttaagacaatcgtgttcagttgtaataatttttacataagtaatctaaatattattttttnnnnnnnnnnnnnnnnnagtcaaggg\t101\t1\t7\t12\n" ] } ], "source": [ "#Combining counts\n", "!paste tab_1 \\\n", "CG \\\n", "C \\\n", "G \\\n", "> comb\n", "!head -1 comb\n", "!tail -1 comb" ] }, { "cell_type": "code", "execution_count": 147, "metadata": { "collapsed": 
false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Locus_9419_Transcript_1/1_Confidence_1.000_Length_142_transcripts_v2_72905t---NA---\t\tttggctcatccttcttgtctgtcttcttggccctctttcctctggtctttggctcctgatctgcctcctcgcccttctcctcttcctttggctcatccttcttgtctgtcttcttggccctctttcctctggtctttggctt\t142\t1\t51\t23\r\n", "Locus_9418_Transcript_2/3_Confidence_0.286_Length_141_transcripts_v2_72906t---NA---\t\tgaaagagggccaagaagacagacaagaaggatgagccaaaggaagaggagaaggtcgagaagaatgaggatgaagggaaggaagatgagaagccaaagaccagaggaaagagggccaagaagacagacaacaaggatgagc\t141\t1\t17\t52\r\n", "transcripts_v2_72907t---NA---\t\tcaccttaggaaatgattagagaatagaagggagaatatacatactgatgttaggatttaatgggtcactttaaccctttaaaccctaacatcagtatgtatattctccnnnnnnnnnnnncttctattctctaatca\t137\t0\t23\t19\r\n", "transcripts_v2_72908t---NA---\t\tttgttggatgggtagagaatgggaattcagtttcagattaggtatgagaccatggatagttgtagctttctcagcacctgttgtagcagcttttgcagtatttgttgtttatnnnnnnnnnnggacaagctagcttttt\t139\t0\t16\t35\r\n", "transcripts_v2_72909thypothetical\t\tgaactacgtcacgtgatgacaaacttgggagagaaacttacagatgaggaagttgatgagatgatccgagaagcagatactgacagtgaagaggagatcaaggaagcctttagagtgtttgacaaagatggaaacg\t136\t4\t18\t41\r\n", "Locus_9529_Transcript_1/1_Confidence_1.000_Length_136_transcripts_v2_72910tp700\t\tccaggattggacgaatagatcatttggttcatttatataccctatcggtccaggggatttatatgtacatcatgcaatagcacttggcttacatgtaactgtcctcatcctactaaagggaggtcttgaagctcgt\t136\t3\t28\t29\r\n", "transcripts_v2_72911t---NA---\t\tgttaagtgtagctgatggagattaatcctttgtaatgttgagggtgaccataaatgaaaaaaaaagtgtgaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaannnnnnnnnna\t131\t0\t5\t19\r\n", "Locus_990_Transcript_3/3_Confidence_0.600_Length_134_transcripts_v2_72912t---NA---\t\tgctatttctgatgacaacatataagaattaccacaccacatagtaattagacacaatatgagcacaacacaaggacaagagtagatcatactaaacatagaagatgtacctaattgagaagatgtaatgatgaa\t134\t0\t22\t22\r\n", 
"transcripts_v2_72913t---NA---\t\tttccgatctagcggttaacctttccttttcctttcgtacaccatcaatctcatgttacacggttaataaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaannnnnnnnnna\t127\t4\t19\t8\r\n", "Locus_9106_Transcript_3/7_Confidence_0.200_Length_128_transcripts_v2_72914t---NA---\t\tttccgatctagcggttaacctttccttttcctttcgtacaccatcaatctcatgttacacggttaataaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaannnnnnnnnnnn\t128\t4\t19\t8\r\n", "Locus_9106_Transcript_7/7_Confidence_0.300_Length_128_transcripts_v2_72915t---NA---\t\tttccgatctagcggttaacctttccttttcctttcgtacaccatcaatctcatgttacacggttaataaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaannnnnnnnnnct\t128\t4\t20\t8\r\n", "Locus_9936_Transcript_1/1_Confidence_1.000_Length_124_transcripts_v2_72916t---NA---\t\ttagtcgattagttactggaataaaaagtgcaagtctgttgagggtttgccatgtaatttgctgcaaacacggcaaaccctcaacagacttgcactttttattccagtaactaatcgactagtaa\t124\t3\t24\t24\r\n", "Locus_9701_Transcript_1/1_Confidence_1.000_Length_123_transcripts_v2_72917t---NA---\t\tggtgcttttgcagctgctcctgctggcgctggcgccagagcaggtgctgctggagccgcctctggcggtggtgcttttgcagctgctcctgctggcgctggcgccagagcaggtgctgctgga\t123\t6\t36\t48\r\n", "Locus_9530_Transcript_1/1_Confidence_1.000_Length_118_transcripts_v2_72918t---NA---\t\tagacaggcagacaaacagacaggcagacaaacagacaggcagacaaacagacagacagacaaacagacaggcagacaaacagacaggcagacaaacagacaggcagacaaacagacag\t118\t0\t29\t29\r\n", "Locus_9226_Transcript_1/1_Confidence_1.000_Length_116_transcripts_v2_72919t---NA---\t\taataaatcatcttctgaagggtttgttattgggtgcacagagtcaatgaaatggggataattttgtttgactctgtgcacccaataacaaacccttcagaagatgatttattaaca\t116\t0\t18\t23\r\n", "Locus_9682_Transcript_1/1_Confidence_1.000_Length_116_transcripts_v2_72920t---NA---\t\tataaaaataccaggaattgaggaagcaagcagcggcatgccggagcagacaggcaaaaaggaaaaccagaatacgaaaagaaaggaagacgggaatctgcaagacgctttggtgga\t116\t5\t19\t35\r\n", 
"Locus_9570_Transcript_1/1_Confidence_1.000_Length_112_transcripts_v2_72921t---NA---\t\tgctagtttcaggtgtgcattcattgaataaatgtatttgtatttagtacgagtgtataataaagcagtaaatacaaatacatttattcaatgaatgcacacctgaaactagc\t112\t1\t15\t19\r\n", "Locus_9787_Transcript_1/1_Confidence_0.714_Length_111_transcripts_v2_72922t---NA---\t\ttgcgtagctcggtggatgtatagagaatgggaattcagtttcagattaggtatgagaccatggatatttgtagnnnnnnnnnnncagcactctcagcacctgttgtagcag\t111\t2\t16\t29\r\n", "transcripts_v2_72923t---NA---\t\tggacgatgaggannnnnnnnnnnnnnctgatgacagtaacgatgatgatcttgatgatgatagcgttgacgagaacgacgaggatgaagactatgaagtga\t101\t6\t10\t29\r\n", "Locus_9072_Transcript_1/1_Confidence_0.667_Length_101_transcripts_v2_72924t---NA---\t\tttcttgaagatttttttaagacaatcgtgttcagttgtaataatttttacataagtaatctaaatattattttttnnnnnnnnnnnnnnnnnagtcaaggg\t101\t1\t7\t12\r\n" ] } ], "source": [ "!tail -20 comb" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Calculating CpGo/e based on [Gavery and Roberts (2010)](http://www.biomedcentral.com/1471-2164/11/483)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"BMC_Genomics___Full_text___DNA_methylation_patterns_provide_insight_into_epigenetic_regulation_in_the_Pacific_oyster__Crassostrea_gigas__1A0683A5.png\"/" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "awk: division by zero\r\n", " input record number 9164, file comb\r\n", " source line number 1\r\n" ] } ], "source": [ "!awk '{print $1, \"\\t\", (($4)/($5*$6))*(($3^2)/($3-1))}' comb > ID_CpG #use ^ instead of ** for exponent\n" ] }, { "cell_type": "code", "execution_count": 145, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Locus_1685_Transcript_1/2_Confidence_1.000_Length_7457_transcripts_v2_1tspectrin \t 0.499212\n", "Locus_1685_Transcript_2/2_Confidence_1.000_Length_7457_transcripts_v2_2tspectrin \t 0.494242\n", 
"Locus_177_Transcript_12/12_Confidence_0.500_Length_6585_transcripts_v2_3tvitellogenin \t 0.669218\n", "transcripts_v2_5t---NA--- \t 0.266393\n", "Locus_180_Transcript_15/16_Confidence_0.327_Length_6143_transcripts_v2_6t---NA--- \t 0.279666\n", "Locus_180_Transcript_14/16_Confidence_0.308_Length_6142_transcripts_v2_7t---NA--- \t 0.279907\n", "Locus_180_Transcript_13/16_Confidence_0.308_Length_6140_transcripts_v2_8t---NA--- \t 0.280334\n", "Locus_180_Transcript_7/16_Confidence_0.288_Length_6119_transcripts_v2_9t---NA--- \t 0.280651\n", "Locus_180_Transcript_11/16_Confidence_0.250_Length_5898_transcripts_v2_10t---NA--- \t 0.292432\n", "Locus_1199_Transcript_3/4_Confidence_0.750_Length_5569_transcripts_v2_11tserine \t 0.464167\n", "transcripts_v2_9163 \t 0.849883\n", "Locus_17783_Transcript_1/1_Confidence_1.000_Length_207_transcripts_v2_9164tmps \t 0\n", "transcripts_v2_9165t---NA--- \t 0.636027\n", "Locus_17917_Transcript_1/1_Confidence_1.000_Length_207_transcripts_v2_9166t---NA--- \t 0.410266\n", "Locus_18343_Transcript_1/1_Confidence_1.000_Length_207_transcripts_v2_9167t---NA--- \t 0.562175\n", "Locus_18403_Transcript_1/1_Confidence_1.000_Length_207_transcripts_v2_9168t---NA--- \t 0.814834\n", "Locus_18577_Transcript_1/1_Confidence_1.000_Length_207_transcripts_v2_9169t---NA--- \t 0\n", "Locus_18650_Transcript_1/1_Confidence_1.000_Length_207_transcripts_v2_9170t---NA--- \t 0\n", "transcripts_v2_9171t---NA--- \t 0.496778\n", "Locus_18916_Transcript_1/1_Confidence_1.000_Length_207_transcripts_v2_9172t---NA--- \t 9163 18327 729602 ID_CpG\n" ] } ], "source": [ "!head ID_CpG\n", "!tail ID_CpG\n", "!wc ID_CpG" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Now joining CpG to annotation, but first must sort files." 
] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Locus_1000_Transcript_1/1_Confidence_1.000_Length_292_transcripts_v2_6389\tnogo-b\tsp\tQ99LJ8\tNGBR_MOUSE\t35.48\t93\t57\t2\t283\t5\t155\t244\t6e-12\t63.5\r\n", "Locus_10015_Transcript_1/2_Confidence_1.000_Length_905_transcripts_v2_1500\tsp\tQ86UC2\tRSPH3_HUMAN\t71.43\t175\t50\t0\t17\t541\t297\t471\t3e-50\t 180\r\n", "Locus_10015_Transcript_2/2_Confidence_1.000_Length_896_transcripts_v2_1531\tsp\tQ86UC2\tRSPH3_HUMAN\t71.43\t175\t50\t0\t17\t541\t297\t471\t3e-50\t 180\r\n", "Locus_10024_Transcript_1/1_Confidence_1.000_Length_411_transcripts_v2_4529\tadp-ribosylation\tsp\tQ99PE9\tARL4D_MOUSE\t41.98\t131\t71\t2\t29\t409\t1\t130\t2e-31\t 116\r\n", "Locus_10027_Transcript_1/1_Confidence_1.000_Length_375_transcripts_v2_4989\tkelch\tsp\tQ5R8W1\tKLDC4_PONAB\t28.81\t118\t79\t4\t23\t364\t136\t252\t2e-06\t49.3\r\n", "Locus_10037_Transcript_1/1_Confidence_1.000_Length_428_transcripts_v2_4337\thypothetical\tsp\tC3YZ51\tUBA5_BRAFL\t62.24\t143\t47\t2\t2\t421\t239\t377\t1e-40\t 145\r\n", "Locus_1003_Transcript_1/1_Confidence_1.000_Length_421_transcripts_v2_4420\tprotein\tsp\tQ22A30\tRL15_TETTS\t70.09\t107\t32\t0\t321\t1\t1\t107\t2e-39\t 137\r\n", "Locus_10043_Transcript_1/1_Confidence_1.000_Length_339_transcripts_v2_5512\tphotosystem\tsp\tQ5ENP6\tPSAL_ISOGA\t57.58\t66\t27\t1\t139\t336\t10\t74\t7e-18\t79.0\r\n", "Locus_10044_Transcript_1/1_Confidence_1.000_Length_273_transcripts_v2_6833\ttransmembrane\tsp\tQ96HH6\tTMM19_HUMAN\t58.14\t86\t34\t1\t5\t262\t174\t257\t5e-14\t69.7\r\n", "Locus_10045_Transcript_1/1_Confidence_1.000_Length_696_transcripts_v2_2383\tsp\tQ9QY36\tNAA10_MOUSE\t70.28\t212\t44\t1\t1\t579\t2\t213\t5e-99\t 295\r\n" ] } ], "source": [ "#Sorting Pdam Uniprot/Swissprot annotation file. 
This file was the result of work done in another notebook: \n", "#Pdam_blast_anno.ipynb\n", "!sort Pdam_blastx_uniprot_sql.tab | tail -n +2 > Pdam_blastx_uniprot_sql.tab.sorted\n", "!head Pdam_blastx_uniprot_sql.tab.sorted" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Locus_10004_Transcript_1/1_Confidence_1.000_Length_174_transcripts_v2_10976\ttransport\r", "\r\n", "Locus_1000_Transcript_1/1_Confidence_1.000_Length_292_transcripts_v2_6389\tdevelopmental processes\r", "\r\n", "Locus_1000_Transcript_1/1_Confidence_1.000_Length_292_transcripts_v2_6389\tother biological processes\r", "\r\n", "Locus_10024_Transcript_1/1_Confidence_1.000_Length_411_transcripts_v2_4529\tsignal transduction\r", "\r\n", "Locus_10037_Transcript_1/1_Confidence_1.000_Length_428_transcripts_v2_4337\tother metabolic processes\r", "\r\n", "Locus_1003_Transcript_1/1_Confidence_1.000_Length_421_transcripts_v2_4420\tprotein metabolism\r", "\r\n", "Locus_10043_Transcript_1/1_Confidence_1.000_Length_339_transcripts_v2_5512\tother metabolic processes\r", "\r\n", "Locus_10045_Transcript_1/1_Confidence_1.000_Length_696_transcripts_v2_2383\tprotein metabolism\r", "\r\n", "Locus_10059_Transcript_1/1_Confidence_1.000_Length_589_transcripts_v2_3011\ttransport\r", "\r\n", "Locus_10063_Transcript_1/1_Confidence_1.000_Length_177_transcripts_v2_10761\tother biological processes\r", "\r\n" ] } ], "source": [ "#Sorting GOSlim annotation file. 
This file was the result of work done in another notebook: Pdam_blast_anno.ipynb\n", "!sort Pdam_GOSlim.tab | tail -n +2 > Pdam_GOSlim.sorted\n", "!head Pdam_GOSlim.sorted" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Locus_10000_Transcript_2/3_Confidence_0.667_Length_676_transcripts_v2_2486 \t 0.761861\r\n", "Locus_10001_Transcript_1/1_Confidence_1.000_Length_199_transcripts_v2_9515 \t 0.635946\r\n", "Locus_10002_Transcript_1/1_Confidence_1.000_Length_695_transcripts_v2_2386 \t 0.695709\r\n", "Locus_10003_Transcript_1/1_Confidence_1.000_Length_609_transcripts_v2_2870 \t 0.12449\r\n", "Locus_10004_Transcript_1/1_Confidence_1.000_Length_174_transcripts_v2_10976 \t 0.230119\r\n", "Locus_10005_Transcript_1/1_Confidence_1.000_Length_207_transcripts_v2_9134 \t 0.530625\r\n", "Locus_10006_Transcript_1/1_Confidence_1.000_Length_167_transcripts_v2_11475 \t 1.34405\r\n", "Locus_10007_Transcript_1/2_Confidence_0.857_Length_1261_transcripts_v2_788 \t 0.746746\r\n", "Locus_10007_Transcript_2/2_Confidence_0.857_Length_1272_transcripts_v2_779 \t 0.758383\r\n", "Locus_1000_Transcript_1/1_Confidence_1.000_Length_292_transcripts_v2_6389 \t 0.266367\r\n" ] } ], "source": [ "#Sorting CpG file\n", "!sort ID_CpG > ID_CpG.sorted\n", "!head ID_CpG.sorted" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# For this analysis, *Symbiodinium* sequences were removed. 
Using file generated from Pdam_zoox_removal.ipynb, ID_CpG.sorted2" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!join ID_CpG.sorted2 Pdam_blastx_uniprot_sql.tab.sorted | awk '{print $1, \"\\t\", $2}' > Pdam_cpg_anno" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Locus_1000_Transcript_1/1_Confidence_1.000_Length_292_transcripts_v2_6389 \t 0.266367\n", "Locus_10015_Transcript_1/2_Confidence_1.000_Length_905_transcripts_v2_1500 \t 0.364383\n", "Locus_10015_Transcript_2/2_Confidence_1.000_Length_896_transcripts_v2_1531 \t 0.368691\n", "Locus_10024_Transcript_1/1_Confidence_1.000_Length_411_transcripts_v2_4529 \t 1.4668\n", "Locus_10027_Transcript_1/1_Confidence_1.000_Length_375_transcripts_v2_4989 \t 0.847464\n", "Locus_10037_Transcript_1/1_Confidence_1.000_Length_428_transcripts_v2_4337 \t 0.578951\n", "Locus_1003_Transcript_1/1_Confidence_1.000_Length_421_transcripts_v2_4420 \t 0.914415\n", "Locus_10043_Transcript_1/1_Confidence_1.000_Length_339_transcripts_v2_5512 \t 0.52846\n", "Locus_10044_Transcript_1/1_Confidence_1.000_Length_273_transcripts_v2_6833 \t 0.533378\n", "Locus_10045_Transcript_1/1_Confidence_1.000_Length_696_transcripts_v2_2383 \t 0.206723\n", " 19133 38266 1468482 Pdam_cpg_anno\n" ] } ], "source": [ "!head Pdam_cpg_anno\n", "!wc Pdam_cpg_anno" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!join ID_CpG.sorted2 Pdam_GOSlim.sorted > Pdam_cpg_GOslim" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Locus_10004_Transcript_1/1_Confidence_1.000_Length_174_transcripts_v2_10976 0.230119 transport\r", "\r\n", "Locus_1000_Transcript_1/1_Confidence_1.000_Length_292_transcripts_v2_6389 0.266367 developmental 
processes\r", "\r\n", "Locus_1000_Transcript_1/1_Confidence_1.000_Length_292_transcripts_v2_6389 0.266367 other biological processes\r", "\r\n", "Locus_10024_Transcript_1/1_Confidence_1.000_Length_411_transcripts_v2_4529 1.4668 signal transduction\r", "\r\n", "Locus_10037_Transcript_1/1_Confidence_1.000_Length_428_transcripts_v2_4337 0.578951 other metabolic processes\r", "\r\n", "Locus_1003_Transcript_1/1_Confidence_1.000_Length_421_transcripts_v2_4420 0.914415 protein metabolism\r", "\r\n", "Locus_10043_Transcript_1/1_Confidence_1.000_Length_339_transcripts_v2_5512 0.52846 other metabolic processes\r", "\r\n", "Locus_10045_Transcript_1/1_Confidence_1.000_Length_696_transcripts_v2_2383 0.206723 protein metabolism\r", "\r\n", "Locus_10059_Transcript_1/1_Confidence_1.000_Length_589_transcripts_v2_3011 0.384943 transport\r", "\r\n", "Locus_10063_Transcript_1/1_Confidence_1.000_Length_177_transcripts_v2_10761 0.499315 other biological processes\r", "\r\n" ] } ], "source": [ "!head Pdam_cpg_GOslim" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Locus_10004_Transcript_1/1_Confidence_1.000_Length_174_transcripts_v2_10976 \t 0.230119 \t transport\r", " \r\n", "Locus_1000_Transcript_1/1_Confidence_1.000_Length_292_transcripts_v2_6389 \t 0.266367 \t developmental processes\r", " \r\n", "Locus_1000_Transcript_1/1_Confidence_1.000_Length_292_transcripts_v2_6389 \t 0.266367 \t other biological processes\r", " \r\n", "Locus_10024_Transcript_1/1_Confidence_1.000_Length_411_transcripts_v2_4529 \t 1.4668 \t signal transduction\r", " \r\n", "Locus_10037_Transcript_1/1_Confidence_1.000_Length_428_transcripts_v2_4337 \t 0.578951 \t other metabolic processes\r", " \r\n", "Locus_1003_Transcript_1/1_Confidence_1.000_Length_421_transcripts_v2_4420 \t 0.914415 \t protein metabolism\r", " \r\n", "Locus_10043_Transcript_1/1_Confidence_1.000_Length_339_transcripts_v2_5512 \t 0.52846 \t 
other metabolic processes\r", " \r\n", "Locus_10045_Transcript_1/1_Confidence_1.000_Length_696_transcripts_v2_2383 \t 0.206723 \t protein metabolism\r", " \r\n", "Locus_10059_Transcript_1/1_Confidence_1.000_Length_589_transcripts_v2_3011 \t 0.384943 \t transport\r", " \r\n", "Locus_10063_Transcript_1/1_Confidence_1.000_Length_177_transcripts_v2_10761 \t 0.499315 \t other biological processes\r", " \r\n" ] } ], "source": [ "#Putting tabs in between columns\n", "#The GO Slim terms in Pdam_cpg_GOslim end in a carriage return. pandas reads a bare \\r as a line\n", "#break, so every record would be followed by an empty row. Strip the \\r first, then drop the\n", "#trailing blanks that the empty $4-$6 fields leave behind.\n", "!tr -d '\\r' < Pdam_cpg_GOslim | awk '{print $1, \"\\t\", $2, \"\\t\", $3, $4, $5, $6}' | sed 's/ *$//' > Pdam_cpg_GOslim.tab\n", "!head Pdam_cpg_GOslim.tab" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Now time to plot data using pandas and matplotlib" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0 Locus_10004_Transcript_1/1_Confidence_1.000_Le... 0.230119 transport
1 NaN NaN
2 Locus_1000_Transcript_1/1_Confidence_1.000_Len... 0.266367 developmental processes
3 NaN NaN
4 Locus_1000_Transcript_1/1_Confidence_1.000_Len... 0.266367 other biological processes
5 NaN NaN
6 Locus_10024_Transcript_1/1_Confidence_1.000_Le... 1.466800 signal transduction
7 NaN NaN
8 Locus_10037_Transcript_1/1_Confidence_1.000_Le... 0.578951 other metabolic processes
9 NaN NaN
10 Locus_1003_Transcript_1/1_Confidence_1.000_Len... 0.914415 protein metabolism
11 NaN NaN
12 Locus_10043_Transcript_1/1_Confidence_1.000_Le... 0.528460 other metabolic processes
13 NaN NaN
14 Locus_10045_Transcript_1/1_Confidence_1.000_Le... 0.206723 protein metabolism
15 NaN NaN
16 Locus_10059_Transcript_1/1_Confidence_1.000_Le... 0.384943 transport
17 NaN NaN
18 Locus_10063_Transcript_1/1_Confidence_1.000_Le... 0.499315 other biological processes
19 NaN NaN
20 Locus_10068_Transcript_1/1_Confidence_1.000_Le... 0.689883 transport
21 NaN NaN
22 Locus_10069_Transcript_1/1_Confidence_1.000_Le... 0.348955 other biological processes
23 NaN NaN
24 Locus_10069_Transcript_1/1_Confidence_1.000_Le... 0.348955 other metabolic processes
25 NaN NaN
26 Locus_10073_Transcript_1/1_Confidence_1.000_Le... 0.640578 protein metabolism
27 NaN NaN
28 Locus_10078_Transcript_1/1_Confidence_1.000_Le... 0.157409 cell cycle and proliferation
29 Locus_10078_Transcript_1/1_Confidence_1.000_Le... 0.157409 other metabolic processes
............
73380 transcripts_v2_977 1.236260 signal transduction
73381 NaN NaN
73382 transcripts_v2_98 0.341826 other biological processes
73383 NaN NaN
73384 transcripts_v2_9821 0.635342 other biological processes
73385 NaN NaN
73386 transcripts_v2_9821 0.635342 other metabolic processes
73387 NaN NaN
73388 transcripts_v2_9821 0.635342 protein metabolism
73389 NaN NaN
73390 transcripts_v2_983 0.508182 other metabolic processes
73391 NaN NaN
73392 transcripts_v2_983 0.508182 transport
73393 NaN NaN
73394 transcripts_v2_9880 0.891478 protein metabolism
73395 NaN NaN
73396 transcripts_v2_9896 0.394843 other metabolic processes
73397 NaN NaN
73398 transcripts_v2_991 0.638445 RNA metabolism
73399 NaN NaN
73400 transcripts_v2_991 0.638445 developmental processes
73401 NaN NaN
73402 transcripts_v2_9910 0.761721 stress response
73403 NaN NaN
73404 transcripts_v2_9932 0.386127 other metabolic processes
73405 NaN NaN
73406 transcripts_v2_9932 0.386127 protein metabolism
73407 NaN NaN
73408 transcripts_v2_9936 0.892228 RNA metabolism
73409 NaN NaN
\n", "

73410 rows × 3 columns

\n", "
" ], "text/plain": [ " 0 1 \\\n", "0 Locus_10004_Transcript_1/1_Confidence_1.000_Le... 0.230119 \n", "1 NaN \n", "2 Locus_1000_Transcript_1/1_Confidence_1.000_Len... 0.266367 \n", "3 NaN \n", "4 Locus_1000_Transcript_1/1_Confidence_1.000_Len... 0.266367 \n", "5 NaN \n", "6 Locus_10024_Transcript_1/1_Confidence_1.000_Le... 1.466800 \n", "7 NaN \n", "8 Locus_10037_Transcript_1/1_Confidence_1.000_Le... 0.578951 \n", "9 NaN \n", "10 Locus_1003_Transcript_1/1_Confidence_1.000_Len... 0.914415 \n", "11 NaN \n", "12 Locus_10043_Transcript_1/1_Confidence_1.000_Le... 0.528460 \n", "13 NaN \n", "14 Locus_10045_Transcript_1/1_Confidence_1.000_Le... 0.206723 \n", "15 NaN \n", "16 Locus_10059_Transcript_1/1_Confidence_1.000_Le... 0.384943 \n", "17 NaN \n", "18 Locus_10063_Transcript_1/1_Confidence_1.000_Le... 0.499315 \n", "19 NaN \n", "20 Locus_10068_Transcript_1/1_Confidence_1.000_Le... 0.689883 \n", "21 NaN \n", "22 Locus_10069_Transcript_1/1_Confidence_1.000_Le... 0.348955 \n", "23 NaN \n", "24 Locus_10069_Transcript_1/1_Confidence_1.000_Le... 0.348955 \n", "25 NaN \n", "26 Locus_10073_Transcript_1/1_Confidence_1.000_Le... 0.640578 \n", "27 NaN \n", "28 Locus_10078_Transcript_1/1_Confidence_1.000_Le... 0.157409 \n", "29 Locus_10078_Transcript_1/1_Confidence_1.000_Le... 0.157409 \n", "... ... ... 
\n", "73380 transcripts_v2_977 1.236260 \n", "73381 NaN \n", "73382 transcripts_v2_98 0.341826 \n", "73383 NaN \n", "73384 transcripts_v2_9821 0.635342 \n", "73385 NaN \n", "73386 transcripts_v2_9821 0.635342 \n", "73387 NaN \n", "73388 transcripts_v2_9821 0.635342 \n", "73389 NaN \n", "73390 transcripts_v2_983 0.508182 \n", "73391 NaN \n", "73392 transcripts_v2_983 0.508182 \n", "73393 NaN \n", "73394 transcripts_v2_9880 0.891478 \n", "73395 NaN \n", "73396 transcripts_v2_9896 0.394843 \n", "73397 NaN \n", "73398 transcripts_v2_991 0.638445 \n", "73399 NaN \n", "73400 transcripts_v2_991 0.638445 \n", "73401 NaN \n", "73402 transcripts_v2_9910 0.761721 \n", "73403 NaN \n", "73404 transcripts_v2_9932 0.386127 \n", "73405 NaN \n", "73406 transcripts_v2_9932 0.386127 \n", "73407 NaN \n", "73408 transcripts_v2_9936 0.892228 \n", "73409 NaN \n", "\n", " 2 \n", "0 transport \n", "1 NaN \n", "2 developmental processes \n", "3 NaN \n", "4 other biological processes \n", "5 NaN \n", "6 signal transduction \n", "7 NaN \n", "8 other metabolic processes \n", "9 NaN \n", "10 protein metabolism \n", "11 NaN \n", "12 other metabolic processes \n", "13 NaN \n", "14 protein metabolism \n", "15 NaN \n", "16 transport \n", "17 NaN \n", "18 other biological processes \n", "19 NaN \n", "20 transport \n", "21 NaN \n", "22 other biological processes \n", "23 NaN \n", "24 other metabolic processes \n", "25 NaN \n", "26 protein metabolism \n", "27 NaN \n", "28 cell cycle and proliferation \n", "29 other metabolic processes \n", "... ... 
\n", "73380 signal transduction \n", "73381 NaN \n", "73382 other biological processes \n", "73383 NaN \n", "73384 other biological processes \n", "73385 NaN \n", "73386 other metabolic processes \n", "73387 NaN \n", "73388 protein metabolism \n", "73389 NaN \n", "73390 other metabolic processes \n", "73391 NaN \n", "73392 transport \n", "73393 NaN \n", "73394 protein metabolism \n", "73395 NaN \n", "73396 other metabolic processes \n", "73397 NaN \n", "73398 RNA metabolism \n", "73399 NaN \n", "73400 developmental processes \n", "73401 NaN \n", "73402 stress response \n", "73403 NaN \n", "73404 other metabolic processes \n", "73405 NaN \n", "73406 protein metabolism \n", "73407 NaN \n", "73408 RNA metabolism \n", "73409 NaN \n", "\n", "[73410 rows x 3 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "jData = pd.read_table('Pdam_cpg_GOslim.tab', header=None)\n", "jData" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import matplotlib.pyplot as plt " ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAg8AAAD7CAYAAADtuXeEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsnXmYXEW5xn9vwp6ArBpQIZAAsgcCEQFlYhT1CoisCiJB\nLi7IoixeroIEcYkoIOAFRCRBRPaABARZnCB7IMkkYREFA3pRL1G2gLK/94+qZk46Pcl05sx098z3\ne57z9Kk6daq+801Pn+9U1alXtgmCIAiCIOgugxptQBAEQRAErUUED0EQBEEQ1EUED0EQBEEQ1EUE\nD0EQBEEQ1EUED0EQBEEQ1EUED0EQBEEQ1MUyjTYgCJYWSfGecRAEwVJgWz05P4KHoKXp6T9A0Imk\nCbYnNNqO/kD4slzCn+VSxoNXDFsEQVBheKMN6EcMb7QB/YzhjTYgWJgIHoIgCIIgqIsIHoIgqDC5\n0Qb0IyY32oB+xuRGGxAsjELbImhVJDnmPARBENRHGb+d0fMQBAEAktoabUN/IXxZLuHP5iOChyAI\ngiAI6iKGLYKWJYYtgiAI6ieGLYIgCIIg6HMieBigSPp6o23oDpK+ImnFRtsxEIhx5fIIX5ZL+LP5\niGGLAYqkBbZXrpEvADfBF0PSYOAxYFvb/6xxvOE2Bt1joA0vSWqzPa3RdvQXwp/lUsawRSxPPQCR\nNBFYUdIs4EHgBOBm4F5gNPAfko4HtgNWBK6qLA0r6QnSO9e7AcsC+9h+VNLOwI9yEwY+AGwLfAt4\nARgJtAOH2bakTwP/DQi4wfbxuf4XgfOADwFXA+sA7ZLm2x5XfS3t7eX5Jegdxo5ttAV9T9zoyiX8\n2XzEsMUAJN+o/217a9sHkm7gI4H/sb257T8D37C9HbAVsLOkzSunA/NtjwbOBY7N+ceQAoOtgZ2A\nf+f87YDDgU2BEcCektYBJgJjgVHAdpI+kcuvBNxre5TtU4C/Am21AocgCIKgMUTPQ1DhSdvTC+n9\nJB1K+o6sTbr5P5iPTcmfM4E98/5dwBmSLgGm2H4qj4BMt/0EgKRLSYHFa8C0ylBEPucDwK+AN0g9\nDt1i4kQYNiztDx0KI0fCqFEp3dGRPiPdvfRVV/WO/ypUxq0rT5H9OV0co28Ge1o9Hf4sxX/jswuf\noARizsMApTjnQdJwYKrtLXJ6fdIwxra2n5c0CWi3/XNJ84DRtp+RtC3wA9tj83mbAR8HDgM+Qgo6\nJthuy8c/B2wOTAP2sn1Qzj8E2MT2sdVzMYrt1bgGx7BFeXR0dN74y2Ts2JjzEPSM8Ge5xKuaQU94\nTVJXPU+rAC8BL0h6B/CxJVUmaYTth2yfCtwPbJwPjZE0XNIgYF/gDmA6aShkjTwp8lPA7V1UvSDb\nE/QyvRE4DFTiRlcu4c/mI4KHgcv5wBxJF5PmMbzVBWV7NjAL+D1wCXDnYuqpnHeUpLmSZgOvAjfm\n/PuBHwMPA3+yfY3tvwPHkyZQdgAP2J5aVV/Rzpsk3bZ0lxkEQRCUTQxbBL1GHmc7xvZuvVR/fHlb\nhBi2CHpC+LNc4lXNoNlZqEejVxoYYDel3iR+oIMg6C7R8xC0LKFtEQRBUD8xYTIIgiAIgj4ngocg\nCIDQDyiT8GW5hD+bjwgegiAIgiCoi5jzELQsMechCIKgfmLOQz9D0kGS1m60HUEQBEGwOOJVzeZi\nPEk/4m/VByQNsv1mbzQqaRnbr/dG3b1NrPUQlEWZvVjx2mu5hD+bjwgeGkBekvlnJPlrAxcCfyFJ\nWF8i6V/ADqQVHi8DPgycKulZYAKwPPA4cLDtl7LE9m7A68BvbH9N0j7AN0lCU8/b3rnKhjbgFOAZ\nYGNJmwLfB3bO9f+P7fNzT8jlwMqk78sXbd+VpbPPB3YB/g58yvY/JI0iSWqvmG38nO3nJE0jSX6P\nBVYFDrF9Z9bDuBBYjtQTtqftxyV9Bjgi599HUuxcJHhqJ8Qty
qKDDkYxMNeoHssA1A0Pgh4QwxaN\nYRSwju0tbG8JXGj7auABYH/b29h+mRRY/MNJ/vo24BvAuJyeARwtaXVgD9ub2d4K+HZu40RgF9uj\nSIFFLbYGjrT9HuA/gedsjwHGAIdmwaxPAzc5SW1vBczO564E3G97c5IuxUk5/+fAcdmWuYV8A4Nt\nvxf4SiH/i8CZuf7RwFOSNiHpYOyQ898EDui+e4OlYaAGDr1BPCWXS/iz+Yieh8bwOLCBpLOAG0gK\nlhWqu04vz5/bk2Sx785S18sBdwPPAy9L+hlwfd4gSWRfJOkKOiW0q5lu+8m8vwuwhaS9c3oVYCRJ\nm+JCScsC12bdC0g39IptvwCmSFoFeJvtO3L+RcCVhfaKUt7D8/7dwDckvYsk5f2YpHGkQOKBfK0r\nkno3FmEiExlG0uQeylBGMvKtm2AHSRM60pHuTrqZJJQjHemQ5A5qImkl4KPAgcAztg+R1E7SgpiZ\nyxTlr3cl9UrsX6Ou5YBxwN7AcNvjcv4YkkT2Z6mSta7WnZB0FfAT27fUqH8YsCvwZeB02xdLeh1Y\nzvabkjYArgLagLm218vnjQCusD26eG2S1iT1Wqyfy62f6z8C+AKwGaln5utL8KFj2KI8BvqwRcx5\naF7Cn+USb1u0KJLWAJaxPYU0vLB1PrQ4+en7gB3zDRlJQyRtKGkIsKrtG4GjSUMLFYns6bZPAuYD\n71qCWb8BDlOW6Za0kaSVJK0LzLd9AWmeRsXWQcA+eX9/4A7bLwDPStop5x8ITFuCLzawPc/22cCv\ngC1IQzR7S1orl1k92xEEQRA0ATFs0RjeCUySVAnejs+fk4HzChMm38L2fEnjgUslLZ+zv0EKOH4l\naQXSkMdX87FTJW2Y8261PafKhmrRqgtIQwkzlcYKngY+SepNOE7Sa7mtz+byLwFjJJ0A/B+wX84/\nKF/DSuRJnV34oNL2vnly5Gukt0y+kydYngDcnH30GnAY8Ocu6gpKYKD2OvQG8ZRcLuHP5iOGLYKl\nQtIC2ys32Ib48galEQuOBQOFMoYtouchWFqa4sYdP/jlEePK5RG+LJfwZ/MRcx6CpcJ2V3MzgiAI\ngn5ODFsELUsZXW9BEAQDjXjbIgiCIAiCPieChyAIgM5FZYKeE74sl/Bn8xHBQxAEQRAEdRHBQ0lI\nOjkvq1x2vU9k/Yrq/MWuvtgbSJosaa+lPPfrVem7yrEqKIuYzV4e4ctyCX82HzFhsskpLlFdlV9z\nnYW8wBPuhT+spEnA1LwyZr3nlr4uRKzzELQSMbk3aBZiwmQDkDQ4P4HPlTRH0lE5/62nckn/IekR\nSQ9IOkvS1Jw/QdKFktolPS7piEK91+TyD0o6dAk2TARWlDRL0sWS1pP0qKSLSEqW75Z0jqT7c30T\nCuc+ke2Yke3fOOfvnOubJWlmXv5akn4s6feSbgHeXlXP6nl/26xdgaShkiblumdL2lPS94r25nIv\n5k9J+kHBn/vm/DZJ0yRdmX35i9recGylbe1NYEN/2ap9GfSEmPPQfMQiUfXzlpw2gJKSJORfibxM\n9HnA+20/KemXLPzrsREwlqRh8aikc2y/AXzO9rOSVgSmS7rK9rO1DLB9vKQvZ7lqlKSzRwIH2p6e\n876R6xsM3Cppc9sPZlvmZ7GqLwHHAocCxwCH2b4nLy39Cml56o2ATYBhwMMkfYvK9dbiROBZJ6lx\nJK1qe4qkwyv2Vp2/J0mPY0tgLeB+Sb8r+HpT0rLVd0na0XYMdwRBEDSY6Hmon7fktCV9hKT3UEHA\ne4A/uVPq+lI6ZbYN3GD7Ndv/JOlHvCMfO0pSB3AP8G5gwzrterISOGT2kzSDJH+9GekmXKGWNPZd\nwBm5N2S1HNC8H/ilE38DftsNO8YB/1NJ2H5uCeV3KrTxNHA7sB3JV9Nt/zUPwXQUbC0wHpiQtx+x\nsA7XtEjXlWYJxyPd/XTbI
sdzb1pbpOtPV2Slm8WeVkvn/cl5m0AZ2I6tzg1YifTEfA3ws5w3CdiL\n9BQ9rVB2d9I8AYCTSLLUlWNzgXVJvzR3ACvk/HbgA3l/HrB6DRsWFPaHk6SwK+n1gT8CbyvY9tnq\n+oBtgfbCeZsBXyPpvW8MnAEcXDh+NbBn3v8jsGbe36lSD/AAMHJx9hbTwOlVbfycJM+9c8VvOf9s\n4KCqOgyOLbYW2HCjf7dii62ylfF9jJ6HOlHXctoABh4l9Uysl/P2y/nQ2QOxUJWkIYxnbb8s6T3A\n9t0w5TVl+ewarEJSvXxB0juAjy2pMiUJ74dsnwrcT+pB+R2pB2OQpLVJwy0VniAFH5CCpgq3AF8u\n1LvqEuy9o9DGWsAHgOnU9lXQq0xrtAH9iGmNNqBfEXMemo8IHurnnUC7pFnAxcB/Fw/afpkkH32T\npAeAF4DnK4dhkbkCBm4ClpH0MPA90tDFkjgfmKM0AXGhem3PBmYBvwcuAe5cTD2V845SmrQ4G3gV\n+LXta0g9DA8DFwF3F847GThT0v3A64V6vg2sluvqIPWqVNv7Vru5jTnAbOA24Din4YuufBUEQRA0\nmHhVsxeQNMT2S3n/f4A/2D6zwWb1OxSvagYthONVzaBJUEhyNy2HSjoIWI40KfEnDban3xI/yEEQ\nBH1P9DwELUsZ0XPQiaQ2x0p+pRC+LJfwZ7mU8dsZcx6CIAiCIKiL6HkIWpboeQiCIKif6HkIgiAI\ngqDPieAhCAIg3qUvk/BluYQ/m49426KPkfQV4Ce2/72Ecj8FTrf9SB/ZdRBws9My1IsrN420SuaM\nbtY7nqQKeoSkLwD/sn3xEk7rNvG6ZrlI/X8UKIa6gqDnRPDQC0ga7KQNUYujSItLLTZ4sL1YZc1e\nYDzwIEmEanHUe7MuLl5V+iur7e1l1xj0Z8aOXXKZMog3A8ol/Nl8xLBFnUh6UdLpSlLXt0paM+dP\nk3RGXnHxKEnjlKSt50j6maTlJB0JrENaofK2fN4uku5Wksi+QtKQQn3bFNr8tqQOSfdIensNuyZI\nukjS75TksveU9MPc/o2VpaEljc51PyDpJknDJO1NWmr6kmzzCpK+KWl6Ximy+qZ/oJK89lxJ2+V6\nV5d0rZIM9z2StujCxmPy/pGSHsrlf1nPNQRBEASNJYKH+lkJuN/25iQFyJNyvoFlbW8HnEMSo9rX\nSZp6GeBLts8C/gq02R6XA49vAONsjwZmAEcX6iu2eY/tUSS9ia56JdYn6U/sDvwCuCW3/2/g45KW\nJQlM7WV722zjd2xfRRK02t/2NnmJ7bNtj3GSHl9R0q65DQErOslrHwZcmPNPBmbY3gr4OkngqlK+\nQnHJ6f8CRuXyX+zuNXRx3UEJdHQ02oL+Q4zRl0v4s/mIJ7n6eRO4PO//gk55awr5GwPzbD+W0xeR\nxKKql6jeniSVfXcea16OhfUjKrxq+4a8PwP4cI0yBm60/YakB4FBtn+Tj80lKW9uRFLOvDW3N5gU\nzFQo3ug/KOk4UuCyOmlI4/rczqUAtu+QtIqktwE7kpRGsd0uaQ1JK9ews8Ic4JeSrgWureMaFmLi\nRBg2LO0PHQojR8KoUSlduRlGunvpxx5rLnt6K12hckOqdIlHOtL9NZ33x5N4ghKIdR7qRNLrwHK2\n35S0AXCV7W0ktZMmEs6UtBVwlu2d8znjSD0Pe0uaR5pA+Ex+mt/f9v412inWt8D2yjl/b+Djtg+u\nKn8S8KLt03K6eM5JwIvAb4Dzbe+whPZWIH3BRtt+Kp9v29/K5U4ufEGfBLYgyQjuZXtezv8zKTDa\nm84Jk2/ZKGkQSUFzN5Lq5xbACUu6hsqxnOeY8xDUw9ixMWEyCBTrPDSEQcA+eX9/kqR0hcof41Fg\nuKQROX0gaYgDYAFJMhvgPmDHSjlJQyRt2FuGZ7vWkrR9bm9ZSZvWsGuF/PlPSUPpvF5I17h
fPn8n\n4DnbL5D8cEDObwPm236xqn3l4wLWzQHI8cDbgKEQMtxBEAStQAQP9fMSMEbSXJLc9LcKxyoy0y8D\nBwNXSppDkqw+L5c5nyTXfZvt+aSupEuVpLDvJg15VOOq/a66i6rLLXTM9muknoDvK8llzwLel49P\nBs6TNBN4GfgpaajiJlKQU6z35VzuHOCQnD8BGJ2v47vAQTXsrewPBi7OvpkJnGn7+RrXFpLcfUjM\neSiPGKMvl/Bn8xHDFnVS7EoPGotijYdgKeiLYQuFkFOphD/LpYxhiwge6kTSC7ZXWXLJoLcp4x8g\nCIJgoBFzHhpABA5BEATBQCeChyAIgBhXLpPwZbmEP5uPCB6CIAiCIKiLmPMQtCwx5yEIgqB+Ys5D\nEARBEAR9TksHD5K+XtgfntdeaAokfULSJt0oN1nSXnXU2yZpat7fTdJ/9cTOVkeSYxu4W6O/f12h\nGKMvlfBn89Hq2hb/TVqQqMdo8TLaS8MnganAI0sot9Q/gLan5jZKoxf80Ku0E+tTl0UHHYxiVKPN\n6DZj6SN97SAIFqEleh4kfVpJlnmupIk5byJJ7XGWpIvJKxdKOl9JLvs3ShoNSBqhJOn8gJLc88Y5\nf7Kk8yTdC3y/qs3xShLTN0uaJ+lwSccqSVbfI2m1ruqWtANJs+EHufwGkg5VkrjukHSVpBULzX1I\n0v2SHpX08VzvCpIm5eueWSvyzjaenfffIemaXH+HpPfVKL/UcuK53HaS7sr136e0nPZgST/I1zZb\n0udz2bWzPyrS3TtKGpR9PjfX/ZUl/H32yWU7JN1efT1BubRS4NDsxIJG5RL+bD6avudB0jrARGAb\n4DngZkmfsH28pC9naWgkDQc2BD5l+/OSLgf2Ai4hLQn9BduPSXovaVnlcbmJdYD3ufbM0c2AUcCK\nwOPAcVkE63TgsySVzEXqdpLbvg6YantKtu852z/N+6eQlnX+MUnPYT3b20kaCbTnzy8Db9jeMt9M\nb5a00WJcdRbQbvuTSqJTQ2uUqciJHy3pRJKc+BEU5MRzwPUH4IP5mi4CviTpXOAyksz4DCXNi5fz\ndTxne4yk5YE7Jd1MUti8yfZ3JQkYAmwNrOMk842kypoZXf19TgR2sf23QtkgCIKgwTR98ABsR7op\n/hNA0iUkNcZf1Sg7z/acvD+DJE41BNiBpDNRKbdc/jRwZReBg3O7LwEvSXqOziGCucCWS6gbFhZ6\n2kLSt+kUgbqp0M4VAPnm+SfgPSSJ67Ny/qNK6pWLCx7GAp/J5d8EXqhRpidy4rcBf7M9I7fxIoCk\nXfK17Z3LrwKMBO4HLpS0LHCt7dmSHgc2kHQWcAMpIBpK0teo5cO7gIskXVFl61tMZCLDSJrcQxnK\nSEa+9QTdQRJriHT30ldxVUv5DxZetljNJ4FMs9jT6unwZ0hy142k3UlSzwfl9CHAJraP1cKSzcNJ\nT/qVp9pjSE+7ZwCP2l6nRt2TgOttX13j2EHAtraPyOmilPZBwLbA15dQd7HnYR6wu+25+fw22wfn\ncrfbnpzL3U7qDZgAnG27Pef/DjgMWJMknb2bpPF0yl0/DbzL9quL8eXSyokfRuqlOM/2TlV1XgX8\nxPYtNdobBuxKCj5Ot31xDrg+QlIafQb4Slc+zHWMAT5O6ukZbfuZwjHHnIfyaMU5D836qm4xqAl6\nTvizXKSB8arm/cDOktaQNBj4FJ3y1q9JWlzviWwvAOZVnoyV2LIb7S7OsQJYQt1FiWtIvQ1/z0/i\nn6FzoqSAffK5I4ANgN+zsMT1RsC6JEntrrgN+FIuP7iLbv6llROflvPXlrRtbmPl/Pf4DXBY5e8g\naSNJK0lalyTLfQFwAbCNpDWAwTmgOhHYenE+lDTC9nTbJwHzgXct5vqDHtJKgUOzEze6cgl/Nh9N\nHzzY/htwPNAOdAAPOL1lAGmsfI46J0x2JeF8AHCIkgz
1g8DuNcos0nTVsa5ksbuq+zLgOEkz8lP+\niSRp6ztZ+A0MA38GpgO/Jo39v0oa9x+kJFt9GXCQk6R2LYlrgKOAsbn8A0Ct10SXWk48t70fcHa+\n1t8Ay5MCg4eBmbnec0nDYW1Ah5J0977Aj4B3kuZ0zAIuJr0tszgfnqo8URa4qzAkFQRBEDSQph+2\nCMpD/UxOXE38nn/QN8SwxcAg/FkuZQxbtMKEyaA8+t3NtllvHq1I/EAHQdBdouchaFnKiJ6DIAgG\nGgNlwmQQBEEQBE1EBA9BEAChH1Am4ctyCX82HxE8BEEQBEFQFy0fPKgXlTVV0I6oceyuJZzbI1sk\n3dDFWg1LOq9Lm4NgccRkyfIIX5ZL+LP56A9vW/SmsmaXs0lt71hGm4up/+NLe+rStilpGduvL+35\njSBe1wxqERNpg6B3aZmeBzVAWTPzbkntkv4g6ZsFeyraDlJSlawoRe5bw/aaCpl5JcYrJD0kaYqk\neyVtk489IWn1vP9ZJcXKDiWhKiTtlsvPlHSLpLcvwX8TJF0s6e58Lf+Z89sk3SHpV8CDkpbvwtbB\nkn6Yr3O2pMNz/mglVc4HJN2ktCQ1ko7M1zVb0qU5b+f8t5qV6x6S849TpyrnhJw3JPe+dOQ2F/Fr\nwrGVtrU3gQ1lbI0nxujLJfzZhNhu+o2kfPkksAYwmLQU8yfysQWFcsOB14Atc/py4IC8fxswMu+/\nF7gt708GriO/tlrV7njgr8BqwAokQaxtiu2SlDtvJi3v/PZs5zuyLXNzmWOAC/L+xrnM8sCxwLk5\nf7Nse6X+ecDqOf9RYPWcv1r+XLVg538CPyzYfHaNa5kAzMrtrkFa1XJt0kqQL5KUPRdn65dIAl6D\nKnYAywJ3A2vkvP2An+X9p0hKnQCr5M/rSAqmkBQ+BwO7kLQxIAWzU4H3k1Q5zy/Yv0qNazI4ttK2\n9iawoYwNN8FvVlujbehPW/izdH+6p3W0yrBFo5Q1AW62/WxudwrpxjazcHwn4Jf5/KeVhK3GkAKN\nCl0pZO5IWrYZ2w8pLQddRMAHgSucBaEqtpB6RK4AhuVr+VMX9lcw8CvbrwCvKIlhjSHJnE+3/eQS\nbB1HCnTerNghaXNScHNr9utgUrAFMAf4paRrgWtz3l3AGfnvN8X2U0qqnLsoLVkNScxsJGkZ79Ny\n79L1tu9cwvUFPaat0Qb0Gxxj9KUS/mw+WiV4MAsLVSnn1eKVwv4bpB6DQcCztrfu4px/LabdIiLJ\nWi/OtlrnUaPMkvIXVz/A2aTehusl7UzqWaiXyrW81E2bqvMFPGR7hxplP04K8HYDviFpc9vfl3R9\nPnaXpI/kst+zff4ijUlb57LflnSb7VMWbWY8qZMHYFVgFJ03wWn5M9IDK51QE0kiRzrSjUyrFyS5\nG9590p2N1L3+BJ3DFrcAu+VjzwDL5P3h5KGCnD4GOCnv3wXsnfdF59DGJJLkd612x5O631cDVgRm\ns+iwxSeBm0gBylrZzrez8LDFV+kcCtgol1mONGxxTs7fFHiVRYctNqX2sMXMQtlJpJ6Zis3dGbZ4\nktRr0UaSDmcxti4LfAG4kqSKCZ3DFn8Ets95y2Z7BQwv5D1FUhgdUWjnSpIA1oeBe4EhOf+d2Y9r\nAyvkvF2Ba2pcUxN0kfenrb0JbChjw03wm9XWaBv60xb+LN2f7mkdLdHzYPtvkirKmiJ1Y1cra84A\nToBFnvor6QOAcyWdQLqhXUrqWqfGOcVzpwNXk+SgL7Y9s3iO7WskvY8UWBg4zvbTkoYX6j0nt11R\nqTzI9quSzgEukvQQSYb7IeD5qmt/WNJ3gNslvUEKGj5HCgaulPQs8FtgvYJdta7H+XrbgTWBb9n+\nu9LE0WL5Wra+JukCUjAxR9JrpPkI5yhJaZ8l6W2knqwzgD8AF+c8AWfafkHStyWNJfV4PAjcmOve\nBLgnD30sIMmAjwR
+IOlNUlD1pdp/oiAIgqCvCW2LBiJpEGlS4SuSRpB6VDZyL7wuKekk4EXbp5Vd\nd6OQ5K7jvmDgIhyvagZBlyhUNVueIcBvJS1LekL/Um8EDgX64Z027hFBEAR9TfQ8BC1LGdFz0IlC\nkrs0wpflEv4slzJ+O1tmkaggCIIgCJqD6HkIWpboeQiCIKif6HkIgiAIgqDPieAhCAIg9APKJHxZ\nLuHP5qPlgwf1ggR1b9TZUyStJ+nT3ShXqix5EARBEFQTr2rWphkngqwP7E9a3KrXkDTIWb+iFVBI\ncpdKQfsl6CF96cv+Pvcn3rRoPloyeJB0MHA8SdRpNlnPQtJawLnAurnoV0hLHz8OjLL9fC73R5JQ\nFtXlbd9d1dZw4ELSks7zgYNt/0XSZOBlYDRp6eWjbd8gaTywB0k1ckPgNJK+xv7Zzv9wEpUaAfyY\ntBTzv4BDnYSoJpNWmdyWtHz012xfDUwE3pMFpCaTxKYuJq0VAXC47XsW47M24FvAC6TVG9uBw2xb\nSV78POBDwJclvRc4OJ96ge0zcx2fJS35bWCO7c/W8rntu7Pexo9ynkmCYquQlE5XJn33vmT7ziyO\nNYG0dPbj2ccvZVGs3UgrXd5s+7jq62pv7+qKg2BgMHZsoy0IBiItN2whaW3SjWYHkqLlpnT2FJwJ\nnGF7DLA36cb3Jkl985P5/PeSlDfn1ypfaabQ5NnAJNtbAZeQFScz69rejiTedJ6k5XP+Zrm97YDv\nAC/Y3ga4B/hsLnM+cITtbYHjSMtCVxhme0eSpsPEnPdfwB22t84386eBD9seDXyqyq6u2A44PPts\nBEn2GlKgc6/tUaSAaDxJcXN74FBJoyRtBnwDGJvLHZnP7cqHx5CCk61Jf6eXgU8DN+W8rYAOSWvm\nesfla5kBHC1pdWAP25tl39cQxQrKpKOj0Rb0H8KX5RJzHpqPVux5eC8Ly3NfTtJcgPTkvEmhu3Bl\nSSuRnna/SXpi/1ROd1W+8iRfYXtSTwLAL4BT876BKwBsPybpT8B7cn677ZeAlyQ9B1R0OOYCW2rJ\nEuHX5nofkfSOnF/dLbkc8GNJW5HUQzdiyUy3/QSApEtJN/Wr8/lX5zI7keSy/53LVWTIzcLS4M/l\n8l35sJb89v3AhXlFzWttz84/CpsCd+c6lgPuJvW+vCzpZ8D1eQuCIAiagFYMHkzX8twC3mv71eIJ\nku4FRuan3E+Quu8XV756HL2744mV84qy4G8W0m+SfL4kifCiPV21/VXgb7YPlDSY9GTfXfsq9Vbm\nNrzszgVFdztqAAAgAElEQVQ/avl3cbbU9CGwiPy27TskvZ/UozJZ0unAs8AttvdfpGJpDDCO1KNx\neN5fiIkTYdiwtD90KIwcCaNGpXTl6S/S3UtX8prFnlZOjxrVd+1VaCYJ6LLT7pSVbgp7Wi2tXpDk\nbrlFovKwxT3ANiQFxt8Cs2wfmZ9yZ9n+YS47ynZH3j+VJPO8mu1dc17N8nnewmjbR0j6FXCl7V/k\n/N1s75XnJqxFuhFuAEwjDQXsXzk31zkvp5+pqvcuUnf/VUqP3FvYniNpEkk19Op8/gLbK0saDZxm\nuy3nnw78r+3T8xyQn9kelOdoTLW9RZXf2oBfk57y/wzcCJznpAq6wPbKudzWpB6a7UlBzr3AZ4DX\ngGuA9+VrWS3P3ejKhyNsP57zriTNz+gAnrL9hqQvZ399lzRU8UHbj+dei3WAv5Jkup9WUud83Paa\nVdfkmPMQDHTGju3/EyaDctFAXCTK9t9Icx7uAe4kyVhXOBLYVtJsJZnrzxeOXU6S5b68G+WLstZH\nAAdLmp3PP6pQ5s8kye5fA1/IT9/VktjV+0WJ8EMkdZDkqXdfzDmQJoa+IalD0lGkORIH5fM3Bl7s\n4vxi3v2kSZoPk27G11SXt12ZkDmdFDj81PZs2w+T5m/cntusqHN25cOjJM3NfnsVu
AloI81zmAns\nS5Lq/gcpIr40l707X8/KwNScdweppyXoRWKcvjzCl+UScx6aj5breWgWcg/BVNtTGm1Ld8j/fMfY\n3q3RtpRFjeGlIBiQ9PeeB4UwVqmU0fPQinMegqWjukekX9DffzSDIIh1HpqR6HkIWpYyoucgCIKB\nxoCc8xAEQe8Q48rlEb4sl/Bn8xHBQxAEQRAEdRHDFkHLEsMWQRAE9RPDFkEQBEEQ9DkRPASloR5I\nmUvaStLHCukJko4pz7pgScS4cnmEL8sl/Nl8xKuaQbOwNUmh9Mac7tZ4Wqz1UC4aQJLcMeQVBEtP\nzHkIeoRqyKPn5be7kuoeQ5LqXgH4N0n6+wngsZz3FPA9YJN87gb580e2F+rVkOR2Yn3qoH7GMjaC\nh2DAEnMegoaiOuXRc/4jwPudJMpPAr6bl/U+EbjMSXL8CpLg1nuAXUjy4CdlAbAgCIKgwcSwRdAT\nlkYefVXg55JGkgKNyndQLKzaaZJA2GvAPyU9DbyDJJgV9AIddDCKUUsuGCyRWE65XMKfzUcED0FP\nWBp59HOA22x/UtJ6JDXSriie+wY1vq8Tmcgwkib3UIYykpFv3QA7SOpEke5e+jEeayp7ejvdTJLJ\nkY50b6YVktxBM6H65NG3sj1b0hTgF7anSJoAHGR7fUl7ArvbHp/LnwS8aPu0nJ4LfNz2nwvtx5yH\nYKmIOQ/BQCbmPAQNpU559C/k/FOB72VZ7sF09lS0A5tKmiVp30oTvXwJQRAEwVIQPQ9ByxKvaQY9\noTd7HmKMvlzCn+USktzBgCe6nssjfqCDIOgu0fMQtCyhbREEQVA/MechCIIgCII+J4KHIAiA0A8o\nk/BluYQ/m48IHoIgCIIgqIuY8xC0LDHnIQiCoH5izkM/QVKbpKl5f6llrcuoU9I6kq7safs16n3L\nBklfkHRg2W0EQRAEfUO8qtl89EZXULfrtP1XYJ/etMH2T8qqNNZ6CIJF6W89cvEacfMRPQ+9hKSP\nSpohqUPSrTlviKQLJd0naaak3Wud2ld1Sto5r+g4K587RNLwvBQ0klaSdIWkhyRNkXSvpG3ysRcl\nfTvbco+kt+f83XK5mZJuqeRXtTtB0jF5f5qkidn+RyXttJi2Ry96FY6ttK29CWzoL1sjfRkEvU/0\nPPQCktYCzidJTz8padV86BskUajP5bz7KkFAI+oEjgEOs32PkuLlK1XHDwP+aXszSZtBVhZKrATc\nY/sESd8HDgW+A9xhe/ts838CXwOOZVHFTBf2B9t+r6SPkWS6P9xF2/HL2Ku0NdqAfkRbow3oV0Sv\nQ/MRwUPvsD1wu+0nAWw/l/N3AXaTdGxOLw+8u4F13gWckUWspth+Slqok2JH4Ee5vYckzSkce9X2\nDXl/BumGD/BuSVcAw4DlgD91w44p+XMmMLwbbQdBEAQNJIKH3sF0Pfywp+0/FjOyOiVVeYNIN1MD\n1wH397TORYy0vy/peuDjwF2SPsKivQ9dtflaYf9NOr9LZwM/tH29pJ1JwllLotJmtex2N8Ztx9MZ\nb6wKjKLzqW9a/ox099I/IvxXVrqy34j2E2oiSeiepovrPDSDPa2WVi9IcmM7tpI3YC3gz8DwnF49\nf34HOLtQbuv82QZMzfvji2V6uc4Rhf0rgd1Jd+K5Oe9Y4Jy8vynwKrBNTi8onLs3MCnvzyyUmQS0\nV9tACiiOyfvthfJrAvOW1HahXYNjK21rbwIb+svWSF/iRv8G9sJvalujbehPWxnfkZgw2QvYng98\nHpgiqQO4NB86BVhW0hxJDwInF08rfJoqeqNO4ChJcyXNJt2cb6w67xxgLSVJ7VNIktvPV5Wprn8C\ncKWkB4D5XdjQlT3dbTvoFdoabUA/oq3RBvQrHHMemo5YJCrokjx0sqztVySNAG4BNrL9ejO0rXhN\nMwhq4n72qmZQLgpJ7qCXGQL8VtKypPkHX+qLw
KGetuNHsjwU79KXRviyXMKfzUcED0GX2F4AbDfQ\n2g6CIAgWTwxbBC1LGV1vQRAEA40yfjtjwmQQBEEQBHURwUMQBEDne+FBzwlflkv4s/mI4CEIgiAI\ngrroteBBvSAzXQZaSjloJRGp9/W0nrJRQciqKv8t/9c4doOkVXrfuvqR9FNJmzTajoFIzGYvj/Bl\nuYQ/m4++ettiqWdlSlqmzNcDvfRy0GOBBcA9Payn4dj+eKNt6Arbh9ZTPtZ6GDjE5NggaB7qCh4k\nfZS0HPJg4B+2PyRpCEnPYDNgWWCC7euqT+2ivtWBC4H1gX8Bn7c9V9IEYETOf1LSUaQVFdcm3bw/\nTFqq+BlJ15CEoFYAzrT901z3i6TF+ncF/g18wvbTue4FwC+BXxfM2SK3N4qkVLkc8E/gAJKC5BeA\nNyR9BjgC+BBpiebTJI0CzgNWBB4HPmf7OUnTgHtJgceqwCG276zywRDgV8Bq2X8n2L5O0nDSio93\nADsAT+VreDlLU19ICspuruXbfGyVrF0xkrQM9GG2LemJgv+OBg7O51xg+8xs14n52ucDfwFm5Gsd\nAfyYtFz2v4BDbT8qaTJpBchtSaJYX7N9da7rOGAfkmjXNbYn5Ou+Angn6fv0LdtXZp8dTVLRvBAY\nna/lQts/qr7I9vYurj6om44OGDWq0VbUZuzYRltQH7EuQbmEP5uPbg9bqFMSek/bo0h6BtApCf1e\n4IPAD7K8c3c4mXRT2gr4OvDzwrH3AONsH0Ba8vhW25sDVwHrFsp9zva2pDUBjpS0Ws6vSEaPAn5H\nkoyG3Ati+2+2t7a9NXABcJXtv5AlpW1vA1xOugk+QQoOTs/n3MnCSyz/HDguX8dckqx0pa3B2Tdf\nKeQXeRn4pO3R2X+nFY6NBH6cr/s5YK+cPwn4cr62rhAwBjicpA0xAtiz6IMchIzP5bYHDpU0StJ2\nueyWwMdIAUHlWs8Hjsg+P460jHSFYbZ3JAVsE3MbuwAjbY8BtgZGS3o/8BHgKdujbG8B/KZoWy67\nju0tbG+ZrzkIgiBoAurpeegNSegdyTc02+2S1pC0MukGcp3tVwrl9sjlfiPp2UIdR0naI++/G9gQ\nmE7XktELIWlH4D9zG7B4SelFelDy3IG32b4jZ11EEpmqUEtuusgg4Hv5hvomsI6kt+dj82xXpKhn\nAMMlvS23V+nBuJh0g6/F9Bz4IOlSYCfg6sK17ESS4v53LjMFeH+26VrbrwKvFuauDCH1glypTunu\n5fKngWsBbD8i6R05fxdgF0mzcnoIKSi6EzhN0kTg+uoeGVIPzgaSzgJuoIselokTYdiwtD90KIwc\n2fn03NGRPiPdvXQlr1nsqU43k0rhktIuKEE2gz2tng5/Np+qZrcXiZK0K/Ap25+pyn8A+LQXlYRu\nIykn7iZpPDDa9hFVZWYCe9mel9N/Jg1/HA28aPu0nD+L9HT+RE7/kxQkbEkSTfpw7s5vB06y/TtJ\nC2yvnMvvDXzc9sGSTqrUrSRb/VtgN9uP5bLTqJKUtj22eF4udxJp+OMCkgrlejl/BHCF7dHZnmNs\nz5S0JnC/7fWrfDAe+ChwgO03JM0DdibdwKfmp3IkHQMMJQ3FzCm0tyVwSaVclf8n2G7L6c8Bm9k+\nJrexLWlYYg3bJ+UypwBP57ZXsz0h558O/C/wU+BR2+tQhaRJpCCgMlSxwPbKkn4I/MH2+TXOWZUk\nB34oqffqlCqfDSH1UBwIPGP7kKrzHcMWA4OxY2POQxCUhfp4kaj7gA8ojcVX5itA6m4+smDU1nXU\neQfpBla52c13Wpa4+qLuAvbN5XYhzQ8AWAV4NgcO7yH1jiwJ5XqWIfUQfK0SOBTq/GveH1/IXwCs\nXF2X7ReAZyXtlPMOBKZ1w45ie0/nwGEssN7iCtt+Hngu95hA9l8XjFF6G2MQsB/paf+tqkj+30PS\nivlGvQdpi
OcuUm/S8pKGkm7wlSWj5+VgDCW2XML1/Qb4XK4fSe+UtFYO3F62fQnwQ9IwRQVJWoM0\n5DMFOBHYZgntBD2k8qQf9JzKU19QDuHP5qPbwxa250uqSEIPAv6P9FR4CvAjSXNIwcifgN0rpxU+\na3VxTAAuVJKEfgk4qIvyJwOXKr0aeQ/wd9LN/Cbgi5IeBh7NxyjUUdyvtmUH0mS8b0n6Vj72H3RK\nSj9L6pWo3MynAldJ2p3OYKlS50HAeUpzPR6ncwJiNbV8cAkwNfvvAeCRxZSvpA8m+a0yYbJWvQbu\nJ01uHAn81vY1xXpsz1Ka6Dg95//U9mwASdcBc0h/57l0ymEfAJwr6QTSBM9Lc7lqeytt3KL06uU9\nSkMdC0gB1kjS/Jg3gdeAL1ad+05gUv6uARxf4xqDIAiCBtAS2haSlgPeyE/n7wP+x2lCY9BLSBpi\n+6UcEN1OequiqZ5NFa9pDihi2CIIyqGMYYtWUdVcF7giP4W+SuebE0Hvcb6kTUmvwE5utsChQtxQ\ngiAI+p6W6HkIglqUET0HnSjepS+N8GW5hD/Lpa8nTAZBEARBEETPQ9C6RM9DEARB/UTPQxAEQRAE\nfU4ED0EQAPEufZmEL8sl/Nl89KvgQX0kAy5pQl7xsU/orWup8tdukv4r768l6T5JMwqLUfWknU+o\nILMt6WRJ43pabxAEQdAYWuVVzaWhNydztMxEEXVT0tz2VNJCWADjSEtgd/uVWEmDbL/ZxeFP5rof\nyW3VEghbKmKth3KRWmsKSbPOeYk3A8ol/Nl8NH3woPJlwIfmcytSzycDbwO2tP3VXOZQYBPbR0v6\nLHBMLjvb9kFV9dWUqK4qM4akSbECSR78YNt/yLoWu5OkvEeQ5KorT/8Hk1ZVfA6YDbxCFeqULh8B\nrAmcavuC3MV3CvAMsLGkrUiqoKOB14Gjq/8Zsy2jSVod3wdWVFLd3AH4AGnlzeXJK2jmBaSeAC4j\niY6dqiRq9nmSWNZjpJUktwZ2Iy1t/g2SGus3SbodV+ceiB+Qvov3A1+y/Wque3I+d1lgn2q/ArQT\n4hYDlbG0mE53EPQjmnrYQr0jA34iSQ9jSycJ7d8CV5C0HAbnMuOBn0naLLc1Nrd/VKGe7khUV3gE\neH9eFfMk4LuFY1uRdDu2APbL2g9rk27WO5CULzel696OzYGxwPuAb+ZzId20j7T9HpIs9xtO0taf\nBi6StHytyvLy1N8ELsv2Ds0+GOckGz6DJFxW8cE/bI+2fTlJoXNM9tUjwCG27wauA461vY3tP+Xz\nLGkFktT2vtm2ZYAvFeqen9s8F6iotga9RAdNuQ5YSxJj9OUS/mw+mr3noTdkwMeRRKIo1inpt7nO\n3wPL2n5I0hEkhcxnqtonn7M4ieoiqwI/lzSSdFMs+v22LDhF1ugYTurFmGb7nzn/cmCjGvUa+JWT\ndPkrSoqUY0i9FdMrfiPJjZ+Vr+FRSU92Ud9bl0Znz832pODl7nyNywF3F8peXtjfQtK3ST05Q0na\nI8U6q9vYmCQ7XhEmuwj4MnBmThflzPesZehEJjKMpMk9lKGMZCSjSBrOlZthpLuXfozHmsqeJaVh\n4cWD1EQSyJGOdDOl1UhJ7kag3pEBfyDX+VhV/hjSE/YjwBO2z5N0ODDM9glVZSty3F1KVFeVnww8\nYPvHktYjBQbrV9uYJy/+kBRs7FkZIpF0JLBhjWs5ifQ3nJDTFwFXAS+QnvR3y/lTgLNtt+f074DD\nSEMdi/iran9XYH/b+9e4rnm53DOF9O6250o6CGhzkkGfRBqmmJLLTQKuB/6Y7do5548jDVvsXaxb\n0rbAD2yPrWrfMWwxcBnL2Kad8xAEzYwGwDoPvSEDfgvp6bZy7qoAtqcD7wL2JylFQhrS2KfSrqTV\nCvXI3ZeoLsp8d6W4WcGk695Z0uqSlgX26aKsgE8oSWevAbSR5g1UfymK0uc
bkbRCFpk/0AX3ATsq\nze1A0hBJG3ZRdijw92zzZ+gcallA8kERZxuGV+omzZG4vZt2BUEQBA2iqYMH2/NJE/CmSOqg86Z+\nCrCspDmSHiRNenzrtMJnrW6VbwOrSZqb62wrHLsCuNP287n9h0mTNW/PZU+r0c4BwCH5+IN0ypEX\nORX4nqSZpImfi7XR9t9Jcx7uAe4EHuriWkySw27PZb+Vz62u9xxgkJLs92XAQbZfqypXcz//DcaT\nJNFnk4YsNq5hC6T5JPdlm4vS4pcBxym9+rlB4TpfIQVTV2bbXidN7KzYULzO5u0i6yfEnIfyqHQZ\nB+UQ/mw+mnrYoq/JwwanV7r3m508bPGi7dOWWLgfonhNc8DTrMMWxbkYQc8Jf5ZLGcMWzT5hsk/I\nQxf3AR2tEjgUGNA30Ga9eQQDm7jRlUv4s/mInoegZSkjeg6CIBhoDIQJk0EQ9BExrlwe4ctyCX82\nHxE8BEEQBEFQFzFsEbQsMWwRBEFQPzFsEQRBEARBnxNvW/RTurPaZh11TSOJac2U9KLtoT20bR3g\nTNtdLX5VT13RdRYEwSJEr2TvEsHDwKCnN9jqBZt6Vpn9V7peNbPe2sqpJgCmsfCaacHSM43wZZlM\noz5/RtzQ28SwRQsh6aN5lcYOSbfmvCGSLpR0n6SZkmqtcNmVPPkYSXfn8+7KS1cjaUVJl0l6OOti\nrFh13rezDfdIenvOW0vSVZKm522HnL+zpFl5m5ntHS5pbj6+gqRJebXQmZVZ1ZLGS5oi6UZJf5D0\n/ZLcGHRJW6MN6Ee0NdqAfkZbow0IqoiehxZBnfLk77f9ZEWTg0558s9VFruqBBbdoCIV/oakD5Gk\nwvcmyWK/aHtTSVuQVC0rDAHusX1CvqEfSlrC+0zgDNt3SVqXpKi5KXAMcJjte5Rk01+psuHLZLlw\nSRsDN1eCGJJc+SjgVeBRSWfZfqqb1xYEQRD0EhE8tA69IU/elVT4+8my2Fkhc07hnFdt35D3ZwAf\nzvsfAjZRpzT5ykqS5XcBZ0i6BJhi+6lCGehaLtzUliuvCh7G5+zK5Yyi8yllWv6MdPfSPyL8V1a6\nst8s9rR6urLf/fPVRJLYjU6rFyS5sR1bC2zArsAvauQ/QJLrrs5vI8lgk780Z9coMxk4PO8PB+bl\n/WuAsYVyM4Bt8v6CQv7ewKS8Px9YrgvbNwO+lr+0G+e25uZjU6ra+h2wBXBQ0WZgKvCBqnoNjq20\nrb0JbOgvW/iysf7Ejf7NbuatDP/EnIfWoTfkyYtS4eML+b8jSZMjaXOglsx4NTdX2TEqf46w/ZDt\nU0ly4dWKnLXkwn9P7XkaMQuqV2lrtAH9iLZGG9DPaGu0AUEVETy0CO4defKupMLPBYbmoYKTSb0b\n1XVW13sksK2k2ZIeyrYCHKUkfz6bNHfhxqp6uiMXXqvtIAiCoEHECpNByxJrPARB0BWOdR66JCS5\ngwFP/ECUh6Q2h/RxKYQvyyX82XxEz0PQsoS2RRAEQf2EtkUQBEEQBH1OBA9BEACd74UHPSd8WS7h\nz+YjgocgCIIgCOoi5jwELUvMeQiCIKifeNuihZD0BPAC8CbwD+CzTuqSSHoTON32sTl9LDDE9smF\n8zuAR2x/upfs+wrwE9v/7sZ1bGP7mW7WO4G0KuVpkk4Gfmf7tp7aW6g/ot8BTASPQdAYInjoOwy0\n2X4m31D/GzgiH3sV+KSk79n+J1WLIUnaBHgZeK+klWz/qxfsOwq4GFhs8JBtq+cH+61rsX3SUti1\nWNrby65x4NLRAaNGNdqK7jN2bKMt6Jp4tbBcwp/NR8x5aAz3AiMK6ddIiplf7aL8p0krSt4MfKJW\nAUmTJZ2TZbIfl9Qm6aIsqz2pUG6XLMM9Q9IVWSL7SGAdoF3SbbncuZLul/RgDnaKfC2vaHmfpBG5\n/HBJv80rTN4qaRFxrmzjXnl/oqSHcvl
T67mGIAiCoLFE8NC3VJ7YPwo8WHXsHOAASavUOG9f4Iq8\ndTVsYWBV2+8jBSHXkZaf3gzYQtJWktYkSXiPsz2aJHh1tO2zSBoXbbbH5fq+bns7kiz2zlnjosJz\ntrcEfkySYgQ4mySStRVwCVkps4aNlrQGsIftzXL5b3f3Grq49qAEWqnXodmJp+RyCX82HzFs0be0\nZ0Gr14HizRjbCyT9nKQR8dbQgaRtgfm2/ybpaWCypNVsP1uj/qn580Hg77YfynU8RFKyfDewKXB3\nlsVeDri7C1v3k3Qo6Tuydj6vEvBUdDUuA87I+9sDe+T9X5Bu+l3xHPCypJ8B1+etu9cwu1jRxIkw\nbFjaHzoURo7svAl2dKTPSPfPNCzcnd1MEsiRjnQzpdULktzxtkUfIWkeMBp4nvRkfp/tM/KxBbZX\nlrQaMBOYRPrbnCzpNJI89YJc1WrAsbYvqKp/EnC97auVlDen2t6ieAx4Bdjf9v5d2ZfnZKxPGiLZ\n1vbz+fx22z/P5cbafkLSssBfba8laT6wtu3Xq/JPIk2YPL3KxuWAcSRZ7+G2x3XnGmxfXbDZMeeh\nPFpxzkOzTpgsBjVBzwl/lotihcnWw/YbwFeAYyQNrTr2LGlo4hBS9/4gYB9gc9vr216f9HS/NG9c\nmDTXYsfCPIUhkjbMxxeQJLrJny8BL0h6B/CxQj0C9sv7+9HZc3E38Km8fwBJ1rtSfqEvqaQhpOGJ\nG4GjSUMjQRAEQYsQwxZ9R/Gtg79LmgJ8Gfh+8RhwGnB43t8J+F/bfy8cvwPYVNI7bP9fV21U7Vfa\n/Yek8cClkpbP2d8A/kiasHmTpKdyL8As4PfAX4A7q+pdTUli+2U6A5kjgEmSjgOeBg4ulK+2a2Xg\nV5JWIAUWX6063tU1RDdZL9JKvQ7NTjwll0v4s/mIYYugZVGs8TDgadZhiyBoZsoYtoieh6CliZtH\necS4cnmEL8sl/Nl8xJyHIAiCIAjqIoYtgpaljK63IAiCgUa8bREEQRAEQZ8TwUMQBEDnojJBzwlf\nlkv4s/mI4CEIgiAIgrqI4KEPkPSEkpDUHCUxqFMq6ywoCUq9KenwQvkfSzqokF5G0nxJ3+tFG7/e\nzXIv1llvUQzrp0oKoaUhyWVsZdrUqsRs9vIIX5ZL+LP5iFc1+4aiHPcQ0oJMP6FzrfGngSMl/cT2\nayy6GNKHSSJWe5GkvHuD/wa+241y9d5o31okyvah9Rq1JNrp+frUY2libecgCIImJHoe+hjbLwFf\nBPaQtGrOng/cRtKwqMWngHOBP0l6X60CkqZJOl1JRvsRSdtJukbSHySdUij3GSUp7VmSzpM0SNJE\nYMWcd3Eud62kB5QkuQ+tauv0nH+rklInkkZJuldJYntK4dqqbdwmtzlZ0tzcG3NUPdcQ9A4xrlwe\n4ctyCX82HxE8NADbC4B5wIaF7FOBY7OexVvkJZw/CNzIkiW5X8ky2ucCvyIFKZsD4yWtlocM9gV2\nsL018CZwgO3jgX/b3tr2gbm+g21vC2xH6hVZLecPAe63vTlwO3BSzv85cFyW2J5byK+2EWBrYB3b\nWzhJe0/q7jV0ce1BEARBHxLDFo1joXdsbc+TdB9QrXi5KzDN9quSrgUmSDrKtRfouC5/Pgg8WNG+\nkPQnYF3g/SRlzweUJLlXBP5eox6AoyRVJLbfTQp0ppMCjstz/i+AKZJWAd5m+46cfxFw5WKu/XFg\nA0lnATeQFDy7ew0LSZFPZCLDSJrcQxnKSEYyiiTS0EHScF5SuoKaSEK3EelKXrPY08rpggxyU9jT\n6unwZ8/SCknu1kQFueucXpkkOLUeSWJ7qu0tJG0MXEV6or/f9kWSrgZ2BP6dq1sL2MP2rVVttAPH\n2J6ZvyjH2N6tcOxY4H2kJ/5FJkcqy4Ln/TbgFODDtl/O559k+3eSXgeWs/2mpA2yvW3AXNvr5fNH\nAFf
YHq0kpT3V9pQqG4cAHwEOBJ6xfUg3ruEY2zMLNrusOQ89XTAlCIKgVVAsEtVSCEBJhvsc4Brb\nzxcL2H4UeBjYjSTJvQpJWfPd7pTkPpyll+S+Ddhb0lrZltUlrZuPvyap0hO1CvBsDhzeA2xfqKci\nEw6pl+QO2y8Az0raKecfCEzrwg5JWgMYbHsKcCJpGCNoMDGuXB7hy3IJfzYfMWzRd7QrjRUMAqaQ\nnuwrFLt/vgNUnq73AG5zegOjwnXA9yUtW5VfpFoGO2Xaj0g6AbhZaW7Fa8BhwJ9Jb4DMkTQDOAT4\noqSHgUeBewrVvASMyfX8H7Bfzj8IOE/SSqRhiYOpjYF3kuS7K8Hr8d29hiAIgqDxxLBF0LKoxPUZ\nYtgiCIKBQhnDFtHzELQ0cdMPgiDoe2LOQxAEQIwrl0n4slzCn81HBA9BEARBENRFzHkIWpYyxu2C\nIAgGGvGqZhAEQRAEfU4ED0EQADGuXCbhy3IJfzYfETwEQVBhVKMN6EeEL8sl/NlkRPAQBEGFRZRQ\ng3EcF88AAAN8SURBVKUmfFku4c8mI4KHIAiCIAjqIoKHIAgqDG+0Af2I4Y02oJ8xvNEGBAsTr2oG\nLUuZy1MHQRAMJHr6qmYED0EQBEEQ1EUMWwRBEARBUBcRPARBEARBUBcRPARNiaSPSvq9pD9K+q8a\nx9skPS9pVt5O6O65A42l8OWJhWNPSJqT86f3reXNSXe+X9mnsyQ9KGlaPecONHroz/h+FujG//qx\nhf/zuZJel7Rqd85dBNuxxdZUGzAYeIw0w3pZoAPYpKpMG3Dd0pw7kLae+DIfmwes3ujraJatm/5c\nFXgIeFdOr9ndcwfa1hN/5v34ftbhy6ryuwK3Ls25tqPnIWhKxgCP2X7C9mvAZcAnapSrNVu4u+cO\nFHriy+4cG2h0x5/7A1fb/l8A2/+o49yBRk/8WSG+n4l6v1/7A5cu5bkRPARNyTuBvxTS/5vzihjY\nQdJsSb+WtGkd5w4keuLLyrFbJT0g6dBetrUV6I4/NwRWl9Se/XZgHecONHriT4jvZ5Fuf78krQR8\nBLi63nMrLLPUZgZB79Gd94dnAu+2/a//b+fuWaMIoyiO/w8ai4gIgmgTEEFbwUILg40gWAgWgn4D\nEfxCIpa2vjWa2KmghZVCDFibFKIIQtIEj8U8xBUxeZ4dEifM+TUzuzuXnTnchTszy0i6AjwGTu/s\nbu1JfbO8YHtV0lHghaRl2692amf3gJo8Z4CzwCVgFngj6W1l7dhMnaftT8C87ZX0J9DWX1eB17a/\nT1EL5MpDDNNnYG7i9RzdJLzJ9g/ba2X9GTAj6UjZbsvakemTJbZXy/IL8Iju8uaYbZsn3Rncou11\n21+Bl8CZytqx6ZMntlfKMv3Z1l83+X3LorUWyPAQw/QOOCXphKQDwA3g6eQGko5JUlk/R/fAs281\ntSMzdZaSZiUdKu8fBC4DH3Z39wenpr+eAPOS9pXLw+eBpcrasZk6z/TnX6r6S9Jh4CJdrk21k3Lb\nIgbH9oakO8AC3b+A79v+KOlW+fwucB24LWkDWKObpP9Z+z+OYwj6ZAkcBx6WuWI/8MD24m4fw5DU\n5Gl7WdJz4D3wE7hnewkgvfmnPnlKOkn6c1Plbx3gGrBge3272q2+L4+njoiIiCa5bRERERFNMjxE\nREREkwwPERER0STDQ0RERDTJ8BARERFNMjxEREREkwwPERER0STDQ0RERDT5Bd13nx8t7PlIAAAA\nAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "jData.groupby(2)[1].mean().plot(kind='barh', color=list('myb'))\n", "plt.axis([0.5, 0.7, -1, 14])\n", 
"plt.xlabel('')\n", "plt.ylabel('')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.761861\r\n", "0.635946\r\n", "0.695709\r\n", "0.12449\r\n", "0.230119\r\n", "0.530625\r\n", "1.34405\r\n", "0.746746\r\n", "0.758383\r\n", "0.266367\r\n" ] } ], "source": [ "##To plot density curve, must use CpG data WITHOUT annotation. Also must remove \"-nan\" in order to plot.\n", "!awk '{print $2}' ID_CpG.sorted2 | sed '/-nan/d' > CpG\n", "!head CpG" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
0 0.761861
1 0.635946
2 0.695709
3 0.124490
4 0.230119
5 0.530625
6 1.344050
7 0.746746
8 0.758383
9 0.266367
10 0.158412
11 0.736118
12 0.978761
13 1.466680
14 0.364383
15 0.368691
16 0.779823
17 0.110361
18 0.560740
19 0.898968
20 0.281310
21 0.082143
22 0.687209
23 0.219951
24 1.466800
25 0.847464
26 0.676556
27 0.853481
28 0.661166
29 0.162397
......
69920 0.000000
69921 0.961957
69922 0.635342
69923 0.000000
69924 0.508182
69925 0.245346
69926 0.850773
69927 0.623037
69928 0.066227
69929 0.893366
69930 0.891478
69931 0.394843
69932 0.638445
69933 0.761721
69934 1.173440
69935 1.535910
69936 0.386127
69937 0.892228
69938 0.674579
69939 0.881767
69940 1.053640
69941 1.083160
69942 0.850852
69943 0.558495
69944 0.480530
69945 1.000290
69946 0.804978
69947 0.787074
69948 0.000000
69949 0.660990
\n", "

69950 rows × 1 columns

\n", "
" ], "text/plain": [ " 0\n", "0 0.761861\n", "1 0.635946\n", "2 0.695709\n", "3 0.124490\n", "4 0.230119\n", "5 0.530625\n", "6 1.344050\n", "7 0.746746\n", "8 0.758383\n", "9 0.266367\n", "10 0.158412\n", "11 0.736118\n", "12 0.978761\n", "13 1.466680\n", "14 0.364383\n", "15 0.368691\n", "16 0.779823\n", "17 0.110361\n", "18 0.560740\n", "19 0.898968\n", "20 0.281310\n", "21 0.082143\n", "22 0.687209\n", "23 0.219951\n", "24 1.466800\n", "25 0.847464\n", "26 0.676556\n", "27 0.853481\n", "28 0.661166\n", "29 0.162397\n", "... ...\n", "69920 0.000000\n", "69921 0.961957\n", "69922 0.635342\n", "69923 0.000000\n", "69924 0.508182\n", "69925 0.245346\n", "69926 0.850773\n", "69927 0.623037\n", "69928 0.066227\n", "69929 0.893366\n", "69930 0.891478\n", "69931 0.394843\n", "69932 0.638445\n", "69933 0.761721\n", "69934 1.173440\n", "69935 1.535910\n", "69936 0.386127\n", "69937 0.892228\n", "69938 0.674579\n", "69939 0.881767\n", "69940 1.053640\n", "69941 1.083160\n", "69942 0.850852\n", "69943 0.558495\n", "69944 0.480530\n", "69945 1.000290\n", "69946 0.804978\n", "69947 0.787074\n", "69948 0.000000\n", "69949 0.660990\n", "\n", "[69950 rows x 1 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#To plot density curve, must use CpG data with original annotation\n", "CpG = pd.read_table('Pdam_cpg_anno', header=None, )\n", "CpG" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[-0.3, 1.7, 0, 1.7]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX4AAAD7CAYAAABt0P8jAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmcHGW1//HPlxC8IJsYRE2CAQRZZF+VbRCEsMgiICSC\nhggGFFDUnwmLGHEjekVAvFxki4qyCMi+qEggIrvsJJAAUQLIJuGyakLO74/n6emanu6Znpmqrqru\n83695pWu6urukyeVM9WnnkVmhnPOuc6xRN4BOOecay1P/M4512E88TvnXIfxxO+ccx3GE79zznUY\nT/zOOddhlsw7gGZI8j6nzjk3CGam2n2lSPxQP/iykTTVzKbmHUe78PZMl7dneorSlo0umr3U01pj\n8g6gzYzJO4A2MybvANrImLwD6Isnfuec6zCe+Ftret4BtJnpeQfQZqbnHUAbmZ53AH1RGebqkWTt\nUON3zrlWapQ7/Yq/hSR15R1DO/H2TJe3Z3qK3pae+J1zrsNkWuqRdB6wO/CCma3f4Jgu4KfAcOAl\nM+uqc4yXepxzboAa5c6sE/+2wOvAr+olfkkrArcBu5jZfEkjzOylOsd54nfOuQHKpcZvZjOBV/o4\nZDxwmZnNj8f3SvrtpOh1v7Lx9kyXt2d6it6Wedf41wRWknSzpHskHZxzPM451/bynrJhOLAJsCOw\nDHC7pDvMbE6+YWXDzGbkHUM78fZMl7dneorelnkn/qcJN3TfAt6SdCuwIdAr8UuaDsyLmwuA+yuN\nW/la5du+7du+3cnb8fEEgnk0kPkALkljgKsb3NxdGzgD2AV4F3AncICZPVpzXFvc3JXUVfQrgTLx\n9kyXt2d6itKWjXJnplf8ki4EtgdGSHoa+DahvIOZnWVmsyXdADwILAbOrk36zjnn0uVTNjjnXJvy\nKRucc84Bnvhbquh9e8vG2zNd3p7pKXpbeuJ3zrkO4zV+55xrU17jd845B3jib6mi1/3KxtszXd6e\n6Sl6W3rid865DuM1fueca1Ne43fOOQd44m+potf9ysbbM13enukpelt64nfOuQ7jNX7nnGtTXuN3\nzjkHeOJvqaLX/crG2zNd3p7pKXpbeuJ3zrkO4zV+55xrU17jd845B3jib6mi1/3KxtszXd6e6Sl6\nW2aa+CWdJ+l5SQ/1c9zmkhZJ+nSW8TjnnMu4xi9pW+B14Fdmtn6DY4YBfwTeBM43s8vqHOM1fuec\nG6BcavxmNhN4pZ/DjgIuBV7MMhbnnHNBrjV+SSOBvYAz467idzEagqLX/crG2zNd3p7pKXpbLpnz\n558KTDEzkySgYTlH0nRgXtxcANxvZjPic10ARd9O/F0KEU/ZtyuKEk/ZtyuKEk/JtzcCWv758fEE\ngnk0kHk/fkljgKvr1fglPUk12Y8g1PkPM7Orao7zGr9zzg1Qo9yZ6xW/ma1eeSzpfMIviKv6eIlz\nzrkhyro754XAX4GPSHpa0kRJkyRNyvJzi6rodb+y8fZMl7dneorelple8ZvZuAEce0iWsTjnnAt8\nrh7nnGtTPlePc845wBN/SxW97lc23p7p8vZMT9Hb0hO/c851GK/xO+dcm/Iav3POOcATf0sVve5X\nNt6e6fL2TE/R29ITv3POdRiv8TvnXJvyGr9zzjnAE39LFb3uVzbenuny9kxP0dvSE79zznUYr/E7\n51yb8hq/c845wBN/SxW97lc23p7p8vZMT9Hb0hO/c851GK/xO+dcm8qlxi/pPEnPS3qowfOflfSA\npAcl3SZpgyzjcc45l32p53xgbB/PPwlsZ2YbAN8FfpFxPLkqet2vbLw90+XtmZ6it2XWa+7OlDSm\nj+dvT2zeCYzKMh7nsiYhYAywNrACsAh4AnjUjH/nGJpz3TKv8cfEf7WZrd/Pcd8A1jKzL9Z5zmv8\nrrBist8MmAB8Chhd57DXgauAU824u3XRuU7WKHdmesXfLEk7A
BOBrfs4ZjowL24uAO43sxnxuS4A\n3/btVm6D3QLsBledDMt/FOJuZsQ/e2wvC13jgfHSpX+B008zu/XSIv19fLv82/HxBIJ5NJD7FX+8\noXs5MNbM5jY4pi2u+CV1Vf6x3NDl1Z4Sw4BPA8cBG9U55P+A+4DngaWB9Qnln6RXgS+YcVl2kQ6M\nn5/pKUpbFnLkrqRVCUn/oEZJ37mikJDE3sCDwCX0TPoLgV8DnwDea0aXGQeYsSewOrApcFHi+BWA\nSyWmxlKRcy2T6RW/pAuB7YERhKufbwPDAczsLEnnAPsA/4gvWWhmW9R5n7a44nflJbEtMA34WM1T\nbwFnAT8xY34T77MdMB1YLbH758DRZixOJ1rngka50wdwOdcHidWA0wg3bZNeA35GuFn74gDfcyXC\n1f8nE7vPBL5sRvH/Q7rSKGSpp9MUvW9v2WTZnhJLSBwFPEzPpL8QOBVYw4zjB5r0Acz4F7A7PUs/\nRwAnDSHkIfPzMz1Fb8tC9OpxrkgkRgO/BbZJ7DbgAuBEs8a9JZplxkKJg4DFwPi4+wSJR824cKjv\n71xfvNTjXEKs5V8GrJzY/Qgw0Yy7Mvi84cCVwK5x11vAx8x4IO3Pcp3HSz3O9UPi08CfqCb9dwhT\niWyaRdKHcOUPjAMei7uWBn4v8d4sPs858MTfUkWv+5VNmu0pcQDwO2CpuOtF4BNmnJj1VAtmvArs\nTbhhDKHHz2+k1v7/9PMzPUVvS0/8ruNJ7An8hur/hznAFmbc2qoYzJgNfC6xaxfg8FZ9vussXuN3\nHU1ic+AWQokFYBbQZcYLOcXzY+AbcfNNYAMznsgjFld+3o/fuRoSI4F7gPfHXU8C25jxXI4x/Rdw\nL7Bu3PUXwi+id/KKyZWX39wtgKLX/cpmKO0Ze9NcRDXpvwLsmmfSBzDjbULJp5LotwGOacVn+/mZ\nnqK3pSd+16m+Q7Wf/mJgPzMezzGebmbcC3wvseskiQ/lFY9rP17qcR1HYgvgdqoXPseZ8cMcQ+ol\nfiO5B6gsR3q5GfvmGJIrIa/xO0fdGvrNwE5FnCBNYmtCjb9irBk35hWPKx+v8RdA0et+ZTPI9jyR\natJ/gzAnfuGSPoAZtwG/TOz6mcS7svo8Pz/TU/S29MTvOobEZsDkxK5vmvFUXvE0aTJh0RaANYGv\n5RiLaxNe6nEdIY6CvZOwNi6E9RB3LOrVflKcJfT0uPk6YWbQXMYZuHLxUo/rdAdTTfr/Bg4tQ9KP\nziRMFAewLKFc5dygeeJvoaLX/cqm2faUWBZ69Nr5SZlGw5qxiJ4lqkkSa6X9OX5+pqfobZlp4pd0\nnqTnJT3UxzGnS5oj6QFJG2cZj+tYU4APxMfPQbG6bjbpOkJ5CsI6Gj/ILxRXdlmvubstoSb5KzNb\nv87zuwFHmtlukrYETjOzreoc5zV+NygSY4DZ0N0bZoJZj54ypRHnFUpOD/1xM27PKx5XfLnU+M1s\nJmEofCN7ErurmdmdwIqSVskyJtdxplJN+vcAv84vlKEx4256Ltc4TcIviNyA5V3jHwk8ndieD4zK\nKZbMFb3uVzb9tafEh4GDEru+VqIbuo0cDyyKj7cFdkrrjf38TE/R2zLvxA/0umIpfv9SVxbHA8Pi\n45vMmJlnMGkw40ng3MSu7/pVvxuovBdbfwYYndgeFff1Imk6dC9yvQC438xmxOe6AHzbt6vbu30Q\nrj0YINwT/d2V8HOKE9/gt6HrJjhxInxiOLAlHDtFOvn2osTn2z2v9lv5+fHxhPjR82gg8wFcksYA\nVzdxc3cr4FS/uevSIHEuMDFu3mzGJ/KMJ20SpwJfiZv3E9YFLnsZy6Vs0Dd3JV0uaXdJAy4LSboQ\n+CvwEUlPS5ooaZKkSQBmdh3wpKS5wFnAlwb6GWVS9Lpf2TRqT4nV6LmM4XdaElBrnQy8FR9vBOwz\n1Df08zM9RW/LZko9ZwKHA
D+TdAlwvpk91sybm9m4Jo45spn3cm4AjqN6bs8w45Y8g8mCGf+U+Bnw\nzbjrJIkrfKUu14ymSz2SVgQOBE4A/gGcDVxgZguzC6/7s73U45oS++3PoZr4dzDrHvjUViRGAE8R\npnEAOMiM3+QYkiuYIfXjl/Rewg2DQ4G/ESaM2hT4Y4oxOpeG5NX+re2a9AHMeAn4aWLXVCn3Dhuu\nBJqp8f+esBjEMsCnzGxPM7solmiWyzrAdlL0ul/Z9O49wYcIZcmKdqzt1zqF0MsN4MPA+MG+kZ+f\n6Sl6WzZzxX+2ma1jZj8ws+cAJL0LwMw2zTQ65wbmWKpX+38hrK7V1sxYQEj+FSf4Vb/rT781fkn3\nmdnGNfv+ZmabZBpZz8/zGr/rk8SqwFxgeNz1STP+lGNILSOxAqHP9opx18FmXJBfRK4oBlzjl/QB\nSZsCS0vaRNKm8c8uQtnHuSKZQjXp3wbclGMsLWXGq8CpiV0nSN0jlp3rpeEVv6QJwOcJi1fck3jq\nNWC6mV2eeXTVWNriil9SV3X0pRuqSntKjCZc7S8Vn9rZrLM6HkisSLjqXyHu+qwZvx3Ye/j5mZai\ntGWj3NmwFmhm04HpkvY1s8uyDM65IZpCNenfDp1R4kkyY4HEaVRX5/qWxMXer9/V09cV/8Fm9mtJ\nX6fnxGkCzMxOqfvCDLTLFb9Ln8Qo4AmqiX+sGTfmGFJuJN5DuOpfPu4aZ9ZjGmfXYQbTj79Sx1+u\nwY9zRTCZatK/E/hDjrHkyoxXqC7KDnCi1/pdPZlP0paGdrniL0rdr11I2+0Ht15AdaGV3cy4Ps+Y\n8iaxEuGqv3JxdqAZFzf3Wj8/01KUthzKJG0/krS8pOGSbpL0kqSDswnTuYE47GCqSf9u4IYcgykE\nM/4F/Cyx61tSIdbdcAXSTD/+B8xsQ0n7AHsAXwNmmtkGrQgwxtAWV/wuPRKrA49R7aDQsbX9WhLv\nJVz1V+bw+YwZv8svIpeXoczVU/mPtQdwqZm9iq+S5fL3barn5kw6uLZfy4yX6X3V7xdOrlszif9q\nSbMJk7LdJOl9wNvZhtWeij5/R1lIrAMcRHX+tePN/GKkxinAG/Hx+sCu/b3Az8/0FL0t+038ZjYF\n2BrY1Mz+QziZ9so6MOf6cBLVc/fGdlhLN21x5s6zE7sm5xWLK56mevVI2hr4ENUh8WZmv8oysJrP\n9xq/A0BiK8IgrYrNzXqMLHdRHNH8JNWS2MfMuCPHkFyLDaVXzwXAj4FtCNM3bAZs3uSHjpU0W9Ic\nSb2uOCSNkHSDpPslPRyniXCurtgn/YzErss96TdmxtPQY9oGv+p3QHO9emYB69oAO/xLGkbodbET\n8Ayhu904M5uVOGYq8C4zO1bSiHj8Kma2qOa92uKKvyh9e8tK4ouEtZkB3oaxh5jd4CNT+yCxHvBw\n3DRgXTNm1z/Wz8+0FKUth9Kr52HgA4P4zC2AuWY2Ly7PeBG97w08R3V4+fLAy7VJ3znoHpj0g8Su\naXDjP/OKpyzMeAS4Om4K+H85huMKopkr/hnARsBdwL/jbjOzPft53X7ALmZ2WNw+CNjSzI5KHLME\n8GdgLcJIw8+YWa+Rl+1yxe8GT+LnwJfi5jzCletb+UVUHhJbExamAVgIrG7G/BxDci0y4Nk5E6bG\nPw26+wI3U/Zp5pjjgPvNrEvSGsAfJW1oZq818VrXIWLiOiKx6xhP+s0z4zaJ2wi984YDXwW+kW9U\nLk/9Jv4w37nGAB82sz9JWqaZ1xHq+qMT26Oh11XGx4Hvx895QtJTwEeg9w07SdMJV3oQ1hi9v1JD\nq/SZLfp2ZV9R4inDtsS74YZL4L8EXQA3wLAF0uLEMcWJt7jbU66FH24NADd9STp6htkj1ySP9/ZM\ndXsjMzu11Z8fH08gmEcDzZR6vggcBqxkZmtIWgs408x27Od1SxJu1u4IPEsoFdXe3D0FeNX
MviNp\nFeBeYAMz+1fNe7VFqacoN3zKROJcYGLcfBVYP/ZW8fYcgDhfz4PAenHXZDN+1PMYb8+0FKUtG+XO\npubqIdyovcPi2ruSHjKz9Zv40F0JS8INA841sx9KmgRgZmfFnjznA6sSbjT/0Mx6rRrULonfDYzE\n54HpiV0TzPhlTuGUnsQhwHlxcz6h1r8wx5BcxoaS+O8ysy0UF12PV/J/M5+kzWUoDtSaQXX2zQuA\nz/nUDIMn8S7g78Aqcdd4My7MMSSXsaF057xF0vHAMpI+CfyOavcwNwDJWqprLM68eSXVpD8LOKI2\n6Xt7DowZ/wb+J7Hra8nJ27w901P0tmwm8U8BXgQeAiYB1wEnZBmU61wSIwlr5r4v7noZ+JQZr+cX\nVVs5k2q37M0II/Jdh2l2rp73AZjZC5lHVP/zvdTTASTWAG4E1oi73gZ29knY0iXxC0KHDYArzNgn\nz3hcdgZc6lEwVdJLhN45jymsvvVtSZ6EXaokNgb+SjXpLwL286SfiVMTj/eKv3BdB+mr1HMMYcDH\n5mb2HjN7D6F3z9bxOTdARa/75UViZ+AWquWdt4F9zbi279d5ew6GGY9C99rEAr4C3p5pKnpb9pX4\nPweMN7OnKjvM7Engs/E554ZEYgmJEwhr5VYWB18AfNKMq/KLrCP8NPF4osR7covEtVzDGr+kh83s\nowN9Lgte428/cV3YXwK7J3Y/Q1g79+H6r3Jpib15HgQq/497Dehy5TeY7px9DezwQR9u0CR2JfQS\nSyb9GcBmnvRbI3aNPSWx62ipe6El1+b6SvwbSHqt3g9hDU83QEWv+2VNYnmJ/yV0CU5O9T2NUN4Z\n0DTLnd6eKfgt8Hx8PBK+e2KewbSTop+bDRO/mQ0zs+Ua/DQzSZtzQCgrSIwn9A6blHjqeUIf/Slm\n+DoMLRYHdP28umfj/ZMDulz7aqoff968xl9eEtsRFkffvuapy4DD46LgLicSKwNPUx0lvZ13oW0f\nQ5mywXUIiSUlRklsLrGxxEckRkpNTcNd+z57SMwkdNNMJv1ngQOB/T3p58+MF4FfJXZ9La9YXOv4\nFX8LFWWqVuju1bE2YZL77QhrI4yi/sXAO8A/CPN7P5X4mQe8Qph9dQRhLYWPA7tQ7ZOffI/TgKlm\npLLQTpHas8wk1gUeCffXuwxYy4y5+UZVbkU5NxvlTq/VdxiJjwIHAPsTEnUzhgGrxZ8dBviRCwlX\nlNPMmDPA17oWMONRieuBXakO6Dqq71e5MvMr/g4Qp+PdD/gy8LF+Dn8BeC4+fjewArDyID72OUKv\nkdMqC6e44pLYCfhj3HwTGGXGKzmG5FIw6Pn4i8AT/+DEftmHAicC769zyBuE/+wzgFuBR2NPj9r3\nWRoYQ7jir/xZebwMYX3lVwjln4fi+91rxjvp/W1clmLp7wGqXbV9QFcb8MRfAK2s+0msDfwG2KTm\nqYXA74GLgRvMeLMV8WShKHXUdiGdfDJMmRw3nwXWMOPtPGMqq6Kcm96rp4NI7EJYvziZ9J8lrKMw\n2owDzLi8zEnfZeG/b4LuQXQfpDp1s2szmV7xSxpLdc3dc8xsWp1juggTRg0HXjKzrjrHtMUVfytI\n7AlcCt3D7/9D6Ed/ihlv5RaYKwWJr1Cdtvk5wlW/nzcl1fJSj6RhhJGaOxEm37obGGdmsxLHrAjc\nBuxiZvMljTCzXn27PfE3R2J94A5C3R1CF8w9zXggv6hcmcT7OU9QnVLjGLMe8/e7Esmj1LMFMNfM\n5pnZQuAiYK+aY8YDl5nZfIB6Sb+dZDl/h8RyhNp9Jek/CWzTzkm/6POhlE2oS/MW8MPE7ilS9znl\nmlT0czPLxD8SenTjmx/3Ja0JrCTpZkn3SDo4w3ja3Xeorl71BuFK37tRusE4m/AtHWAV4IgcY3EZ\nyHIAVzM1pOGEG5A7Eq5Ub5d0h5n1GugjaTphpCiExTr
ur9w1r/x27dzt8RPhsK9Ux1b94HQ4fuXK\nP0H+8fl2mbZBW8FPfgdf+2rY/tOJ0lGPmc26pgjxlWW7opWfHx9PiB89jwayrPFvBUw1s7Fx+1hg\ncfIGr6TJwNJmNjVunwPcYGaX1ryX1/j7IHEjsHPcvBnYMc637tygxEF/jwEfirt+YsY3cgzJDUIe\nNf57gDUljZG0FGGagNrl9K4EtpE0TNIywJbAoxnGlKss6n4Sm1NN+ouBL3dK0i96HbVsku0ZB/JN\nTjx9tC/K3ryin5uZJX4zWwQcCdxISOYXm9ksSZMkTYrHzCast/ogcCdwtpm1beLPyHGJxxeZMavh\nkc4NzCXAX+Pj4YQFc1wb8JG7JSaxJvB4Ytf6vnShS5PEloQuwhXbm3FrXvG4gfGRu+0pObLyGk/6\nLm1m3EmYbK/i9IGuz+CKxxN/C6VZ95NYiurde4D/Teu9y6LoddSy6aM9j4Xu0bsbAke3JKASK/q5\n6Ym/vPaiOl3yfMK9EudSZ8Y/CONEKk6SWDWveNzQeY2/pCSuAXaPm98xY2qO4bg2F6f4vg9YL+66\nwox9cgzJNcGnZW4jEu8Bnqc6EdsaZjyZY0iuA0hsDfwlsWtvM67MKx7XP7+5WwAp1v32oZr07+7U\npF/0OmrZ9NeeZtwGnJPYdWa8CHE1in5ueuIvpwMSjy/OLQrXiSYTvm1CmMHzpznG4gbJSz0lI/Fe\nwn+8YXHXqj4Zm2slib0JM8FW7GHGtXnF4xrzUk/72INq0r/Dk75rNTOuAC5M7PqFl3zKxRN/C6VU\n90uuafD7hkd1gKLXUctmgO15FPBCfPxBvOTTQ9HPTU/8LSIxDq46Q+JciY8N8j2WAcYmdl2RTnTO\nDYwZLwOHJ3Z9XuruXuwKzmv8LRAT/UyqJZrFwP5mXD7A99kTurvPzTZjnfSidG7gJH4LjIubzxHm\ni3o5x5Bcgtf4cxKXRPwN1aQPod1PG8SSdskyj/efdkVwFD17+ZwlUdqLtE7hiT97XwBWCw9nJPeP\nAr7e7JtIDAP2TOzq+DJP0euoZTOY9oxX94cmdu0LfC6tmMqq6OemJ/7s7Vt9eN/pwKTEc1+XWLrJ\n9/k4MCI+/idwVxrBOTdUZlwDnJXYdYbE6nnF4/rnNf4MSbwfeBYQoa7/AeBlwpJ2ldWMxplxURPv\n9d9UvyGcZdbjxppzuZJ4N2EunzXjrtuALjMW5ReV8xp/PvaG7nrnTDNeMOMdYHrimEP6e5NYM907\nscvr+65QzHgD+CzwTty1NT2XbnQFkmnilzRW0mxJc+LC6o2O21zSIkmfzjKeHCT/Ppcl6n6/gu51\ncT8pMaqf91mP6jeE14E/pxZhiRW9jlo2Q21PM+6GHrPETo1rQnecop+bmSV+ScOAMwj9ztcFxknq\n1f0wHjeNMJ986co5jcRpbLdJ7Oq+So/zm99UORQY38/bJXvzXB8XwnauiE6muk7vksAFsQzkCiTL\nK/4tgLlmNs/MFgIX0TOBVRwFXAq8mGEsedgAum/c/t2Mf5jZjMTzv048PrCf90qWeTq+N09FTXu6\nIUqjPWNN/2DCN1OAtYAfD/V9y6bo52aWiX8k9JhHZn7c103SSMIvgzPjruLfaW5ecnTuHXWevwK6\nr9w3lvhIvTeRGA1sFjcXAdelFqFzGYjThCeXZzzCR/UWS5aLJjeTxE8FppiZSRJ9lHokTQfmxc0F\nwP2V36qVelqxti/cq3ohf9qL0le7j6nW/+xa4NOhf/9jU2DSIXXe79PV/v9dM8xYUIy/X/7bte2Z\ndzxl3063PZd4Ct65nO7zd+GvpU+uY8bzRfn7Zry9kZmd2urPj48nEMyjgcy6c0raCphqZmPj9rHA\nYjObljjmSarJfgTwJnCYmV1V816l684p8QR092Xe0oy7JHUlvwJK7A9cEjefANYyY3HN+9wHbBQ3\njzDrvEXVG6ltTzc
0abenxAjgQUI3ZoDrCVM4L278qvZQlHOz5UsvSlqS0F99R0Jf9ruAcWY2q8Hx\n5wNXm1mv+WvKlvgl3kd1GPu/geXN+E+d45YhtM0KcdeOZtUeOxIbEfpGA7wNfMCMBZkF7lzKJHYG\nbkzsOsaMU/OKp9O0vB+/mS0CjiT8oz8KXGxmsyRNkjSp71eX3paJx/fWS/oAZrxJz5u8X6w5JNnH\n//ee9F3ZmPEH4CeJXdMkNs4rHhf4yN0MSJwAfDdunm7GV8L+3l//JNYnfB0GWEhYUeufEisTyj/L\nxed2NuOPmQdfIkX5Ot0usmpPiaWA24FN4q7HgE3joK+2VJRz00futtYGiccP9HWgGQ9R7fUznNAP\nGuBbVJP+bHzQliup+I13HHQn+o+Al3vy5Ff8GZB4jNB/GWBzM+7p5/ixhBtfFTcAu1C98b2XGVf1\neqFzJSJxCHBeYtf+ZlyaVzydoOU3d9NUpsQfb9i+Dt0Tsy1rxltNvO4SYP86T80EtjdrqzEOrgPF\nOacuBA6IuxYAG8aR7C4DXuppnfWoXqk/nkz6/czf8VXgmZp9TwETPenXV/T5UMom6/aM5/HhwN/j\nrhWB38bpTdpK0c9NT/zp2zDx+MGGR9Uw41lCf/2TCL2gzgc2MWNuuuE5l5/YM208PWfx/EF+EXUm\nL/WkTOJ0wvxDACeY8f0843GuiCSOgx7/N/Yd6BrUrn9e6mmd5BV/nz16nOtgJwPXJranS90dIlzG\nPPGnKN68SnblfLDn88Wu+5WNt2e6WtmecdqGg6nOJ7MccLnE8q2KIUtFPzc98adrFOGGFYQeC0/3\ncaxzHc2MVwhrUldmqV0PuFjKdPJIhyf+tPW4sVvbG6cII/naibdnuvJoTzP+BhyW2DWWNhjcVfRz\n0xN/uhqWeZxz9Znxa+B7iV1flvhqXvF0Ak/86erzxm7R635l4+2Zrpzb89vAxYntn0pMzCuYoSr6\nuemJP11+xe/cIMSbvYdQXa8X4ByJcTmF1Na8H39KJJYmTNWwBGH1seXaefZB57IgsSJhQsLK1M3v\nAAf6nD6D4/34s7ce1fac40nfuYGLI3t3Bh6Ju4YRevocml9U7ccTf3r6LfMUve5XNt6e6SpKe5rx\nErATYd5+CHnqbIlv5hfVwBSlLRvxxJ+epufgd871zYx/AtsBf0vsniZxmvfzH7rME7+ksZJmS5oj\naXKd5z8r6QFJD0q6TdIG9d6nBPqdnK3ofXvLxtszXUVrTzNeAHYAbknsPhq4Jt4LKKyitWWtTBO/\npGHAGYRW2yp2AAAIxklEQVRBGesC4yStU3PYk8B2ZrYBYbnCX2QZUxb6m6rBOTc4ZvwfIX9clti9\nC3CHxJr5RFV+WV/xbwHMNbN5ZrYQuAjYK3mAmd1uZq/GzTsJ0x6UzUhgpfj4/6jON95D0et+ZePt\nma6itqcZbwOfobqONYTlG++U2CmfqPpW1LasyDrxj6TnfDXz475GvgBcl2lE2ehxte8LpziXLjMW\nm3EiYe3et+Pu9wA3Spwg+f3Kgci6sZpOgJJ2ACYCve4DlEBTZZ6i1/3KxtszXWVoTzMuItz0fS7u\nWoLwTeB6iZVzC6xG0dsy67vjzwCjE9ujCVf9PcQbumcDY83slXpvJGk61SlcFwD3Vxq38rUqr224\nZGd4H9AF8EDe8fi2b7fzNujdsPVR8Jejge1gBsDO0PVQ6O+v14sUbyu34+MJoZ2682UvmY7clbQk\noS/ujsCzwF3AODOblThmVcJIvYPM7I4G71PokbsSc4E14uYWZtxd/zh1Ff1KoEy8PdNVtvaM3TpP\nAo6teeo84Jh4YzgXRWnLXEbumtki4EjgRsI6sheb2SxJkyRNioedSKjVnSnpPkl3ZRlT2iTeSzXp\n/wfv0eNcS5ixyIzjgF2BfyaemgjMkvhM7HHnavhcPUMksQtwQ9y824wt8ozHuU4UL
8D+h9D7J+kP\nwFfMmN36qPLnc/VkZ/PE41J9W3GuXZjxshkHAAfS8+p/Z+BhiTMlVsknuuLxxD90ySv8urX9iqL3\n7S0bb890tUN7mnExsDZh4OjiuHsYcDgwV+JEiXdnHUfR29IT/xDE+mEy8fsVv3M5M+NVM44CNiN0\nHKlYFvgOMEfi0E6e88dr/EMg8SGqXaZeA1aMC0o45wogXpyNBX4EfLTm6ceAE4DL2nXQpdf4s9GV\neHyHJ33nisUMM+N6YCPgUKoDvyBM+/A74G6JnTupB5An/qH5ROLxnxseFRW97lc23p7pauf2NOMd\nM84F1iRc5Sf7+G9K6HL+Z4mt0vi8orelJ/5BilcHOyR29Zv4nXP5MuMNM74PrA78mOq8PxC+wd8u\ncaXUvfRjW/Ia/yBJfBiYEzdfA1YyY1GOITnnBkhiJPAtQhloWM3TNwInA7eU9R6A1/jTl7zav8WT\nvnPlY8YzZhxO6AJ6Yc3TuwA3A3+V2E9ieMsDzIgn/sHbI/H45mZeUPS6X9l4e6ark9vTjLlmjCfc\nBL4EenTU2IpwE3i+xLRmFoApelt64h+EuOzb2MSuK/OKxTmXHjMeiCOA1ybMGPyfxNPvA74JPC4x\nQ+LLEh/MI86h8hr/IEh8Dvhl3LzXjM3yjMc5l42Y2L8EHAJ1k7wRBm7+mfDN/zYz3mxdhH1rlDs9\n8Q+CxDXA7nFzshk/yjMe51y24ijf3YDD4p+NqiULCb8I7gHujz+PmvX45tAynvhTIjEKeIrqIjar\nm/FUc68txhzd7cLbM13ens2J3wL2AfYFtqfuL4EZJMZ3LiKMEn6EMD195c85ZizMNtb6ubNj56oY\ngq9TbbeZzSZ951x7MONZ4OfAz+N00F2EXn47AOvWecmSwHrxJ2mRxONUfxFUfinMyfobgl/xDygO\nRgB/B5aJu/Yw49ocQ3LOFUic+nkrYENCD6GNgNUG+DaLCGOEKr8MZgGzgcfNeGNg8XipJ4U4OJsw\n0APCSlsblXVgh3OuNSSWJ3wTWJdw1V/5c3Rfr2vgH4RfAnMIJed5iZ9/1eajXBK/pLHAqYQRceeY\n2bQ6x5xOWDrtTWCCmd1X55jcE7/EPsDliV37m3HpwN7Da6hp8vZMl7dnepppy/gLYR16/0JYdZAf\n+zo9fxE8DZrW0hq/pGGExRB2Ap4B7pZ0Vc1C67sBHzazNSVtCZwJ6UySlKaY9H+b2HUJcNkg3moj\nwl0flw5vz3R5e6an37aMi8HfGX+6SSxH+IWwHmE8QeVnDXpPK5G0LGHq6drpp3vJ8ubuFsBcM5sH\nIOkiYC9CvapiT2J/eDO7U9KKklYxs+czjKspsfG3JXTf2jvx1NPA4YMs8ayYRmyum7dnurw90zPo\ntjTjNUKX0B4LO0ksRUj+axMmmRuT+FkNml9ZLMvEP5KQJCvmA1s2ccwooFfil7gWUIMf+niumeeT\nx7wLWBl4f52/01xgFzNeafi3ds65DMSePrPoefEMdM8WvBI9fxmMBI6p915ZJv5mr4hr60+NXrfb\nEGJJw8XAUWa8OIT3GJNSLC4Yk3cAbWZM3gG0kTGt/LBYgXg5/txb2S+1PvE/Q8+71qMJV/R9HTMq\n7qsj9049BwAHaIhhSPp8KtE4wNszbd6e6SlyW2aZ+O8B1pQ0BniWkDjH1RxzFXAkcJGkrYAF9er7\neffocc65dpJZ4jezRZKOJCxmMAw418xmSZoUnz/LzK6TtJukucAbhImQnHPOZagUA7icc86lx+fj\nz4CksZJmS5ojaXKDY06Pzz8gqa3X9xyq/tpTUpekVyXdF39OyCPOMpB0nqTnJT3UxzF+bjahv7Ys\n8nnpiT9liYFrYwkj8cZJWqfmmO6Ba8AXCQPXXB3NtGd0i5ltHH++19Igy+V8ei4i1IOfmwPSZ1tG\nhTwvPfGnr3vgmpktBCoD15J6DFwDVpS0SmvDL
I1m2hMK0O2rDMxsJvQ5DsXPzSY10ZZQ0PPSE3/6\n6g1KG9nEMaMyjqusmmlPAz4eSxPXSao3Na5rjp+b6Snseenz8acv7YFrna6ZdvkbMNrM3pS0K3AF\nsFa2YbU1PzfTUdjz0q/405fywLWO1297mtlrZvZmfHw9MFzSSq0Lsa34uZmSIp+XnvjT1z1wTdJS\nhIFrV9UccxXwOYC+Bq45oIn2lLSKFMZUS9qC0E35X60PtS34uZmSIp+XXupJmQ9cS1cz7QnsBxwh\naRFhXYcDcwu44CRdSFgndoSkp4FvA8PBz82B6q8tKfB56QO4nHOuw3ipxznnOownfuec6zCe+J1z\nrsN44nfOuQ7jid855zqMJ37nnOswnvidc67DeOJ3zrkO8/8BrSnhsOnqNogAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# pandas density plot\n", "CpG[0].plot(kind='kde', linewidth=3);\n", "plt.axis([-0.3, 1.7, 0, 1.7])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }