{ "metadata": { "name": "", "signature": "sha256:9718c252c1767fd29c6d0a36a54d98c7403d830869635f62c5fd4ea313c2c245" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Dheilly et al : Resources associated with Gametogenesis in the Pacific Oyster Crassostrea gigas: A Microarrays-Based Analysis Identifies Sex and Stage Specific Genes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "quick links \n", "- [RNAseq join array](#BiGo-Sperm-RNAseq-v-GPL11353-array) \n", "- [Supp data from Dheilly et al](#Supplementary-tables-from-Dheilly-et-al)" ] }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Array Design" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"SQLShare_-_View_Query_18EEEE73.png\"/" ] }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Sigenae Version 6" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!wget http://dl.dropboxusercontent.com/u/115356/docs/annotable/Roberts_Sigenae6_transcriptome.fa" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "--2014-04-04 12:05:10-- http://dl.dropboxusercontent.com/u/115356/docs/annotable/Roberts_Sigenae6_transcriptome.fa\r\n", "Resolving dl.dropboxusercontent.com... 54.225.207.37, 54.243.164.243, 54.225.246.188, ...\r\n", "Connecting to dl.dropboxusercontent.com|54.225.207.37|:80... " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "connected.\r\n", "HTTP request sent, awaiting response... 
" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "200 OK\r\n", "Length: 25210642 (24M) [text/plain]\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Saving to: `Roberts_Sigenae6_transcriptome.fa'\r\n", "\r\n", "\r", " 0% [ ] 0 --.-K/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", " 0% [ ] 167,558 806K/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", " 1% [ ] 365,934 876K/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", " 2% [ ] 545,486 872K/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", " 2% [> ] 730,830 876K/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", " 3% [> ] 933,054 895K/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", " 4% [> ] 1,127,582 902K/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", " 5% [=> ] 1,327,406 910K/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", " 6% [=> ] 1,520,942 912K/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", " 6% [=> ] 1,719,814 917K/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", " 7% [=> ] 1,910,950 917K/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", " 8% [==> ] 2,108,830 920K/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", " 9% [==> ] 2,307,702 923K/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", " 9% [==> ] 2,503,182 924K/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "10% [===> ] 2,701,558 922K/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "11% [===> ] 2,902,830 925K/s eta 24s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "12% [===> ] 3,099,758 926K/s eta 24s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "13% [====> ] 3,299,582 928K/s eta 24s " ] }, { "output_type": "stream", "stream": 
"stdout", "text": [ "\r", "13% [====> ] 3,497,958 934K/s eta 24s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "14% [====> ] 3,697,782 937K/s eta 24s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "15% [=====> ] 3,901,454 941K/s eta 22s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "16% [=====> ] 4,099,830 945K/s eta 22s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "17% [=====> ] 4,292,910 938K/s eta 22s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "17% [=====> ] 4,488,390 944K/s eta 22s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "18% [======> ] 4,692,062 943K/s eta 22s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "19% [======> ] 4,885,142 945K/s eta 21s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "20% [======> ] 5,087,862 943K/s eta 21s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "20% [=======> ] 5,287,686 948K/s eta 21s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "21% [=======> ] 5,484,118 948K/s eta 21s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "22% [=======> ] 5,674,302 947K/s eta 21s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "23% [========> ] 5,875,078 946K/s eta 20s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "24% [========> ] 6,068,158 946K/s eta 20s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "24% [========> ] 6,266,534 946K/s eta 20s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "25% [=========> ] 6,467,806 947K/s eta 20s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "26% [=========> ] 6,669,078 947K/s eta 20s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "27% [=========> ] 6,869,854 951K/s eta 19s " ] }, { "output_type": "stream", 
"stream": "stdout", "text": [ "\r", "28% [=========> ] 7,062,934 950K/s eta 19s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "28% [==========> ] 7,262,758 950K/s eta 19s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "29% [==========> ] 7,462,582 950K/s eta 19s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "30% [==========> ] 7,656,118 949K/s eta 19s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "31% [===========> ] 7,853,542 948K/s eta 18s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "31% [===========> ] 8,051,422 948K/s eta 18s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "32% [===========> ] 8,237,262 946K/s eta 18s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "33% [============> ] 8,426,950 945K/s eta 18s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "34% [============> ] 8,602,158 938K/s eta 18s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "34% [============> ] 8,800,534 940K/s eta 17s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "35% [============> ] 8,996,014 938K/s eta 17s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "36% [=============> ] 9,195,838 937K/s eta 17s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "37% [=============> ] 9,386,974 937K/s eta 17s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "38% [=============> ] 9,583,902 937K/s eta 17s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "38% [==============> ] 9,776,486 937K/s eta 16s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "39% [==============> ] 9,963,278 932K/s eta 16s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "40% [==============> ] 10,161,654 931K/s eta 16s " ] }, { "output_type": "stream", "stream": 
"stdout", "text": [ "\r", "41% [===============> ] 10,365,326 932K/s eta 16s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "41% [===============> ] 10,494,198 752K/s eta 16s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "42% [===============> ] 10,594,606 729K/s eta 16s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "42% [===============> ] 10,704,158 709K/s eta 16s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "43% [===============> ] 10,889,502 707K/s eta 16s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "43% [================> ] 11,076,790 704K/s eta 16s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "44% [================> ] 11,267,926 705K/s eta 16s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "45% [================> ] 11,456,166 705K/s eta 16s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "46% [=================> ] 11,647,302 712K/s eta 16s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "46% [=================> ] 11,835,542 707K/s eta 16s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "47% [=================> ] 12,022,334 711K/s eta 16s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "48% [=================> ] 12,210,574 709K/s eta 15s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "49% [==================> ] 12,400,262 700K/s eta 15s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "49% [==================> ] 12,594,294 703K/s eta 15s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "50% [==================> ] 12,789,774 709K/s eta 15s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "51% [===================> ] 12,985,254 705K/s eta 15s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "52% 
[===================> ] 13,179,286 714K/s eta 14s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "53% [===================> ] 13,382,958 708K/s eta 14s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "53% [====================> ] 13,577,486 702K/s eta 14s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "54% [====================> ] 13,781,158 876K/s eta 14s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "55% [====================> ] 13,975,686 901K/s eta 14s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "56% [====================> ] 14,174,062 925K/s eta 12s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "57% [=====================> ] 14,373,390 931K/s eta 12s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "57% [=====================> ] 14,569,366 932K/s eta 12s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "58% [=====================> ] 14,756,158 919K/s eta 12s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "59% [======================> ] 14,951,638 925K/s eta 12s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "60% [======================> ] 15,139,878 919K/s eta 11s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "60% [======================> ] 15,338,254 923K/s eta 11s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "61% [=======================> ] 15,541,926 927K/s eta 11s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "62% [=======================> ] 15,736,454 930K/s eta 11s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "63% [=======================> ] 15,939,174 927K/s eta 11s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "63% [=======================> ] 16,131,758 932K/s eta 10s " ] }, { "output_type": "stream", 
"stream": "stdout", "text": [ "\r", "64% [========================> ] 16,331,582 933K/s eta 10s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "65% [========================> ] 16,529,958 932K/s eta 10s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "66% [========================> ] 16,712,406 929K/s eta 10s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "67% [=========================> ] 16,900,646 925K/s eta 10s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "67% [=========================> ] 17,090,334 924K/s eta 9s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "68% [=========================> ] 17,280,022 921K/s eta 9s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "69% [==========================> ] 17,479,350 922K/s eta 9s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "70% [==========================> ] 17,670,982 923K/s eta 9s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "70% [==========================> ] 17,870,806 922K/s eta 9s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "71% [==========================> ] 18,035,878 928K/s eta 8s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "72% [===========================> ] 18,230,862 921K/s eta 8s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "73% [===========================> ] 18,423,942 928K/s eta 8s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "73% [===========================> ] 18,626,662 922K/s eta 8s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "74% [============================> ] 18,826,486 929K/s eta 8s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "75% [============================> ] 19,015,678 918K/s eta 7s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ 
"\r", "76% [============================> ] 19,201,518 918K/s eta 7s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "76% [=============================> ] 19,394,102 913K/s eta 7s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "77% [=============================> ] 19,585,238 917K/s eta 7s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "78% [=============================> ] 19,783,614 912K/s eta 7s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "79% [=============================> ] 19,981,990 917K/s eta 6s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "80% [==============================> ] 20,186,158 924K/s eta 6s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "80% [==============================> ] 20,381,638 927K/s eta 6s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "81% [==============================> ] 20,578,070 929K/s eta 6s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "82% [===============================> ] 20,762,462 922K/s eta 6s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "83% [===============================> ] 20,959,390 925K/s eta 5s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "83% [===============================> ] 21,150,526 925K/s eta 5s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "84% [===============================> ] 21,318,494 913K/s eta 5s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "85% [================================> ] 21,519,766 920K/s eta 5s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "86% [================================> ] 21,716,694 916K/s eta 5s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "86% [================================> ] 21,908,782 913K/s eta 4s " ] }, { "output_type": "stream", 
"stream": "stdout", "text": [ "\r", "87% [=================================> ] 22,097,518 920K/s eta 4s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "88% [=================================> ] 22,279,966 915K/s eta 4s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "89% [=================================> ] 22,479,790 920K/s eta 4s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "89% [==================================> ] 22,679,614 921K/s eta 4s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "90% [==================================> ] 22,880,886 918K/s eta 3s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "91% [==================================> ] 23,058,990 918K/s eta 3s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "92% [==================================> ] 23,251,078 919K/s eta 3s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "92% [===================================> ] 23,438,366 913K/s eta 3s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "93% [===================================> ] 23,643,486 918K/s eta 3s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "94% [===================================> ] 23,743,398 861K/s eta 2s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "94% [====================================> ] 23,921,998 856K/s eta 2s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "95% [====================================> ] 24,115,534 858K/s eta 2s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "96% [====================================> ] 24,295,582 858K/s eta 2s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "97% [====================================> ] 24,500,702 868K/s eta 2s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "97% 
[=====================================> ] 24,693,782 861K/s eta 1s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "98% [=====================================> ] 24,889,262 868K/s eta 1s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "99% [=====================================> ] 25,089,086 872K/s eta 1s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100%[======================================>] 25,210,642 887K/s in 28s \r\n", "\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "2014-04-04 12:05:38 (890 KB/s) - `Roberts_Sigenae6_transcriptome.fa' saved [25210642/25210642]\r\n", "\r\n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "#tab delimited version\n", "!perl -e '$count=0; $len=0; while(<>) {s/\\r?\\n//; s/\\t/ /g; if (s/^>//) { if ($. != 1) {print \"\\n\"} s/ |$/\\t/; $count++; $_ .= \"\\t\";} else {s/ //g; $len += length($_)} print $_;} print \"\\n\"; warn \"\\nConverted $count FASTA records in $. 
lines to tabular format\\nTotal sequence length: $len\\n\\n\";' Roberts_Sigenae6_transcriptome.fa > Roberts_Sigenae6_transcriptome.tab" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", "Converted 31952 FASTA records in 451733 lines to tabular format\r\n", "Total sequence length: 24247683\r\n", "\r\n" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "!awk -F \",\" '{print \">\"$1\"\\n\"$2}' /Volumes/web/cnidarian/GPL11353_v6fasta.csv > /Volumes/web/cnidarian/GPL11353_v6fasta.fa" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "!tail /Volumes/web/cnidarian/GPL11353_v6fasta.fa" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ ">AM860865.p.cg.6\r\n", "CACAGTGTGAAGCTGTTACATGGTTGCAGCTCAGGTGATGGGAAACCATGTTGCAGTGACGGTAGGTGGAAGTGACGGCCATTTTCAGTTGAATGTATTCAAACCAGTGATCGTCACAAATGTACTACAGTCTGTTAGACTGATCGGAGACTCTTGTGTGTCATTCTCCAAAAACTGTGTGGATGGAATAATACCGAATACAGAGAAGATTAATCGTTTTGTAAATGAATCTCTGATGCTGGTCACAGCACTGAATCCACACATAGGGTACGACCGCGCGGCAAAGATAGCTAAGCATGCTCATACAGGGTCCGCGTTAAAAATGGCCGCCGTGTCACTGCAGATGCTGACTGAGGAGGAGTTTGACATCAAAGTTCAACCAAAAGACATGTTAGGACCAAAGTAATGCCACTGCAGCAGAACAAATTCCTAGACCCAATGAGAGTCATTCGAACATGTATTGGTGGTACACATGTAATAAAATTCAAATCTTGCAAAGAACATACATTAATGTTTTATTTCATCTGTTGGTCTGTGACAATATTTGTCATTCCCAATATTGTCCCCATAATTTTTTCTGTCTTC\r", "\r\n", ">AM869341.p.cg.6\r\n", "ATTTTGCAAGATTTTTGTTGGCTTATAAAGTCAAATTTTGTCGGCTAGATGGCTACCCCAGTATCCAGGAGCGCCAGGAGCAGATGGCTGATTTTAACAGGAATCCATAGGTATTCATTTTCCTGTGAAGCACTACAGCTGGAGGGTTGGGGATCAATCTGGTTGGGGCAGACACTGTGATCATATACGACAGTGACTGGAATCCCCAGTGTGATCTCCAGGCCCAAGACAGGTGTCACAGAATAGGACAGAGTATGCCTGTGGTCATCTACCGCTTAGTGACCACCAACACCATTGACCAGCGCATTGT\r", "\r\n", ">EE677551.p.cg.6\r\n", 
"ACCTTTGAACATTAACACTCTTTCACATTACAAAATGCTCATATTACAACACCTAGTATTGACACAATTGCTCTCATGATTACAACTCTGTATCAAACTAAAAATGCCAAGTACCAATTATTAATAAGTTTATATGGATTAGTCCTTTGGTCAGTATTTACATGTCCTCTTCCTCGGCTCTGGTCGGCGCCACGAAGGACAATCTGCCCTCGGGGTCACGACTCACCTTGACCTGCCTGCACTCCATGCCCTCGGGT\r", "\r\n", ">AM860135.p.cg.6\r\n", "CGGCAGAACCAGTGGCTGGCCAAAAGGAACAAATCCAGTGTCTTATGCAGCTAAATATGGCATAACGAACCCTTCACCTACGTTAGATCGCCCGATCAATCTGTACCCATTAACAAATTACACATTTGGAACCAAAGAGCCATTGTATGAGAAGGATAGCTCAGTCCCAGCACGGTTTCAGCGTATGCGAGATGAGTTTGAGAAGATTGGAATGAGGCGATCTGTTGAGGGAGTGTTAATTGTACACGAGCACGGACTTCCCCACGTTCTCCTTCTACAACTAGGGACCACATTTTTCAAATTACCTGGGGGAGAGTTGAATTCAGGCGAGGATCAAGTAGAAGGCTTGAAAAGATTATTAACAGAGACCCTGGGAAGACAAGATGGTGGGACTATGGAGTGGGTAGTGGAGGATACTATAGGGAACTGGTGGAGACCAAACTTTGAACCCCCACAGTACCCTTACATCCCAGCACACATCACAAAACCCAAGGAACACAAAAGACTTTTCCTTGTACAGCTACCAGAAAAGGCATTGTTTGCTGTTCCTAGAAATTATAAACTTGTGGCCGCCCCCTTGTTTGAACTATATGACAACAGTGCAGGATATGGGCCAATCATCTCAAGTTTACCTCAAGCTCTAAGTCGGTTCAACTTCATATACAATTGAGTGGGAAGATGCGAAGAACTGTACAACAATATTATTGTGACTTTTTTTATTACTCTGTCTGTAATACTGTCAGCTGTGGCTTGTCAACTTTGTATCACGATCATATCATCATATAATTATATGATTCATCATGGTCGAAAATGTCATTGAGGAATAAAAAAAAATTCAAACAG\r", "\r\n", ">BQ426856.p.cg.6\r\n", 
"CCTGAAACAACCAAGCAATTTTAGAGATCTGTCAAAACATATAGGTGCTCTGAACCCCACAAGAAGAGAATTTTTTCAGGAGCGTTATAACTCATGGGAGCATGATCAAATCCCACCATTCCATTATGGCACTCATTATTCAACGGCAGCTTTTACTCTGAACTGGCTCATAAGAGTGGAGCCATTTTCTACAATGTTTTTGAACCTTCAAGGTGGCAAATTTGACCACGCCAACAGAACCTTCAGCTCTATAGCTCAGTCTTGGAAGAATTGTCAGAGAGACACTTCTGATGTGAAGGAGTTGATCCCTGAGTTCTACTTTCTCCCAGAAATGTTCATCAATCAAAATAGGTACAAGTTTGGAAAGCAAGAAGATGGCGGAGAAGTTGCTGATGTGGAGATGCCACCATGGGCCAAGAACCCTGATGATTTTGTCCGAATAAACAGAATGGCCTTAGAATCTGAATTTGTGTCTTGTCAACTTCATCATTGGATTGATCTGATATTTGGCTACAAACAAAGAGGTCCAGAAGCAGTGAGGTCCACCAATGTCTTCTATTACCTGACCTACGAAGGCAGTGTGAACCTGGAGAGTATGACTGACCTAGTCATGAAGGAGGCCATAGAGAACCAGATTAAGAGTTTTGGACAGACCCCCACCCAGCTCCTGGCTGAGCCCCACCCTCCCCGCAGTTCTCTGATGCACTTGACTCCGATGATGTTTTCGACAGTCCAGGATGATGTGTGTATGATCATGAAGTTCCTGTCCAACTCCCCGGTCATTCACATCGCCGCCAACACCCACCCTGCCGTTCCCAACCCCGCGGTCACAACCATCACATGTAACCACAACTTTGCTGTCAACAAATGGAACACCACATACCAACTAACCAACACTGGCCTTCAGAGACGATCCCTAGGGGACAACTTTGACGAGAGAATAAAACCTACCCACCAGAGCTTTGTCACTTCAGCAGACAACCGCTTCATCTTTGCCTGTGGATTCTGGGACAAGAGCTTCAGAATATTCAGTTCCGAGTCTGGGAAGATCCTTCAGGTGGTGAATGGCCACTTTGATGTGGTGACCTGTATCACGCGCTCTGAGTGTAACCTGAATCAGGACTGTTACATTGTGACCGGGTCCAAGGACTGTACCGCCATGGTCTGGATGTTCACCTCAAGGAACCAGGCCATCATAGGCGACAATGGAAGTC\r", "\r\n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGoRNA_array_v6_tab" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\"Name\"\t\"Chromosome\"\t\"Region\"\t\"Expression value\"\t\"Gene length\"\t\"RPKM\"\t\"Unique gene reads\"\t\"Total gene reads\"\r\n", "CU995582.p.cg.6\tCU995582.p.cg.6\t1..1092\t4\t1092\t0.236\t4\t4\r\n", "AM862909.p.cg.6\tAM862909.p.cg.6\t1..595\t0\t595\t0\t0\t0\r\n", "CU991229.p.cg.6\tCU991229.p.cg.6\t1..1230\t148\t1230\t7.767\t146\t148\r\n", "AM863560.p.cg.6\tAM863560.p.cg.6\t1..562\t101\t562\t11.601\t101\t101\r\n", "AM868645.p.cg.6\tAM868645.p.cg.6\t1..462\t2\t462\t0.279\t2\t2\r\n", 
"AM856822.p.cg.6\tAM856822.p.cg.6\t1..846\t0\t846\t0\t0\t0\r\n", "FP006184.p.cg.6\tFP006184.p.cg.6\t1..287\t16\t287\t3.599\t16\t16\r\n", "AM862004.p.cg.6\tAM862004.p.cg.6\t1..808\t132\t808\t10.545\t132\t132\r\n", "CU998134.p.cg.6\tCU998134.p.cg.6\t1..1001\t772\t1001\t49.783\t772\t772\r\n" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGoRNA_array_v6_tab.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "ID\tSig_No\tGene length\tRPKM\tUnique gene reads\tTotal gene reads\r\n", "CU995582\tCU995582.p.cg.6\t1092\t0.236\t4\t4\r\n", "AM862909\tAM862909.p.cg.6\t595\t0\t0\t0\r\n", "CU991229\tCU991229.p.cg.6\t1230\t7.767\t146\t148\r\n", "AM863560\tAM863560.p.cg.6\t562\t11.601\t101\t101\r\n", "AM868645\tAM868645.p.cg.6\t462\t0.279\t2\t2\r\n", "AM856822\tAM856822.p.cg.6\t846\t0\t0\t0\r\n", "FP006184\tFP006184.p.cg.6\t287\t3.599\t16\t16\r\n", "AM862004\tAM862004.p.cg.6\t808\t10.545\t132\t132\r\n", "CU998134\tCU998134.p.cg.6\t1001\t49.783\t772\t772\r\n" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "spd=\"/Users/sr320/sqlshare-pythonclient/tools/\"\n" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "#uploading to SQLSHARE\n", "!python {spd}singleupload.py -d BiGoRNA_array_v6 /Volumes/web/cnidarian/BiGoRNA_array_v6_tab.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "processing chunk line 0 to 31918 (1.10805797577 s elapsed)\r\n", "pushing /Volumes/web/cnidarian/BiGoRNA_array_v6_tab.txt...\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "parsing F8132D65...\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "finished BiGoRNA_array_v6\r\n" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "!python 
{spd}fetchdata.py -s \"SELECT * FROM [sr320@washington.edu].[BiGoRNA_array_v6]rna left join [sr320@washington.edu].[GPL11353_array]arr on rna.Sig_No=arr.ContigName\" -f tsv -o /Volumes/web/cnidarian/BiRNAseq_GPL11353_v6ref.txt" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiRNAseq_GPL11353_v6ref.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "ID\tSig_No\tGene length\tRPKM\tUnique gene reads\tTotal gene reads\tID\tRow\tCol\tProbeName\tContigName\tGB_ACC\tDescription\tSEQUENCE\r", "\r\n", "CU995582\tCU995582.p.cg.6\t1092\t0.236\t4\t4\t13\t1\t13\tCUST_15609_PI419289827\tCU995582.p.cg.6\tCU995582\t\tCCTGGAATTTAACAATCCACTCCCCCGGGTTTTTCCCTTTTTAAAAATATAAAAAACAAG\r", "\r\n", "AM862909\tAM862909.p.cg.6\t595\t0\t0\t0\t14\t1\t14\tCUST_4684_PI419289827\tAM862909.p.cg.6\tAM862909\t\tCTACGCTCTACTAATGTCATTTTCAGACATTATCATGACATGTACCAGATGAAATACTAC\r", "\r\n", "CU991229\tCU991229.p.cg.6\t1230\t7.767\t146\t148\t15\t1\t15\tCUST_13060_PI419289827\tCU991229.p.cg.6\tCU991229\tpredicted protein [Trichoplax adhaerens]\tTAGGCATACCCATCTGTATCATGTCTAGTAGCTATTGCCCTTGAATTATTAGGAATTAAA\r", "\r\n", "AM863560\tAM863560.p.cg.6\t562\t11.601\t101\t101\t17\t1\t17\tCUST_4942_PI419289827\tAM863560.p.cg.6\tAM863560\t(sp:Q7ZVC2) Clusterin-associated protein 1 homolog OS=Danio rerio GN=cluap1 PE=2 SV=2\tTAAAAGAAGCAAGAAGACTGGCATCAGAAATAACCACCAAAGGAGCAACTTTATATGATC\r", "\r\n", "AM868645\tAM868645.p.cg.6\t462\t0.279\t2\t2\t18\t1\t18\tCUST_25910_PI419289827\tAM868645.p.cg.6\tAM868645\t\tTGAATACCAAATTGTTGCAATAAAAAATCTGCTACAGAAGAACAACAAGGGTTCTTGTGG\r", "\r\n", "AM856822\tAM856822.p.cg.6\t846\t0\t0\t0\t19\t1\t19\tCUST_2145_PI419289827\tAM856822.p.cg.6\tAM856822\t\tCCTCGTCAAGACCACAGTGATAAACACAACCACAAATAATGTAGGTTATTAAGATTAGAA\r", "\r\n", "FP006184\tFP006184.p.cg.6\t287\t3.599\t16\t16\t20\t1\t20\tCUST_22004_PI419289827\tFP006184.p.cg.6\tFP006184\tHomo 
sapiens 5.8S ribosomal RNA (LOC100008587), non-coding RNA\tGCTCACTAGTTGACCTAACGGACCATTCTGAATAAAAGATTATCATTTATATTGGACTTC\r", "\r\n", "AM862004\tAM862004.p.cg.6\t808\t10.545\t132\t132\t21\t1\t21\tCUST_28017_PI419289827\tAM862004.p.cg.6\tAM862004\t\tAGCTACGAGTGCTGAATGATGCGAGATTACACAATACATGTGTGTGTCATGGCCGAGTTT\r", "\r\n", "CU998134\tCU998134.p.cg.6\t1001\t49.783\t772\t772\t22\t1\t22\tCUST_17107_PI419289827\tCU998134.p.cg.6\tCU998134\t\tACCCGAAGTCACTCATTTCCAAATTCAAGGGTTTGGAAAAGTTTCAAATGGGTTTTAAAA\r", "\r\n" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "#the accession is a string literal and must be single-quoted; unquoted, SQLite parses it as a column name\n", "!python /Applications/q \"SELECT * FROM /Volumes/web/cnidarian/BiRNAseq_GPL11353_v6ref.txt WHERE c1 like 'CU995582'\"" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "!grep \"CU995582\" /Volumes/web/cnidarian/BiRNAseq_GPL11353_v6ref.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "CU995582\tCU995582.p.cg.6\t1092\t0.236\t4\t4\t13\t1\t13\tCUST_15609_PI419289827\tCU995582.p.cg.6\tCU995582\t\tCCTGGAATTTAACAATCCACTCCCCCGGGTTTTTCCCTTTTTAAAAATATAAAAAACAAG\r", "\r\n" ] } ], "prompt_number": 29 }, { "cell_type": "code", "collapsed": false, "input": [ "!head -3 /Volumes/web/cnidarian/BiRNAseq_GPL11353_v6ref.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "ID\tSig_No\tGene length\tRPKM\tUnique gene reads\tTotal gene reads\tID\tRow\tCol\tProbeName\tContigName\tGB_ACC\tDescription\tSEQUENCE\r", "\r\n", "CU995582\tCU995582.p.cg.6\t1092\t0.236\t4\t4\t13\t1\t13\tCUST_15609_PI419289827\tCU995582.p.cg.6\tCU995582\t\tCCTGGAATTTAACAATCCACTCCCCCGGGTTTTTCCCTTTTTAAAAATATAAAAAACAAG\r", "\r\n", 
"AM862909\tAM862909.p.cg.6\t595\t0\t0\t0\t14\t1\t14\tCUST_4684_PI419289827\tAM862909.p.cg.6\tAM862909\t\tCTACGCTCTACTAATGTCATTTTCAGACATTATCATGACATGTACCAGATGAAATACTAC\r", "\r\n" ] } ], "prompt_number": 37 }, { "cell_type": "code", "collapsed": false, "input": [ "!egrep -wi --color 'CF369228|EF219426|EF219427|EF219428|EF219429' /Volumes/web/cnidarian/BiRNAseq_GPL11353_v6ref.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\u001b[01;31m\u001b[KEF219429\u001b[m\u001b[K\t\u001b[01;31m\u001b[KEF219429\u001b[m\u001b[K.p.cg.6\t2460\t10.758\t45\t410\t12243\t145\t3\tCUST_24587_PI419289827\t\u001b[01;31m\u001b[KEF219429\u001b[m\u001b[K.p.cg.6\t\u001b[01;31m\u001b[KEF219429\u001b[m\u001b[K\thypothetical protein BRAFLDRAFT_118409 [Branchiostoma floridae]\tCAGTGTTTATGGCAGTTTCAGAGGACACTATTGTGTCAACGGAAATACAAAAGATTTTTT\r", "\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\u001b[01;31m\u001b[KEF219426\u001b[m\u001b[K\t\u001b[01;31m\u001b[KEF219426\u001b[m\u001b[K.p.cg.6\t1221\t1.004\t1\t19\t21490\t253\t70\tCUST_24584_PI419289827\t\u001b[01;31m\u001b[KEF219426\u001b[m\u001b[K.p.cg.6\t\u001b[01;31m\u001b[KEF219426\u001b[m\u001b[K\thypothetical protein BRAFLDRAFT_118409 [Branchiostoma floridae]\tTTGATCTTGGGGCTGTTAACAACGTTGACAAAGTGGTCATCTATAACCGGAACGTTCAGG\r", "\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\u001b[01;31m\u001b[KEF219427\u001b[m\u001b[K\t\u001b[01;31m\u001b[KEF219427\u001b[m\u001b[K.p.cg.6\t1641\t15.656\t52\t398\t32629\t384\t74\tCUST_24585_PI419289827\t\u001b[01;31m\u001b[KEF219427\u001b[m\u001b[K.p.cg.6\t\u001b[01;31m\u001b[KEF219427\u001b[m\u001b[K\thypothetical protein BRAFLDRAFT_118409 [Branchiostoma floridae]\tAGGACGTTTTAGGAATGCATTCATACGTGTTGGAGTATCAATAAATAGACTGAGGAAATG\r", "\r\n", 
"\u001b[01;31m\u001b[KCF369228\u001b[m\u001b[K\t\u001b[01;31m\u001b[KCF369228\u001b[m\u001b[K.p.cg.6\t1088\t926.486\t9709\t15616\t33033\t389\t53\tCUST_24740_PI419289827\t\u001b[01;31m\u001b[KCF369228\u001b[m\u001b[K.p.cg.6\t\u001b[01;31m\u001b[KCF369228\u001b[m\u001b[K\thypothetical protein BRAFLDRAFT_125849 [Branchiostoma floridae]\tCCCAAAATAGAGAATTTTCAAGTATGTTGTACCATTGACAGGGGTAAAAGGGAATGTTAA\r", "\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\u001b[01;31m\u001b[KEF219428\u001b[m\u001b[K\t\u001b[01;31m\u001b[KEF219428\u001b[m\u001b[K.p.cg.6\t2049\t256.783\t773\t8151\t38965\t459\t35\tCUST_24586_PI419289827\t\u001b[01;31m\u001b[KEF219428\u001b[m\u001b[K.p.cg.6\t\u001b[01;31m\u001b[KEF219428\u001b[m\u001b[K\thypothetical protein BRAFLDRAFT_118409 [Branchiostoma floridae]\tGACGTGCAAGTTTTTGGAGGTTAATTAGACTCTGTTAACCAAGAGCATATACTTTGCTTT\r", "\r\n" ] } ], "prompt_number": 35 }, { "cell_type": "code", "collapsed": false, "input": [ "!egrep -wi --color 'CU998852' /Volumes/web/cnidarian/BiRNAseq_GPL11353_v6ref.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\u001b[01;31m\u001b[KCU998852\u001b[m\u001b[K\t\u001b[01;31m\u001b[KCU998852\u001b[m\u001b[K.p.cg.6\t449\t229.304\t1595\t1595\t9555\t113\t35\tCUST_30571_PI419289827\t\u001b[01;31m\u001b[KCU998852\u001b[m\u001b[K.p.cg.6\t\u001b[01;31m\u001b[KCU998852\u001b[m\u001b[K\t(sp:Q9C005) Protein dpy-30 homolog OS=Homo sapiens GN=DPY30 PE=1 SV=1\tCTGTGAACTTGTTACAGTTTTCATGCATTCATCTGTTATTTGTGTCTCGACAGTATCATT\r", "\r\n" ] } ], "prompt_number": 36 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "BiGo Sperm RNAseq v GPL11353 array" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"SQLShare_-_View_Query_18F591B3.png\"/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "" ] }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Supplementary tables from Dheilly et al " ] }, { "cell_type": 
"markdown", "metadata": {}, "source": [ "\"SQLShare_-_View_Query_18F5948F.png\"/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "--- \n", "\"SQLShare_-_View_Query_18F5959E.png\"/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "--- \n", "\"SQLShare_-_View_Query_18F5994E.png\"/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "--- \n", "\"SQLShare_-_View_Query_18F5996D.png\"/" ] }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }