{
"metadata": {
"name": "",
"signature": "sha256:9718c252c1767fd29c6d0a36a54d98c7403d830869635f62c5fd4ea313c2c245"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Dheilly et al.: Resources associated with Gametogenesis in the Pacific Oyster Crassostrea gigas: A Microarrays-Based Analysis Identifies Sex and Stage Specific Genes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"quick links \n",
"- [RNAseq join array](#BiGo-Sperm-RNAseq-v-GPL11353-array) \n",
"- [Supp data from Dheilly et al](#Supplementary-tables-from-Dheilly-et-al)"
]
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Array Design"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Sigenae Version 6"
]
},
{
"cell_type": "code",
"collapsed": true,
"input": [
"# Fetch the Sigenae v6 transcriptome FASTA (~24 MB); -nc skips the download when the file already exists, so re-running the cell does not create a duplicate .fa.1 copy\n",
"!wget -nc http://dl.dropboxusercontent.com/u/115356/docs/annotable/Roberts_Sigenae6_transcriptome.fa"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"--2014-04-04 12:05:10-- http://dl.dropboxusercontent.com/u/115356/docs/annotable/Roberts_Sigenae6_transcriptome.fa\r\n",
"Resolving dl.dropboxusercontent.com... 54.225.207.37, 54.243.164.243, 54.225.246.188, ...\r\n",
"Connecting to dl.dropboxusercontent.com|54.225.207.37|:80... "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"connected.\r\n",
"HTTP request sent, awaiting response... "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"200 OK\r\n",
"Length: 25210642 (24M) [text/plain]\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Saving to: `Roberts_Sigenae6_transcriptome.fa'\r\n",
"\r\n",
"\r",
" 0% [ ] 0 --.-K/s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
" 0% [ ] 167,558 806K/s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
" 1% [ ] 365,934 876K/s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
" 2% [ ] 545,486 872K/s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
" 2% [> ] 730,830 876K/s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
" 3% [> ] 933,054 895K/s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
" 4% [> ] 1,127,582 902K/s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
" 5% [=> ] 1,327,406 910K/s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
" 6% [=> ] 1,520,942 912K/s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
" 6% [=> ] 1,719,814 917K/s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
" 7% [=> ] 1,910,950 917K/s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
" 8% [==> ] 2,108,830 920K/s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
" 9% [==> ] 2,307,702 923K/s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
" 9% [==> ] 2,503,182 924K/s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"10% [===> ] 2,701,558 922K/s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"11% [===> ] 2,902,830 925K/s eta 24s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"12% [===> ] 3,099,758 926K/s eta 24s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"13% [====> ] 3,299,582 928K/s eta 24s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"13% [====> ] 3,497,958 934K/s eta 24s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"14% [====> ] 3,697,782 937K/s eta 24s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"15% [=====> ] 3,901,454 941K/s eta 22s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"16% [=====> ] 4,099,830 945K/s eta 22s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"17% [=====> ] 4,292,910 938K/s eta 22s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"17% [=====> ] 4,488,390 944K/s eta 22s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"18% [======> ] 4,692,062 943K/s eta 22s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"19% [======> ] 4,885,142 945K/s eta 21s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"20% [======> ] 5,087,862 943K/s eta 21s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"20% [=======> ] 5,287,686 948K/s eta 21s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"21% [=======> ] 5,484,118 948K/s eta 21s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"22% [=======> ] 5,674,302 947K/s eta 21s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"23% [========> ] 5,875,078 946K/s eta 20s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"24% [========> ] 6,068,158 946K/s eta 20s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"24% [========> ] 6,266,534 946K/s eta 20s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"25% [=========> ] 6,467,806 947K/s eta 20s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"26% [=========> ] 6,669,078 947K/s eta 20s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"27% [=========> ] 6,869,854 951K/s eta 19s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"28% [=========> ] 7,062,934 950K/s eta 19s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"28% [==========> ] 7,262,758 950K/s eta 19s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"29% [==========> ] 7,462,582 950K/s eta 19s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"30% [==========> ] 7,656,118 949K/s eta 19s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"31% [===========> ] 7,853,542 948K/s eta 18s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"31% [===========> ] 8,051,422 948K/s eta 18s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"32% [===========> ] 8,237,262 946K/s eta 18s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"33% [============> ] 8,426,950 945K/s eta 18s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"34% [============> ] 8,602,158 938K/s eta 18s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"34% [============> ] 8,800,534 940K/s eta 17s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"35% [============> ] 8,996,014 938K/s eta 17s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"36% [=============> ] 9,195,838 937K/s eta 17s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"37% [=============> ] 9,386,974 937K/s eta 17s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"38% [=============> ] 9,583,902 937K/s eta 17s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"38% [==============> ] 9,776,486 937K/s eta 16s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"39% [==============> ] 9,963,278 932K/s eta 16s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"40% [==============> ] 10,161,654 931K/s eta 16s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"41% [===============> ] 10,365,326 932K/s eta 16s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"41% [===============> ] 10,494,198 752K/s eta 16s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"42% [===============> ] 10,594,606 729K/s eta 16s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"42% [===============> ] 10,704,158 709K/s eta 16s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"43% [===============> ] 10,889,502 707K/s eta 16s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"43% [================> ] 11,076,790 704K/s eta 16s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"44% [================> ] 11,267,926 705K/s eta 16s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"45% [================> ] 11,456,166 705K/s eta 16s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"46% [=================> ] 11,647,302 712K/s eta 16s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"46% [=================> ] 11,835,542 707K/s eta 16s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"47% [=================> ] 12,022,334 711K/s eta 16s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"48% [=================> ] 12,210,574 709K/s eta 15s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"49% [==================> ] 12,400,262 700K/s eta 15s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"49% [==================> ] 12,594,294 703K/s eta 15s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"50% [==================> ] 12,789,774 709K/s eta 15s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"51% [===================> ] 12,985,254 705K/s eta 15s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"52% [===================> ] 13,179,286 714K/s eta 14s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"53% [===================> ] 13,382,958 708K/s eta 14s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"53% [====================> ] 13,577,486 702K/s eta 14s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"54% [====================> ] 13,781,158 876K/s eta 14s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"55% [====================> ] 13,975,686 901K/s eta 14s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"56% [====================> ] 14,174,062 925K/s eta 12s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"57% [=====================> ] 14,373,390 931K/s eta 12s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"57% [=====================> ] 14,569,366 932K/s eta 12s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"58% [=====================> ] 14,756,158 919K/s eta 12s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"59% [======================> ] 14,951,638 925K/s eta 12s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"60% [======================> ] 15,139,878 919K/s eta 11s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"60% [======================> ] 15,338,254 923K/s eta 11s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"61% [=======================> ] 15,541,926 927K/s eta 11s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"62% [=======================> ] 15,736,454 930K/s eta 11s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"63% [=======================> ] 15,939,174 927K/s eta 11s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"63% [=======================> ] 16,131,758 932K/s eta 10s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"64% [========================> ] 16,331,582 933K/s eta 10s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"65% [========================> ] 16,529,958 932K/s eta 10s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"66% [========================> ] 16,712,406 929K/s eta 10s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"67% [=========================> ] 16,900,646 925K/s eta 10s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"67% [=========================> ] 17,090,334 924K/s eta 9s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"68% [=========================> ] 17,280,022 921K/s eta 9s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"69% [==========================> ] 17,479,350 922K/s eta 9s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"70% [==========================> ] 17,670,982 923K/s eta 9s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"70% [==========================> ] 17,870,806 922K/s eta 9s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"71% [==========================> ] 18,035,878 928K/s eta 8s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"72% [===========================> ] 18,230,862 921K/s eta 8s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"73% [===========================> ] 18,423,942 928K/s eta 8s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"73% [===========================> ] 18,626,662 922K/s eta 8s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"74% [============================> ] 18,826,486 929K/s eta 8s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"75% [============================> ] 19,015,678 918K/s eta 7s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"76% [============================> ] 19,201,518 918K/s eta 7s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"76% [=============================> ] 19,394,102 913K/s eta 7s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"77% [=============================> ] 19,585,238 917K/s eta 7s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"78% [=============================> ] 19,783,614 912K/s eta 7s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"79% [=============================> ] 19,981,990 917K/s eta 6s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"80% [==============================> ] 20,186,158 924K/s eta 6s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"80% [==============================> ] 20,381,638 927K/s eta 6s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"81% [==============================> ] 20,578,070 929K/s eta 6s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"82% [===============================> ] 20,762,462 922K/s eta 6s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"83% [===============================> ] 20,959,390 925K/s eta 5s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"83% [===============================> ] 21,150,526 925K/s eta 5s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"84% [===============================> ] 21,318,494 913K/s eta 5s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"85% [================================> ] 21,519,766 920K/s eta 5s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"86% [================================> ] 21,716,694 916K/s eta 5s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"86% [================================> ] 21,908,782 913K/s eta 4s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"87% [=================================> ] 22,097,518 920K/s eta 4s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"88% [=================================> ] 22,279,966 915K/s eta 4s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"89% [=================================> ] 22,479,790 920K/s eta 4s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"89% [==================================> ] 22,679,614 921K/s eta 4s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"90% [==================================> ] 22,880,886 918K/s eta 3s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"91% [==================================> ] 23,058,990 918K/s eta 3s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"92% [==================================> ] 23,251,078 919K/s eta 3s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"92% [===================================> ] 23,438,366 913K/s eta 3s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"93% [===================================> ] 23,643,486 918K/s eta 3s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"94% [===================================> ] 23,743,398 861K/s eta 2s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"94% [====================================> ] 23,921,998 856K/s eta 2s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"95% [====================================> ] 24,115,534 858K/s eta 2s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"96% [====================================> ] 24,295,582 858K/s eta 2s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"97% [====================================> ] 24,500,702 868K/s eta 2s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"97% [=====================================> ] 24,693,782 861K/s eta 1s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"98% [=====================================> ] 24,889,262 868K/s eta 1s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"99% [=====================================> ] 25,089,086 872K/s eta 1s "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r",
"100%[======================================>] 25,210,642 887K/s in 28s \r\n",
"\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"2014-04-04 12:05:38 (890 KB/s) - `Roberts_Sigenae6_transcriptome.fa' saved [25210642/25210642]\r\n",
"\r\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#tab delimited version\n",
"!perl -e '$count=0; $len=0; while(<>) {s/\\r?\\n//; s/\\t/ /g; if (s/^>//) { if ($. != 1) {print \"\\n\"} s/ |$/\\t/; $count++; $_ .= \"\\t\";} else {s/ //g; $len += length($_)} print $_;} print \"\\n\"; warn \"\\nConverted $count FASTA records in $. lines to tabular format\\nTotal sequence length: $len\\n\\n\";' Roberts_Sigenae6_transcriptome.fa > Roberts_Sigenae6_transcriptome.tab"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r\n",
"Converted 31952 FASTA records in 451733 lines to tabular format\r\n",
"Total sequence length: 24247683\r\n",
"\r\n"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Convert the CSV (field 1 = ID, field 2 = sequence) to FASTA; the CSV lines end in CRLF, so strip the trailing carriage return before printing to keep it out of the sequence lines\n",
"!awk -F \",\" '{sub(/\\r$/, \"\"); print \">\"$1\"\\n\"$2}' /Volumes/web/cnidarian/GPL11353_v6fasta.csv > /Volumes/web/cnidarian/GPL11353_v6fasta.fa"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!tail /Volumes/web/cnidarian/GPL11353_v6fasta.fa"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
">AM860865.p.cg.6\r\n",
"CACAGTGTGAAGCTGTTACATGGTTGCAGCTCAGGTGATGGGAAACCATGTTGCAGTGACGGTAGGTGGAAGTGACGGCCATTTTCAGTTGAATGTATTCAAACCAGTGATCGTCACAAATGTACTACAGTCTGTTAGACTGATCGGAGACTCTTGTGTGTCATTCTCCAAAAACTGTGTGGATGGAATAATACCGAATACAGAGAAGATTAATCGTTTTGTAAATGAATCTCTGATGCTGGTCACAGCACTGAATCCACACATAGGGTACGACCGCGCGGCAAAGATAGCTAAGCATGCTCATACAGGGTCCGCGTTAAAAATGGCCGCCGTGTCACTGCAGATGCTGACTGAGGAGGAGTTTGACATCAAAGTTCAACCAAAAGACATGTTAGGACCAAAGTAATGCCACTGCAGCAGAACAAATTCCTAGACCCAATGAGAGTCATTCGAACATGTATTGGTGGTACACATGTAATAAAATTCAAATCTTGCAAAGAACATACATTAATGTTTTATTTCATCTGTTGGTCTGTGACAATATTTGTCATTCCCAATATTGTCCCCATAATTTTTTCTGTCTTC\r",
"\r\n",
">AM869341.p.cg.6\r\n",
"ATTTTGCAAGATTTTTGTTGGCTTATAAAGTCAAATTTTGTCGGCTAGATGGCTACCCCAGTATCCAGGAGCGCCAGGAGCAGATGGCTGATTTTAACAGGAATCCATAGGTATTCATTTTCCTGTGAAGCACTACAGCTGGAGGGTTGGGGATCAATCTGGTTGGGGCAGACACTGTGATCATATACGACAGTGACTGGAATCCCCAGTGTGATCTCCAGGCCCAAGACAGGTGTCACAGAATAGGACAGAGTATGCCTGTGGTCATCTACCGCTTAGTGACCACCAACACCATTGACCAGCGCATTGT\r",
"\r\n",
">EE677551.p.cg.6\r\n",
"ACCTTTGAACATTAACACTCTTTCACATTACAAAATGCTCATATTACAACACCTAGTATTGACACAATTGCTCTCATGATTACAACTCTGTATCAAACTAAAAATGCCAAGTACCAATTATTAATAAGTTTATATGGATTAGTCCTTTGGTCAGTATTTACATGTCCTCTTCCTCGGCTCTGGTCGGCGCCACGAAGGACAATCTGCCCTCGGGGTCACGACTCACCTTGACCTGCCTGCACTCCATGCCCTCGGGT\r",
"\r\n",
">AM860135.p.cg.6\r\n",
"CGGCAGAACCAGTGGCTGGCCAAAAGGAACAAATCCAGTGTCTTATGCAGCTAAATATGGCATAACGAACCCTTCACCTACGTTAGATCGCCCGATCAATCTGTACCCATTAACAAATTACACATTTGGAACCAAAGAGCCATTGTATGAGAAGGATAGCTCAGTCCCAGCACGGTTTCAGCGTATGCGAGATGAGTTTGAGAAGATTGGAATGAGGCGATCTGTTGAGGGAGTGTTAATTGTACACGAGCACGGACTTCCCCACGTTCTCCTTCTACAACTAGGGACCACATTTTTCAAATTACCTGGGGGAGAGTTGAATTCAGGCGAGGATCAAGTAGAAGGCTTGAAAAGATTATTAACAGAGACCCTGGGAAGACAAGATGGTGGGACTATGGAGTGGGTAGTGGAGGATACTATAGGGAACTGGTGGAGACCAAACTTTGAACCCCCACAGTACCCTTACATCCCAGCACACATCACAAAACCCAAGGAACACAAAAGACTTTTCCTTGTACAGCTACCAGAAAAGGCATTGTTTGCTGTTCCTAGAAATTATAAACTTGTGGCCGCCCCCTTGTTTGAACTATATGACAACAGTGCAGGATATGGGCCAATCATCTCAAGTTTACCTCAAGCTCTAAGTCGGTTCAACTTCATATACAATTGAGTGGGAAGATGCGAAGAACTGTACAACAATATTATTGTGACTTTTTTTATTACTCTGTCTGTAATACTGTCAGCTGTGGCTTGTCAACTTTGTATCACGATCATATCATCATATAATTATATGATTCATCATGGTCGAAAATGTCATTGAGGAATAAAAAAAAATTCAAACAG\r",
"\r\n",
">BQ426856.p.cg.6\r\n",
"CCTGAAACAACCAAGCAATTTTAGAGATCTGTCAAAACATATAGGTGCTCTGAACCCCACAAGAAGAGAATTTTTTCAGGAGCGTTATAACTCATGGGAGCATGATCAAATCCCACCATTCCATTATGGCACTCATTATTCAACGGCAGCTTTTACTCTGAACTGGCTCATAAGAGTGGAGCCATTTTCTACAATGTTTTTGAACCTTCAAGGTGGCAAATTTGACCACGCCAACAGAACCTTCAGCTCTATAGCTCAGTCTTGGAAGAATTGTCAGAGAGACACTTCTGATGTGAAGGAGTTGATCCCTGAGTTCTACTTTCTCCCAGAAATGTTCATCAATCAAAATAGGTACAAGTTTGGAAAGCAAGAAGATGGCGGAGAAGTTGCTGATGTGGAGATGCCACCATGGGCCAAGAACCCTGATGATTTTGTCCGAATAAACAGAATGGCCTTAGAATCTGAATTTGTGTCTTGTCAACTTCATCATTGGATTGATCTGATATTTGGCTACAAACAAAGAGGTCCAGAAGCAGTGAGGTCCACCAATGTCTTCTATTACCTGACCTACGAAGGCAGTGTGAACCTGGAGAGTATGACTGACCTAGTCATGAAGGAGGCCATAGAGAACCAGATTAAGAGTTTTGGACAGACCCCCACCCAGCTCCTGGCTGAGCCCCACCCTCCCCGCAGTTCTCTGATGCACTTGACTCCGATGATGTTTTCGACAGTCCAGGATGATGTGTGTATGATCATGAAGTTCCTGTCCAACTCCCCGGTCATTCACATCGCCGCCAACACCCACCCTGCCGTTCCCAACCCCGCGGTCACAACCATCACATGTAACCACAACTTTGCTGTCAACAAATGGAACACCACATACCAACTAACCAACACTGGCCTTCAGAGACGATCCCTAGGGGACAACTTTGACGAGAGAATAAAACCTACCCACCAGAGCTTTGTCACTTCAGCAGACAACCGCTTCATCTTTGCCTGTGGATTCTGGGACAAGAGCTTCAGAATATTCAGTTCCGAGTCTGGGAAGATCCTTCAGGTGGTGAATGGCCACTTTGATGTGGTGACCTGTATCACGCGCTCTGAGTGTAACCTGAATCAGGACTGTTACATTGTGACCGGGTCCAAGGACTGTACCGCCATGGTCTGGATGTTCACCTCAAGGAACCAGGCCATCATAGGCGACAATGGAAGTC\r",
"\r\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!head /Volumes/web/cnidarian/BiGoRNA_array_v6_tab"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\"Name\"\t\"Chromosome\"\t\"Region\"\t\"Expression value\"\t\"Gene length\"\t\"RPKM\"\t\"Unique gene reads\"\t\"Total gene reads\"\r\n",
"CU995582.p.cg.6\tCU995582.p.cg.6\t1..1092\t4\t1092\t0.236\t4\t4\r\n",
"AM862909.p.cg.6\tAM862909.p.cg.6\t1..595\t0\t595\t0\t0\t0\r\n",
"CU991229.p.cg.6\tCU991229.p.cg.6\t1..1230\t148\t1230\t7.767\t146\t148\r\n",
"AM863560.p.cg.6\tAM863560.p.cg.6\t1..562\t101\t562\t11.601\t101\t101\r\n",
"AM868645.p.cg.6\tAM868645.p.cg.6\t1..462\t2\t462\t0.279\t2\t2\r\n",
"AM856822.p.cg.6\tAM856822.p.cg.6\t1..846\t0\t846\t0\t0\t0\r\n",
"FP006184.p.cg.6\tFP006184.p.cg.6\t1..287\t16\t287\t3.599\t16\t16\r\n",
"AM862004.p.cg.6\tAM862004.p.cg.6\t1..808\t132\t808\t10.545\t132\t132\r\n",
"CU998134.p.cg.6\tCU998134.p.cg.6\t1..1001\t772\t1001\t49.783\t772\t772\r\n"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!head /Volumes/web/cnidarian/BiGoRNA_array_v6_tab.txt"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"ID\tSig_No\tGene length\tRPKM\tUnique gene reads\tTotal gene reads\r\n",
"CU995582\tCU995582.p.cg.6\t1092\t0.236\t4\t4\r\n",
"AM862909\tAM862909.p.cg.6\t595\t0\t0\t0\r\n",
"CU991229\tCU991229.p.cg.6\t1230\t7.767\t146\t148\r\n",
"AM863560\tAM863560.p.cg.6\t562\t11.601\t101\t101\r\n",
"AM868645\tAM868645.p.cg.6\t462\t0.279\t2\t2\r\n",
"AM856822\tAM856822.p.cg.6\t846\t0\t0\t0\r\n",
"FP006184\tFP006184.p.cg.6\t287\t3.599\t16\t16\r\n",
"AM862004\tAM862004.p.cg.6\t808\t10.545\t132\t132\r\n",
"CU998134\tCU998134.p.cg.6\t1001\t49.783\t772\t772\r\n"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"spd=\"/Users/sr320/sqlshare-pythonclient/tools/\"\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#uploading to SQLSHARE\n",
"!python {spd}singleupload.py -d BiGoRNA_array_v6 /Volumes/web/cnidarian/BiGoRNA_array_v6_tab.txt"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"processing chunk line 0 to 31918 (1.10805797577 s elapsed)\r\n",
"pushing /Volumes/web/cnidarian/BiGoRNA_array_v6_tab.txt...\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"parsing F8132D65...\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"finished BiGoRNA_array_v6\r\n"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!python {spd}fetchdata.py -s \"SELECT * FROM [sr320@washington.edu].[BiGoRNA_array_v6]rna left join [sr320@washington.edu].[GPL11353_array]arr on rna.Sig_No=arr.ContigName\" -f tsv -o /Volumes/web/cnidarian/BiRNAseq_GPL11353_v6ref.txt"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!head /Volumes/web/cnidarian/BiRNAseq_GPL11353_v6ref.txt"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"ID\tSig_No\tGene length\tRPKM\tUnique gene reads\tTotal gene reads\tID\tRow\tCol\tProbeName\tContigName\tGB_ACC\tDescription\tSEQUENCE\r",
"\r\n",
"CU995582\tCU995582.p.cg.6\t1092\t0.236\t4\t4\t13\t1\t13\tCUST_15609_PI419289827\tCU995582.p.cg.6\tCU995582\t\tCCTGGAATTTAACAATCCACTCCCCCGGGTTTTTCCCTTTTTAAAAATATAAAAAACAAG\r",
"\r\n",
"AM862909\tAM862909.p.cg.6\t595\t0\t0\t0\t14\t1\t14\tCUST_4684_PI419289827\tAM862909.p.cg.6\tAM862909\t\tCTACGCTCTACTAATGTCATTTTCAGACATTATCATGACATGTACCAGATGAAATACTAC\r",
"\r\n",
"CU991229\tCU991229.p.cg.6\t1230\t7.767\t146\t148\t15\t1\t15\tCUST_13060_PI419289827\tCU991229.p.cg.6\tCU991229\tpredicted protein [Trichoplax adhaerens]\tTAGGCATACCCATCTGTATCATGTCTAGTAGCTATTGCCCTTGAATTATTAGGAATTAAA\r",
"\r\n",
"AM863560\tAM863560.p.cg.6\t562\t11.601\t101\t101\t17\t1\t17\tCUST_4942_PI419289827\tAM863560.p.cg.6\tAM863560\t(sp:Q7ZVC2) Clusterin-associated protein 1 homolog OS=Danio rerio GN=cluap1 PE=2 SV=2\tTAAAAGAAGCAAGAAGACTGGCATCAGAAATAACCACCAAAGGAGCAACTTTATATGATC\r",
"\r\n",
"AM868645\tAM868645.p.cg.6\t462\t0.279\t2\t2\t18\t1\t18\tCUST_25910_PI419289827\tAM868645.p.cg.6\tAM868645\t\tTGAATACCAAATTGTTGCAATAAAAAATCTGCTACAGAAGAACAACAAGGGTTCTTGTGG\r",
"\r\n",
"AM856822\tAM856822.p.cg.6\t846\t0\t0\t0\t19\t1\t19\tCUST_2145_PI419289827\tAM856822.p.cg.6\tAM856822\t\tCCTCGTCAAGACCACAGTGATAAACACAACCACAAATAATGTAGGTTATTAAGATTAGAA\r",
"\r\n",
"FP006184\tFP006184.p.cg.6\t287\t3.599\t16\t16\t20\t1\t20\tCUST_22004_PI419289827\tFP006184.p.cg.6\tFP006184\tHomo sapiens 5.8S ribosomal RNA (LOC100008587), non-coding RNA\tGCTCACTAGTTGACCTAACGGACCATTCTGAATAAAAGATTATCATTTATATTGGACTTC\r",
"\r\n",
"AM862004\tAM862004.p.cg.6\t808\t10.545\t132\t132\t21\t1\t21\tCUST_28017_PI419289827\tAM862004.p.cg.6\tAM862004\t\tAGCTACGAGTGCTGAATGATGCGAGATTACACAATACATGTGTGTGTCATGGCCGAGTTT\r",
"\r\n",
"CU998134\tCU998134.p.cg.6\t1001\t49.783\t772\t772\t22\t1\t22\tCUST_17107_PI419289827\tCU998134.p.cg.6\tCU998134\t\tACCCGAAGTCACTCATTTCCAAATTCAAGGGTTTGGAAAAGTTTCAAATGGGTTTTAAAA\r",
"\r\n"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Look up one contig in the joined table with q. -t reads the file as tab-delimited so c1 is the first column,\n",
"# and the value is single-quoted so it is parsed as a string literal instead of a column name\n",
"# (unquoted, the query failed with: no such column: CU995582).\n",
"!python /Applications/q -t \"SELECT * FROM /Volumes/web/cnidarian/BiRNAseq_GPL11353_v6ref.txt WHERE c1 like 'CU995582'\""
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!grep \"CU995582\" /Volumes/web/cnidarian/BiRNAseq_GPL11353_v6ref.txt"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CU995582\tCU995582.p.cg.6\t1092\t0.236\t4\t4\t13\t1\t13\tCUST_15609_PI419289827\tCU995582.p.cg.6\tCU995582\t\tCCTGGAATTTAACAATCCACTCCCCCGGGTTTTTCCCTTTTTAAAAATATAAAAAACAAG\r",
"\r\n"
]
}
],
"prompt_number": 29
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!head -3 /Volumes/web/cnidarian/BiRNAseq_GPL11353_v6ref.txt"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"ID\tSig_No\tGene length\tRPKM\tUnique gene reads\tTotal gene reads\tID\tRow\tCol\tProbeName\tContigName\tGB_ACC\tDescription\tSEQUENCE\r",
"\r\n",
"CU995582\tCU995582.p.cg.6\t1092\t0.236\t4\t4\t13\t1\t13\tCUST_15609_PI419289827\tCU995582.p.cg.6\tCU995582\t\tCCTGGAATTTAACAATCCACTCCCCCGGGTTTTTCCCTTTTTAAAAATATAAAAAACAAG\r",
"\r\n",
"AM862909\tAM862909.p.cg.6\t595\t0\t0\t0\t14\t1\t14\tCUST_4684_PI419289827\tAM862909.p.cg.6\tAM862909\t\tCTACGCTCTACTAATGTCATTTTCAGACATTATCATGACATGTACCAGATGAAATACTAC\r",
"\r\n"
]
}
],
"prompt_number": 37
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!egrep -wi --color 'CF369228|EF219426|EF219427|EF219428|EF219429' /Volumes/web/cnidarian/BiRNAseq_GPL11353_v6ref.txt"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\u001b[01;31m\u001b[KEF219429\u001b[m\u001b[K\t\u001b[01;31m\u001b[KEF219429\u001b[m\u001b[K.p.cg.6\t2460\t10.758\t45\t410\t12243\t145\t3\tCUST_24587_PI419289827\t\u001b[01;31m\u001b[KEF219429\u001b[m\u001b[K.p.cg.6\t\u001b[01;31m\u001b[KEF219429\u001b[m\u001b[K\thypothetical protein BRAFLDRAFT_118409 [Branchiostoma floridae]\tCAGTGTTTATGGCAGTTTCAGAGGACACTATTGTGTCAACGGAAATACAAAAGATTTTTT\r",
"\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\u001b[01;31m\u001b[KEF219426\u001b[m\u001b[K\t\u001b[01;31m\u001b[KEF219426\u001b[m\u001b[K.p.cg.6\t1221\t1.004\t1\t19\t21490\t253\t70\tCUST_24584_PI419289827\t\u001b[01;31m\u001b[KEF219426\u001b[m\u001b[K.p.cg.6\t\u001b[01;31m\u001b[KEF219426\u001b[m\u001b[K\thypothetical protein BRAFLDRAFT_118409 [Branchiostoma floridae]\tTTGATCTTGGGGCTGTTAACAACGTTGACAAAGTGGTCATCTATAACCGGAACGTTCAGG\r",
"\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\u001b[01;31m\u001b[KEF219427\u001b[m\u001b[K\t\u001b[01;31m\u001b[KEF219427\u001b[m\u001b[K.p.cg.6\t1641\t15.656\t52\t398\t32629\t384\t74\tCUST_24585_PI419289827\t\u001b[01;31m\u001b[KEF219427\u001b[m\u001b[K.p.cg.6\t\u001b[01;31m\u001b[KEF219427\u001b[m\u001b[K\thypothetical protein BRAFLDRAFT_118409 [Branchiostoma floridae]\tAGGACGTTTTAGGAATGCATTCATACGTGTTGGAGTATCAATAAATAGACTGAGGAAATG\r",
"\r\n",
"\u001b[01;31m\u001b[KCF369228\u001b[m\u001b[K\t\u001b[01;31m\u001b[KCF369228\u001b[m\u001b[K.p.cg.6\t1088\t926.486\t9709\t15616\t33033\t389\t53\tCUST_24740_PI419289827\t\u001b[01;31m\u001b[KCF369228\u001b[m\u001b[K.p.cg.6\t\u001b[01;31m\u001b[KCF369228\u001b[m\u001b[K\thypothetical protein BRAFLDRAFT_125849 [Branchiostoma floridae]\tCCCAAAATAGAGAATTTTCAAGTATGTTGTACCATTGACAGGGGTAAAAGGGAATGTTAA\r",
"\r\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\u001b[01;31m\u001b[KEF219428\u001b[m\u001b[K\t\u001b[01;31m\u001b[KEF219428\u001b[m\u001b[K.p.cg.6\t2049\t256.783\t773\t8151\t38965\t459\t35\tCUST_24586_PI419289827\t\u001b[01;31m\u001b[KEF219428\u001b[m\u001b[K.p.cg.6\t\u001b[01;31m\u001b[KEF219428\u001b[m\u001b[K\thypothetical protein BRAFLDRAFT_118409 [Branchiostoma floridae]\tGACGTGCAAGTTTTTGGAGGTTAATTAGACTCTGTTAACCAAGAGCATATACTTTGCTTT\r",
"\r\n"
]
}
],
"prompt_number": 35
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!egrep -wi --color 'CU998852' /Volumes/web/cnidarian/BiRNAseq_GPL11353_v6ref.txt"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\u001b[01;31m\u001b[KCU998852\u001b[m\u001b[K\t\u001b[01;31m\u001b[KCU998852\u001b[m\u001b[K.p.cg.6\t449\t229.304\t1595\t1595\t9555\t113\t35\tCUST_30571_PI419289827\t\u001b[01;31m\u001b[KCU998852\u001b[m\u001b[K.p.cg.6\t\u001b[01;31m\u001b[KCU998852\u001b[m\u001b[K\t(sp:Q9C005) Protein dpy-30 homolog OS=Homo sapiens GN=DPY30 PE=1 SV=1\tCTGTGAACTTGTTACAGTTTTCATGCATTCATCTGTTATTTGTGTCTCGACAGTATCATT\r",
"\r\n"
]
}
],
"prompt_number": 36
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"BiGo Sperm RNAseq v GPL11353 array"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Supplementary tables from Dheilly et al"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"--- \n",
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"--- \n",
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"--- \n",
""
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}