{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Quick mapping of reads" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\r\n", "BSMAP v2.74\r\n", "Usage:\tbsmap [options]\r\n", " -a query a file, FASTA/FASTQ/BAM format\r\n", " -d reference sequences file, FASTA format\r\n", " -o output alignment file, BSP/SAM/BAM format, if omitted, the output will be written to STDOUT in SAM format.\r\n", "\r\n", " Options for alignment:\r\n", " -s seed size, default=16(WGBS mode), 12(RRBS mode). min=8, max=16.\r\n", " -v if this value is between 0 and 1, it's interpreted as the mismatch rate w.r.t to the read length.\r\n", " otherwise it's interpreted as the maximum number of mismatches allowed on a read, <=15.\r\n", " example: -v 5 (max #mismatches = 5), -v 0.1 (max #mismatches = read_length * 10%)\r\n", " default=0.08.\r\n", " -g gap size, BSMAP only allow 1 continuous gap (insert or deletion) with up to 3 nucleotides\r\n", " default=0\r\n", " -w maximum number of equal best hits to count, <=1000\r\n", " -3 using 3-nucleotide mapping approach\r\n", " -B start from the Nth read or read pair, default: 1\r\n", " -E end at the Nth read or read pair, default: 4,294,967,295\r\n", " -I index interval, default=4\r\n", " -k set the cut-off ratio for over-represented kmers, default=5e-07\r\n", " example: -k 1e-6 means the top 0.0001% over-represented kmer will be skipped in alignment\r\n", " -p number of processors to use, default=8\r\n", " -D activating RRBS mapping mode and set restriction enzyme digestion sites. \r\n", " digestion position marked by '-', example: -D C-CGG for MspI digestion.\r\n", " default: none (whole genome shotgun bisulfite mapping mode)\r\n", " -S seed for random number generation used in selecting multiple hits\r\n", " other seed values generate pseudo random number based on read index number, to allow reproducible mapping results. \r\n", " default=0. (get seed from system clock, mapping results not resproducible.)\r\n", " -n [0,1] set mapping strand information. default: 0\r\n", " -n 0: only map to 2 forward strands, i.e. BSW(++) and BSC(-+), \r\n", " for PE sequencing, map read#1 to ++ and -+, read#2 to +- and --.\r\n", " -n 1: map SE or PE reads to all 4 strands, i.e. ++, +-, -+, -- \r\n", " -M set alignment information for the additional nucleotide transition. \r\n", " is in the form of two different nucleotides N1N2, \r\n", " indicating N1 in the reads could be mapped to N2 in the reference sequences.\r\n", " default: -M TC, corresponds to C=>U(T) transition in bisulfite conversion. \r\n", " example: -M GA could be used to detect A=>I(G) transition in RNA editing. \r\n", "\r\n", " Options for trimming:\r\n", " -q quality threshold in trimming, 0-40, default=0 (no trim)\r\n", " -z base quality, default=33 [Illumina is using 64, Sanger Institute is using 33]\r\n", " -f filter low-quality reads containing >n Ns, default=5\r\n", " -A 3-end adapter sequence, default: none (no trim)\r\n", " -L map the first N nucleotides of the read, default:144 (map the whole read).\r\n", "\r\n", " Options for reporting:\r\n", " -r [0,1] how to report repeat hits, 0=none(unique hit/pair only); 1=random one, default:1.\r\n", " -R print corresponding reference sequences in SAM output, default=off\r\n", " -u report unmapped reads, default=off\r\n", " -H do not print header information in SAM format output\r\n", "\r\n", " Options for pair-end alignment:\r\n", " -b query b file\r\n", " -m minimal insert size allowed, default=28\r\n", " -x maximal insert size allowed, default=500\r\n", "\r\n", " -h help\r\n", "\r\n" ] } ], "source": [ "!/Applications/bioinfo/BSMAP/bsmap-2.74/bsmap" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "100 12.8M 100 12.8M 0 0 7965k 0 0:00:01 0:00:01 --:--:-- 7969k\n" ] } ], "source": [ "!curl http://eagle.fish.washington.edu/cnidarian/OlyO_Pat_PacBio_10k_contigs.fa \\\n", "-o /Volumes/Histidine/hectocotylus/OlyO_Pat_PacBio_10k_contigs.fa" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "BSMAP v2.74\n", "Start at: Fri Jan 22 12:23:45 2016\n", "\n", "Input reference file: /Volumes/Histidine/hectocotylus/OlyO_Pat_PacBio_10k_contigs.fa \t(format: FASTA)\n", "Load in 553 db seqs, total size 13282692 bp. 0 secs passed\n", "total_kmers: 43046721\n", "Create seed table. 2 secs passed\n", "max number of mismatches: read_length * 8% \tmax gap size: 0\n", "kmer cut-off ratio: 5e-07\n", "max multi-hits: 100\tmax Ns: 5\tseed size: 16\tindex interval: 4\n", "quality cutoff: 0\tbase quality char: '!'\n", "min fragment size:28\tmax fragemt size:500\n", "start from read #1\tend at read #4294967295\n", "additional alignment: T in reads => C in reference\n", "mapping strand: ++,-+\n", "Single-end alignment(8 threads)\n", "Input read file: /Volumes/web-1/nightingales/O_lurida/1_ATCACG_L001_R1_001.fastq.gz \t(format: gzipped FASTQ)\n", "Output file: /Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs.sam\t (format: SAM)\n", "Thread #0: \t50000 reads finished. 2 secs passed\n", "Thread #1: \t100000 reads finished. 2 secs passed\n", "Thread #2: \t150000 reads finished. 3 secs passed\n", "Thread #6: \t200000 reads finished. 3 secs passed\n", "Thread #7: \t250000 reads finished. 3 secs passed\n", "Thread #3: \t300000 reads finished. 3 secs passed\n", "Thread #4: \t350000 reads finished. 3 secs passed\n", "Thread #5: \t400000 reads finished. 3 secs passed\n", "Thread #0: \t450000 reads finished. 3 secs passed\n", "Thread #1: \t500000 reads finished. 3 secs passed\n", "Thread #2: \t550000 reads finished. 4 secs passed\n", "Thread #6: \t600000 reads finished. 4 secs passed\n", "Thread #7: \t650000 reads finished. 4 secs passed\n", "Thread #3: \t700000 reads finished. 4 secs passed\n", "Thread #4: \t750000 reads finished. 4 secs passed\n", "Thread #5: \t800000 reads finished. 4 secs passed\n", "Thread #0: \t850000 reads finished. 4 secs passed\n", "Thread #1: \t900000 reads finished. 5 secs passed\n", "Thread #2: \t950000 reads finished. 5 secs passed\n", "Thread #6: \t1000000 reads finished. 5 secs passed\n", "Thread #7: \t1050000 reads finished. 5 secs passed\n", "Thread #3: \t1100000 reads finished. 5 secs passed\n", "Thread #4: \t1150000 reads finished. 5 secs passed\n", "Thread #5: \t1200000 reads finished. 5 secs passed\n", "Thread #0: \t1250000 reads finished. 5 secs passed\n", "Thread #1: \t1300000 reads finished. 6 secs passed\n", "Thread #2: \t1350000 reads finished. 6 secs passed\n", "Thread #6: \t1400000 reads finished. 6 secs passed\n", "Thread #7: \t1450000 reads finished. 6 secs passed\n", "Thread #3: \t1500000 reads finished. 6 secs passed\n", "Thread #4: \t1550000 reads finished. 6 secs passed\n", "Thread #5: \t1600000 reads finished. 6 secs passed\n", "Thread #0: \t1650000 reads finished. 6 secs passed\n", "Thread #1: \t1700000 reads finished. 7 secs passed\n", "Thread #2: \t1750000 reads finished. 7 secs passed\n", "Thread #6: \t1800000 reads finished. 8 secs passed\n", "Thread #7: \t1850000 reads finished. 8 secs passed\n", "Thread #3: \t1900000 reads finished. 8 secs passed\n", "Thread #4: \t1950000 reads finished. 9 secs passed\n", "Thread #5: \t2000000 reads finished. 9 secs passed\n", "Thread #0: \t2050000 reads finished. 9 secs passed\n", "Thread #1: \t2100000 reads finished. 9 secs passed\n", "Thread #2: \t2150000 reads finished. 9 secs passed\n", "Thread #6: \t2200000 reads finished. 9 secs passed\n", "Thread #7: \t2250000 reads finished. 10 secs passed\n", "Thread #3: \t2300000 reads finished. 10 secs passed\n", "Thread #4: \t2350000 reads finished. 10 secs passed\n", "Thread #5: \t2400000 reads finished. 10 secs passed\n", "Thread #0: \t2450000 reads finished. 10 secs passed\n", "Thread #1: \t2500000 reads finished. 10 secs passed\n", "Thread #2: \t2550000 reads finished. 10 secs passed\n", "Thread #6: \t2600000 reads finished. 10 secs passed\n", "Thread #7: \t2650000 reads finished. 11 secs passed\n", "Thread #3: \t2700000 reads finished. 11 secs passed\n", "Thread #4: \t2750000 reads finished. 11 secs passed\n", "Thread #5: \t2800000 reads finished. 11 secs passed\n", "Thread #0: \t2850000 reads finished. 11 secs passed\n", "Thread #1: \t2900000 reads finished. 11 secs passed\n", "Thread #2: \t2950000 reads finished. 11 secs passed\n", "Thread #6: \t3000000 reads finished. 12 secs passed\n", "Thread #7: \t3050000 reads finished. 12 secs passed\n", "Thread #3: \t3100000 reads finished. 12 secs passed\n", "Thread #4: \t3150000 reads finished. 12 secs passed\n", "Thread #5: \t3200000 reads finished. 12 secs passed\n", "Thread #0: \t3250000 reads finished. 12 secs passed\n", "Thread #1: \t3300000 reads finished. 12 secs passed\n", "Thread #2: \t3350000 reads finished. 13 secs passed\n", "Thread #6: \t3400000 reads finished. 13 secs passed\n", "Thread #7: \t3450000 reads finished. 13 secs passed\n", "Thread #3: \t3500000 reads finished. 13 secs passed\n", "Thread #4: \t3550000 reads finished. 13 secs passed\n", "Thread #5: \t3600000 reads finished. 13 secs passed\n", "Thread #0: \t3650000 reads finished. 13 secs passed\n", "Thread #1: \t3700000 reads finished. 13 secs passed\n", "Thread #2: \t3750000 reads finished. 14 secs passed\n", "Thread #6: \t3800000 reads finished. 14 secs passed\n", "Thread #7: \t3850000 reads finished. 14 secs passed\n", "Thread #3: \t3900000 reads finished. 14 secs passed\n", "Thread #4: \t3950000 reads finished. 14 secs passed\n", "Thread #5: \t4000000 reads finished. 14 secs passed\n", "Thread #0: \t4050000 reads finished. 14 secs passed\n", "Thread #1: \t4100000 reads finished. 14 secs passed\n", "Thread #2: \t4150000 reads finished. 15 secs passed\n", "Thread #6: \t4200000 reads finished. 15 secs passed\n", "Thread #7: \t4250000 reads finished. 15 secs passed\n", "Thread #3: \t4300000 reads finished. 15 secs passed\n", "Thread #4: \t4350000 reads finished. 15 secs passed\n", "Thread #5: \t4400000 reads finished. 15 secs passed\n", "Thread #0: \t4450000 reads finished. 15 secs passed\n", "Thread #1: \t4500000 reads finished. 15 secs passed\n", "Thread #2: \t4550000 reads finished. 16 secs passed\n", "Thread #6: \t4600000 reads finished. 16 secs passed\n", "Thread #7: \t4650000 reads finished. 16 secs passed\n", "Thread #3: \t4700000 reads finished. 16 secs passed\n", "Thread #4: \t4750000 reads finished. 16 secs passed\n", "Thread #5: \t4800000 reads finished. 16 secs passed\n", "Thread #0: \t4850000 reads finished. 16 secs passed\n", "Thread #1: \t4900000 reads finished. 17 secs passed\n", "Thread #2: \t4950000 reads finished. 17 secs passed\n", "Thread #6: \t5000000 reads finished. 17 secs passed\n", "Thread #7: \t5050000 reads finished. 17 secs passed\n", "Thread #3: \t5100000 reads finished. 17 secs passed\n", "Thread #4: \t5150000 reads finished. 17 secs passed\n", "Thread #5: \t5200000 reads finished. 17 secs passed\n", "Thread #0: \t5250000 reads finished. 18 secs passed\n", "Thread #1: \t5300000 reads finished. 18 secs passed\n", "Thread #2: \t5350000 reads finished. 18 secs passed\n", "Thread #6: \t5400000 reads finished. 18 secs passed\n", "Thread #7: \t5450000 reads finished. 18 secs passed\n", "Thread #3: \t5500000 reads finished. 18 secs passed\n", "Thread #4: \t5550000 reads finished. 18 secs passed\n", "Thread #5: \t5600000 reads finished. 18 secs passed\n", "Thread #0: \t5650000 reads finished. 19 secs passed\n", "Thread #1: \t5700000 reads finished. 19 secs passed\n", "Thread #2: \t5750000 reads finished. 19 secs passed\n", "Thread #6: \t5800000 reads finished. 19 secs passed\n", "Thread #7: \t5850000 reads finished. 19 secs passed\n", "Thread #3: \t5900000 reads finished. 19 secs passed\n", "Thread #4: \t5950000 reads finished. 19 secs passed\n", "Thread #5: \t6000000 reads finished. 19 secs passed\n", "Thread #0: \t6050000 reads finished. 20 secs passed\n", "Thread #1: \t6100000 reads finished. 20 secs passed\n", "Thread #2: \t6150000 reads finished. 20 secs passed\n", "Thread #6: \t6200000 reads finished. 20 secs passed\n", "Thread #7: \t6250000 reads finished. 20 secs passed\n", "Thread #3: \t6300000 reads finished. 20 secs passed\n", "Thread #4: \t6350000 reads finished. 20 secs passed\n", "Thread #5: \t6400000 reads finished. 21 secs passed\n", "Thread #0: \t6450000 reads finished. 21 secs passed\n", "Thread #1: \t6500000 reads finished. 21 secs passed\n", "Thread #2: \t6550000 reads finished. 21 secs passed\n", "Thread #6: \t6600000 reads finished. 21 secs passed\n", "Thread #7: \t6650000 reads finished. 21 secs passed\n", "Thread #3: \t6700000 reads finished. 21 secs passed\n", "Thread #4: \t6750000 reads finished. 21 secs passed\n", "Thread #5: \t6800000 reads finished. 22 secs passed\n", "Thread #0: \t6850000 reads finished. 22 secs passed\n", "Thread #1: \t6900000 reads finished. 22 secs passed\n", "Thread #2: \t6950000 reads finished. 22 secs passed\n", "Thread #6: \t7000000 reads finished. 23 secs passed\n", "Thread #7: \t7050000 reads finished. 23 secs passed\n", "Thread #3: \t7100000 reads finished. 23 secs passed\n", "Thread #4: \t7150000 reads finished. 23 secs passed\n", "Thread #5: \t7200000 reads finished. 23 secs passed\n", "Thread #0: \t7250000 reads finished. 23 secs passed\n", "Thread #1: \t7300000 reads finished. 23 secs passed\n", "Thread #2: \t7350000 reads finished. 23 secs passed\n", "Thread #6: \t7400000 reads finished. 24 secs passed\n", "Thread #7: \t7450000 reads finished. 24 secs passed\n", "Thread #3: \t7500000 reads finished. 24 secs passed\n", "Thread #4: \t7550000 reads finished. 24 secs passed\n", "Thread #5: \t7600000 reads finished. 24 secs passed\n", "Thread #0: \t7650000 reads finished. 24 secs passed\n", "Thread #1: \t7700000 reads finished. 24 secs passed\n", "Thread #2: \t7750000 reads finished. 24 secs passed\n", "Thread #6: \t7800000 reads finished. 25 secs passed\n", "Thread #7: \t7850000 reads finished. 25 secs passed\n", "Thread #3: \t7900000 reads finished. 25 secs passed\n", "Thread #4: \t7950000 reads finished. 25 secs passed\n", "Thread #5: \t8000000 reads finished. 25 secs passed\n", "Thread #0: \t8050000 reads finished. 25 secs passed\n", "Thread #1: \t8100000 reads finished. 25 secs passed\n", "Thread #2: \t8150000 reads finished. 26 secs passed\n", "Thread #6: \t8200000 reads finished. 26 secs passed\n", "Thread #7: \t8250000 reads finished. 26 secs passed\n", "Thread #3: \t8300000 reads finished. 26 secs passed\n", "Thread #4: \t8350000 reads finished. 26 secs passed\n", "Thread #5: \t8400000 reads finished. 26 secs passed\n", "Thread #0: \t8450000 reads finished. 26 secs passed\n", "Thread #1: \t8500000 reads finished. 26 secs passed\n", "Thread #2: \t8550000 reads finished. 27 secs passed\n", "Thread #6: \t8600000 reads finished. 27 secs passed\n", "Thread #7: \t8650000 reads finished. 27 secs passed\n", "Thread #3: \t8700000 reads finished. 27 secs passed\n", "Thread #4: \t8750000 reads finished. 27 secs passed\n", "Thread #5: \t8800000 reads finished. 27 secs passed\n", "Thread #0: \t8850000 reads finished. 27 secs passed\n", "Thread #1: \t8900000 reads finished. 27 secs passed\n", "Thread #2: \t8950000 reads finished. 28 secs passed\n", "Thread #6: \t9000000 reads finished. 28 secs passed\n", "Thread #7: \t9050000 reads finished. 28 secs passed\n", "Thread #3: \t9100000 reads finished. 28 secs passed\n", "Thread #4: \t9150000 reads finished. 29 secs passed\n", "Thread #5: \t9200000 reads finished. 29 secs passed\n", "Thread #0: \t9250000 reads finished. 30 secs passed\n", "Thread #7: \t9402890 reads finished. 30 secs passed\n", "Thread #1: \t9300000 reads finished. 30 secs passed\n", "Thread #2: \t9350000 reads finished. 30 secs passed\n", "Thread #6: \t9400000 reads finished. 30 secs passed\n", "Total number of aligned reads: 348658 (3.7%)\n", "Done.\n", "Finished at Fri Jan 22 12:24:15 2016\n", "Total time consumed: 30 secs\n" ] } ], "source": [ "!/Applications/bioinfo/BSMAP/bsmap-2.74/bsmap \\\n", "-a /Volumes/web-1/nightingales/O_lurida/1_ATCACG_L001_R1_001.fastq.gz \\\n", "-d /Volumes/Histidine/hectocotylus/OlyO_Pat_PacBio_10k_contigs.fa \\\n", "-o /Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs.sam \\\n", "-p 8" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "@ Fri Jan 22 12:57:43 2016: reading reference /Volumes/Histidine/hectocotylus/OlyO_Pat_PacBio_10k_contigs.fa ...\n", "@ Fri Jan 22 12:57:44 2016: reading /Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs.sam ...\n", "[samopen] SAM header is present: 553 sequences.\n", "@ Fri Jan 22 12:57:52 2016: writing /Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs_methratio.txt ...\n", "@ Fri Jan 22 12:57:54 2016: done.\n", "total 348614 valid mappings, 83251 covered cytosines, average coverage: 35.90 fold.\n" ] } ], "source": [ "!python /Applications/bioinfo/BSMAP/bsmap-2.74/methratio.py \\\n", "-d /Volumes/Histidine/hectocotylus/OlyO_Pat_PacBio_10k_contigs.fa \\\n", "-o /Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs_methratio.txt \\\n", "-s /Applications/bioinfo/BSMAP/bsmap-2.74/samtools \\\n", "/Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs.sam " ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "chr\tpos\tstrand\tcontext\tratio\teff_CT_count\tC_count\tCT_count\trev_G_count\trev_GA_count\tCI_lower\tCI_upper\r\n", "OlyO_Pat_PacBio_10k_contig_1\t552\t+\tTCCAG\t1.000\t1.00\t1\t1\t0\t0\t0.207\t1.000\r\n", "OlyO_Pat_PacBio_10k_contig_1\t11454\t+\tTTCAG\t0.222\t9.00\t2\t9\t0\t0\t0.063\t0.547\r\n", "OlyO_Pat_PacBio_10k_contig_1\t11468\t+\tGCCAG\t0.062\t16.00\t1\t16\t0\t0\t0.011\t0.283\r\n", "OlyO_Pat_PacBio_10k_contig_1\t16495\t-\tAAGTC\t0.250\t4.00\t1\t4\t2\t2\t0.046\t0.699\r\n", "OlyO_Pat_PacBio_10k_contig_1\t16536\t-\tACGGG\t0.250\t4.00\t1\t4\t2\t2\t0.046\t0.699\r\n", "OlyO_Pat_PacBio_10k_contig_1\t16622\t-\tGGGTG\t1.000\t1.00\t1\t1\t0\t0\t0.207\t1.000\r\n", "OlyO_Pat_PacBio_10k_contig_1\t16649\t-\tACGGA\t0.231\t13.00\t3\t13\t0\t0\t0.082\t0.503\r\n", "OlyO_Pat_PacBio_10k_contig_1\t16659\t-\tCCGTA\t0.154\t13.00\t2\t13\t0\t0\t0.043\t0.422\r\n", "OlyO_Pat_PacBio_10k_contig_1\t16685\t-\tACGGC\t0.167\t12.00\t2\t12\t0\t0\t0.047\t0.448\r\n" ] } ], "source": [ "!head /Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs_methratio.txt" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 19852 /Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs_methratio.txt\r\n" ] } ], "source": [ "!wc -l /Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs_methratio.txt" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!grep \"[A-Z][A-Z]CG[A-Z]\" \\\n", "/Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs_methratio_CG.txt" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "OlyO_Pat_PacBio_10k_contig_10\t3425\t+\tCGCGT\t1.000\t1.00\t1\t1\t0\t0\t0.207\t1.000\n", "OlyO_Pat_PacBio_10k_contig_10\t3439\t+\tGACGT\t1.000\t1.00\t1\t1\t0\t0\t0.207\t1.000\n", "OlyO_Pat_PacBio_10k_contig_10\t6885\t+\tGCCGC\t1.000\t1.00\t1\t1\t0\t0\t0.207\t1.000\n", "OlyO_Pat_PacBio_10k_contig_10\t6901\t+\tGGCGA\t0.353\t507.17\t179\t537\t153\t162\t0.313\t0.395\n", "OlyO_Pat_PacBio_10k_contig_10\t6928\t+\tCACGT\t0.284\t507.04\t144\t538\t262\t278\t0.246\t0.325\n", "OlyO_Pat_PacBio_10k_contig_10\t6942\t+\tGACGC\t0.269\t490.56\t132\t540\t258\t284\t0.232\t0.310\n", "OlyO_Pat_PacBio_10k_contig_10\t6944\t+\tCGCGG\t0.293\t508.00\t149\t531\t265\t277\t0.255\t0.334\n", "OlyO_Pat_PacBio_10k_contig_10\t6948\t+\tGCCGG\t0.188\t399.57\t75\t413\t238\t246\t0.152\t0.229\n", "OlyO_Pat_PacBio_10k_contig_10\t8610\t+\tTGCGA\t0.091\t11.00\t1\t11\t1\t1\t0.016\t0.377\n", "OlyO_Pat_PacBio_10k_contig_10\t12919\t+\tAGCGT\t0.538\t11.14\t6\t13\t6\t7\t0.276\t0.781\n", " 3188 /Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs_methratio_CG.txt\n" ] } ], "source": [ "!head /Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs_methratio_CG.txt\n", "!wc -l /Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs_methratio_CG.txt" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [], "source": [ "!awk -f /Users/sr320/git-repos/nb-2016/scripts/mr3x.awk \\\n", "/Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs_methratio_CG.txt \\\n", "> /Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs_methratio_CG3x.txt\n" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "OlyO_Pat_PacBio_10k_contig_10 6901 + GGCGA 0.353 507.17 179 537 153 162 0.313 0.395\n", "OlyO_Pat_PacBio_10k_contig_10 6928 + CACGT 0.284 507.04 144 538 262 278 0.246 0.325\n", "OlyO_Pat_PacBio_10k_contig_10 6942 + GACGC 0.269 490.56 132 540 258 284 0.232 0.310\n", "OlyO_Pat_PacBio_10k_contig_10 6944 + CGCGG 0.293 508.00 149 531 265 277 0.255 0.334\n", "OlyO_Pat_PacBio_10k_contig_10 6948 + GCCGG 0.188 399.57 75 413 238 246 0.152 0.229\n", "OlyO_Pat_PacBio_10k_contig_10 8610 + TGCGA 0.091 11.00 1 11 1 1 0.016 0.377\n", "OlyO_Pat_PacBio_10k_contig_10 12919 + AGCGT 0.538 11.14 6 13 6 7 0.276 0.781\n", "OlyO_Pat_PacBio_10k_contig_10 12935 + TGCGA 0.324 213.00 69 213 18 18 0.265 0.389\n", "OlyO_Pat_PacBio_10k_contig_10 12940 + CACGG 0.305 243.00 74 243 20 20 0.250 0.365\n", "OlyO_Pat_PacBio_10k_contig_10 12949 + TCCGT 0.288 260.18 75 283 57 62 0.237 0.346\n", " 2454 /Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs_methratio_CG3x.txt\n" ] } ], "source": [ "!head /Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs_methratio_CG3x.txt\n", "!wc -l /Volumes/Histidine/hectocotylus/1_ATCACG_BSMAP10k_contigs_methratio_CG3x.txt" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Populating the interactive namespace from numpy and matplotlib\n" ] } ], "source": [ "%pylab inline\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "ename": "ImportError", "evalue": "No module named rpy2.ipython", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmagic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mu'load_ext rpy2.ipython'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/Users/sr320/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc\u001b[0m in \u001b[0;36mmagic\u001b[0;34m(self, arg_s)\u001b[0m\n\u001b[1;32m 2334\u001b[0m \u001b[0mmagic_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmagic_arg_s\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0marg_s\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpartition\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m' '\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2335\u001b[0m \u001b[0mmagic_name\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmagic_name\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlstrip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprefilter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mESC_MAGIC\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2336\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmagic_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmagic_arg_s\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2337\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2338\u001b[0m \u001b[0;31m#-------------------------------------------------------------------------\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/sr320/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc\u001b[0m in \u001b[0;36mrun_line_magic\u001b[0;34m(self, magic_name, line)\u001b[0m\n\u001b[1;32m 2255\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'local_ns'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getframe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstack_depth\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf_locals\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2256\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuiltin_trap\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2257\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2258\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2259\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m\u001b[0m in \u001b[0;36mload_ext\u001b[0;34m(self, module_str)\u001b[0m\n", "\u001b[0;32m/Users/sr320/anaconda/lib/python2.7/site-packages/IPython/core/magic.pyc\u001b[0m in \u001b[0;36m\u001b[0;34m(f, *a, **k)\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0;31m# but it's overkill for just that one bit of state.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmagic_deco\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 193\u001b[0;31m \u001b[0mcall\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 194\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcallable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/sr320/anaconda/lib/python2.7/site-packages/IPython/core/magics/extension.pyc\u001b[0m in \u001b[0;36mload_ext\u001b[0;34m(self, module_str)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mmodule_str\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mUsageError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Missing module name.'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 66\u001b[0;31m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshell\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextension_manager\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_extension\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodule_str\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 67\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'already loaded'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/sr320/anaconda/lib/python2.7/site-packages/IPython/core/extensions.pyc\u001b[0m in \u001b[0;36mload_extension\u001b[0;34m(self, module_str)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmodule_str\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodules\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mprepended_to_syspath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mipython_extension_dir\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 84\u001b[0;31m \u001b[0m__import__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodule_str\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 85\u001b[0m \u001b[0mmod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodules\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmodule_str\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_load_ipython_extension\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mImportError\u001b[0m: No module named rpy2.ipython" ] } ], "source": [ "%load_ext rpy2.ipython\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.11" } }, "nbformat": 4, "nbformat_minor": 0 }