Package Bio :: Package Sequencing :: Package Applications :: Module _Novoalign
[hide private]
[frames | no frames]

Source Code for Module Bio.Sequencing.Applications._Novoalign

  1  # Copyright 2009 by Osvaldo Zagordi.  All rights reserved. 
  2  # Revisions copyright 2010 by Peter Cock. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  """Command line wrapper for the short read aligner Novoalign by Novocraft.""" 
  7  import types 
  8  from Bio.Application import _Option, AbstractCommandline 
  9   
class NovoalignCommandline(AbstractCommandline):
    """Command line wrapper for novoalign by Novocraft.

    See www.novocraft.com - novoalign is a short read alignment program.

    Example:

    >>> from Bio.Sequencing.Applications import NovoalignCommandline
    >>> novoalign_cline = NovoalignCommandline(database='some_db',
    ...                                        readfile='some_seq.txt')
    >>> print(novoalign_cline)
    novoalign -d some_db -f some_seq.txt

    As with all the Biopython application wrappers, you can also add or
    change options after creating the object:

    >>> novoalign_cline.format = 'PRBnSEQ'
    >>> novoalign_cline.r_method='0.99' # limited valid values
    >>> novoalign_cline.fragment = '250 20' # must be given as a string
    >>> novoalign_cline.miRNA = 100
    >>> print(novoalign_cline)
    novoalign -d some_db -f some_seq.txt -F PRBnSEQ -r 0.99 -i 250 20 -m 100

    You would typically run the command line with novoalign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Last checked against version: 2.05.04
    """

    def __init__(self, cmd="novoalign", **kwargs):
        """Declare the supported novoalign options and initialise the wrapper.

        Arguments:
         - cmd - name (or path) of the executable, "novoalign" by default.
         - kwargs - initial option values keyed by the descriptive option
           names declared below (e.g. database, readfile); they are passed
           unchanged to AbstractCommandline.__init__.
        """
        READ_FORMAT = ['FA', 'SLXFQ', 'STDFQ', 'ILMFQ', 'PRB', 'PRBnSEQ']
        REPORT_FORMAT = ['Native', 'Pairwise', 'SAM']
        REPEAT_METHOD = ['None', 'Random', 'All', 'Exhaustive', '0.99']

        # The builtins int and str are the very objects that Python 2 exposes
        # as types.IntType and types.StringType, but unlike those aliases
        # they also exist on Python 3.
        def is_int(value):
            """Check that the value is an integer."""
            return isinstance(value, int)

        def is_str(value):
            """Check that the value is a string."""
            return isinstance(value, str)

        def is_repeat_method(value):
            """Check that the first word of the value is a known method.

            Only the first word is checked because 'All' and 'Exhaustive'
            may be followed by a limit. An empty or blank string is simply
            invalid (indexing the empty word list would raise IndexError).
            """
            words = value.split()
            return bool(words) and words[0] in REPEAT_METHOD

        self.parameters = [
            _Option(["-d", "database"],
                    "database filename",
                    filename=True,
                    equate=False),
            _Option(["-f", "readfile"],
                    "read file",
                    filename=True,
                    equate=False),
            _Option(["-F", "format"],
                    "Format of read files.\n\nAllowed values: %s"
                    % ", ".join(READ_FORMAT),
                    checker_function=lambda x: x in READ_FORMAT,
                    equate=False),

            # Alignment scoring options
            _Option(["-t", "threshold"],
                    "Threshold for alignment score",
                    checker_function=is_int,
                    equate=False),
            _Option(["-g", "gap_open"],
                    "Gap opening penalty [default: 40]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-x", "gap_extend"],
                    "Gap extend penalty [default: 15]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-u", "unconverted"],
                    "Experimental: unconverted cytosines penalty in bisulfite mode\n\n"
                    "Default: no penalty",
                    checker_function=is_int,
                    equate=False),

            # Quality control and read filtering
            _Option(["-l", "good_bases"],
                    "Minimum number of good quality bases [default: log(N_g, 4) + 5]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-h", "homopolymer"],
                    "Homopolymer read filter [default: 20; disable: negative value]",
                    checker_function=is_int,
                    equate=False),

            # Read preprocessing options
            _Option(["-a", "adapter3"],
                    "Strips a 3' adapter sequence prior to alignment.\n\n"
                    "With paired ends two adapters can be specified",
                    checker_function=is_str,
                    equate=False),
            _Option(["-n", "truncate"],
                    "Truncate to specific length before alignment",
                    checker_function=is_int,
                    equate=False),
            _Option(["-s", "trimming"],
                    "If fail to align, trim by s bases until they map or become shorter than l.\n\n"
                    "Default: 2",
                    checker_function=is_int,
                    equate=False),
            _Option(["-5", "adapter5"],
                    "Strips a 5' adapter sequence.\n\n"
                    "Similar to -a (adaptor3), but on the 5' end.",
                    checker_function=is_str,
                    equate=False),

            # Reporting options
            _Option(["-o", "report"],
                    "Specifies the report format.\n\nAllowed values: %s\nDefault: Native"
                    % ", ".join(REPORT_FORMAT),
                    checker_function=lambda x: x in REPORT_FORMAT,
                    equate=False),
            _Option(["-Q", "quality"],
                    "Lower threshold for an alignment to be reported [default: 0]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-R", "repeats"],
                    "If score difference is higher, report repeats.\n\n"
                    "Otherwise -r read method applies [default: 5]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-r", "r_method"],
                    "Methods to report reads with multiple matches.\n\n"
                    "Allowed values: %s\n"
                    "'All' and 'Exhaustive' accept limits."
                    % ", ".join(REPEAT_METHOD),
                    checker_function=is_repeat_method,
                    equate=False),
            _Option(["-e", "recorded"],
                    "Alignments recorded with score equal to the best.\n\n"
                    "Default: 1000 in default read method, otherwise no limit.",
                    checker_function=is_int,
                    equate=False),
            _Option(["-q", "qual_digits"],
                    "Decimal digits for quality scores [default: 0]",
                    checker_function=is_int,
                    equate=False),

            # Paired end options
            _Option(["-i", "fragment"],
                    "Fragment length (2 reads + insert) and standard deviation [default: 250 30]",
                    # Must be one string holding two whitespace separated fields.
                    checker_function=lambda x: len(x.split()) == 2,
                    equate=False),
            _Option(["-v", "variation"],
                    "Structural variation penalty [default: 70]",
                    checker_function=is_int,
                    equate=False),

            # miRNA mode
            _Option(["-m", "miRNA"],
                    "Sets miRNA mode and optionally sets a value for the region scanned [default: off]",
                    checker_function=is_int,
                    equate=False),

            # Multithreading
            _Option(["-c", "cores"],
                    "Number of threads, disabled on free versions [default: number of cores]",
                    checker_function=is_int,
                    equate=False),

            # Quality calibrations
            _Option(["-k", "read_cal"],
                    "Read quality calibration from file (mismatch counts)",
                    checker_function=is_str,
                    equate=False),
            _Option(["-K", "write_cal"],
                    "Accumulate mismatch counts and write to file",
                    checker_function=is_str,
                    equate=False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
174
def _test():
    """Run the module's doctests (PRIVATE).

    Prints a short banner, runs doctest.testmod() and then prints "Done".
    The print calls take a single argument so they produce the same output
    under the Python 2 print statement and the Python 3 print function.
    """
    print("Running Novoalign doctests...")
    import doctest
    doctest.testmod()
    print("Done")

# Script entry point: run the doctests only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    _test()