1
2
3
4
5
6 """Command line wrapper for the short read aligner Novoalign by Novocraft."""
7 import types
8 from Bio.Application import _Option, AbstractCommandline
9
class NovoalignCommandline(AbstractCommandline):
    """Command line wrapper for novoalign by Novocraft.

    See www.novocraft.com - novoalign is a short read alignment program.

    Example:

    >>> from Bio.Sequencing.Applications import NovoalignCommandline
    >>> novoalign_cline = NovoalignCommandline(database='some_db',
    ...                                        readfile='some_seq.txt')
    >>> print(novoalign_cline)
    novoalign -d some_db -f some_seq.txt

    As with all the Biopython application wrappers, you can also add or
    change options after creating the object:

    >>> novoalign_cline.format = 'PRBnSEQ'
    >>> novoalign_cline.r_method='0.99' # limited valid values
    >>> novoalign_cline.fragment = '250 20' # must be given as a string
    >>> novoalign_cline.miRNA = 100
    >>> print(novoalign_cline)
    novoalign -d some_db -f some_seq.txt -F PRBnSEQ -r 0.99 -i 250 20 -m 100

    You would typically run the command line with novoalign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Last checked against version: 2.05.04
    """

    def __init__(self, cmd="novoalign", **kwargs):
        """Declare the novoalign options, then initialise the base wrapper.

        Arguments:
         - cmd - name (or path) of the executable to run.
         - kwargs - initial option values keyed by the option names declared
           below (e.g. database='some_db'); passed on unchanged to
           AbstractCommandline.__init__.
        """
        READ_FORMAT = ['FA', 'SLXFQ', 'STDFQ', 'ILMFQ', 'PRB', 'PRBnSEQ']
        REPORT_FORMAT = ['Native', 'Pairwise', 'SAM']
        REPEAT_METHOD = ['None', 'Random', 'All', 'Exhaustive', '0.99']

        # The builtins are used instead of types.IntType / types.StringType:
        # on Python 2 they are the very same objects, and unlike the
        # ``types`` aliases they still exist on Python 3.
        def is_int(value):
            return isinstance(value, int)

        def is_str(value):
            return isinstance(value, str)

        def is_repeat_method(value):
            # Only the first word is validated because 'All' and
            # 'Exhaustive' may be followed by a limit.  An empty or
            # whitespace-only string is rejected (False) rather than
            # raising IndexError from indexing an empty list.
            words = value.split()
            return bool(words) and words[0] in REPEAT_METHOD

        def is_value_pair(value):
            # The fragment option is one string holding exactly two
            # whitespace-separated fields (length and standard deviation).
            return len(value.split()) == 2

        self.parameters = [
            # Input files and read format.
            _Option(["-d", "database"],
                    "database filename",
                    filename=True,
                    equate=False),
            _Option(["-f", "readfile"],
                    "read file",
                    filename=True,
                    equate=False),
            _Option(["-F", "format"],
                    "Format of read files.\n\nAllowed values: %s"
                    % ", ".join(READ_FORMAT),
                    checker_function=lambda x: x in READ_FORMAT,
                    equate=False),

            # Alignment scoring.
            _Option(["-t", "threshold"],
                    "Threshold for alignment score",
                    checker_function=is_int,
                    equate=False),
            _Option(["-g", "gap_open"],
                    "Gap opening penalty [default: 40]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-x", "gap_extend"],
                    "Gap extend penalty [default: 15]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-u", "unconverted"],
                    "Experimental: unconverted cytosines penalty in bisulfite mode\n\n"
                    "Default: no penalty",
                    checker_function=is_int,
                    equate=False),

            # Read quality filters.
            _Option(["-l", "good_bases"],
                    "Minimum number of good quality bases [default: log(N_g, 4) + 5]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-h", "homopolymer"],
                    "Homopolymer read filter [default: 20; disable: negative value]",
                    checker_function=is_int,
                    equate=False),

            # Adapter stripping, truncation and trimming of reads.
            _Option(["-a", "adapter3"],
                    "Strips a 3' adapter sequence prior to alignment.\n\n"
                    "With paired ends two adapters can be specified",
                    checker_function=is_str,
                    equate=False),
            _Option(["-n", "truncate"],
                    "Truncate to specific length before alignment",
                    checker_function=is_int,
                    equate=False),
            _Option(["-s", "trimming"],
                    "If fail to align, trim by s bases until they map or become shorter than l.\n\n"
                    "Default: 2",
                    checker_function=is_int,
                    equate=False),
            _Option(["-5", "adapter5"],
                    "Strips a 5' adapter sequence.\n\n"
                    "Similar to -a (adaptor3), but on the 5' end.",
                    checker_function=is_str,
                    equate=False),

            # Reporting.
            _Option(["-o", "report"],
                    "Specifies the report format.\n\nAllowed values: %s\nDefault: Native"
                    % ", ".join(REPORT_FORMAT),
                    checker_function=lambda x: x in REPORT_FORMAT,
                    equate=False),
            _Option(["-Q", "quality"],
                    "Lower threshold for an alignment to be reported [default: 0]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-R", "repeats"],
                    "If score difference is higher, report repeats.\n\n"
                    "Otherwise -r read method applies [default: 5]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-r", "r_method"],
                    ("Methods to report reads with multiple matches.\n\n"
                     "Allowed values: %s\n"
                     "'All' and 'Exhaustive' accept limits.")
                    % ", ".join(REPEAT_METHOD),
                    checker_function=is_repeat_method,
                    equate=False),
            _Option(["-e", "recorded"],
                    "Alignments recorded with score equal to the best.\n\n"
                    "Default: 1000 in default read method, otherwise no limit.",
                    checker_function=is_int,
                    equate=False),
            _Option(["-q", "qual_digits"],
                    "Decimal digits for quality scores [default: 0]",
                    checker_function=is_int,
                    equate=False),

            # Fragment size and structural variation.
            _Option(["-i", "fragment"],
                    "Fragment length (2 reads + insert) and standard deviation [default: 250 30]",
                    checker_function=is_value_pair,
                    equate=False),
            _Option(["-v", "variation"],
                    "Structural variation penalty [default: 70]",
                    checker_function=is_int,
                    equate=False),

            # miRNA mode.
            _Option(["-m", "miRNA"],
                    "Sets miRNA mode and optionally sets a value for the region scanned [default: off]",
                    checker_function=is_int,
                    equate=False),

            # Threading.
            _Option(["-c", "cores"],
                    "Number of threads, disabled on free versions [default: number of cores]",
                    checker_function=is_int,
                    equate=False),

            # Quality calibration files.
            _Option(["-k", "read_cal"],
                    "Read quality calibration from file (mismatch counts)",
                    checker_function=is_str,
                    equate=False),
            _Option(["-K", "write_cal"],
                    "Accumulate mismatch counts and write to file",
                    checker_function=is_str,
                    equate=False),
        ]
        # self.parameters is assigned before the base initialiser is called,
        # which also receives any option values given as keyword arguments.
        AbstractCommandline.__init__(self, cmd, **kwargs)
174
176 """Run the module's doctests (PRIVATE)."""
177 print "Runing Novoalign doctests..."
178 import doctest
179 doctest.testmod()
180 print "Done"
181
# Script entry point: the doctests run only when this file is executed
# directly, never when it is imported as a module.
if __name__ == "__main__":
    _test()
184