1
2
3
4
5
6
7 from Bio.Alphabet import IUPAC
8 from Bio.Motif.Parsers.MEME import MEMEMotif
9
10
class Record(object):
    """Hold the results of a MAST run.

    A MAST.Record holds data about matches between motifs and sequences.
    The motifs held by the Record are objects of the class MEMEMotif.

    Attributes (all start out empty and are filled in by the parser):
    sequences -- list of sequence names.
    version   -- version string.
    database  -- database name.
    diagrams  -- dictionary mapping a sequence name to its motif diagram.
    alphabet  -- alphabet of the database, or None if not determined.
    motifs    -- list of motif objects.

    Methods:
    get_motif_by_name(name): returns the motif with the given name, or
    None if the record holds no motif with that name.
    """

    # NOTE(review): the ``class`` line and both ``def`` lines were missing
    # from the reviewed listing. The class name and the lookup method name
    # are taken from the docstring ("MAST.Record", "get_motif_by_name");
    # the ``object`` base class is an assumption -- confirm.

    def __init__(self):
        """Create an empty record."""
        self.sequences = []
        self.version = ""
        self.database = ""
        self.diagrams = {}
        self.alphabet = None
        self.motifs = []

    def get_motif_by_name(self, name):
        """Return the first motif whose ``name`` attribute equals name.

        Returns None when no motif matches.
        """
        for motif in self.motifs:
            if motif.name == name:
                return motif
        return None
34
44
45
46
47
48
def __read_version(record, handle):
    """Scan handle for the 'MAST version' line and store the version.

    Lines are consumed from handle up to and including the first line that
    contains the text 'MAST version'. The third whitespace-separated word
    of that line is stored in record.version.

    Raises ValueError if no such line is found, or if the line has fewer
    than three words.
    """
    # NOTE(review): the ``def`` line was missing from the reviewed listing.
    # The function name and the (record, handle) signature are reconstructed
    # from the names the body uses -- confirm against the caller.
    for line in handle:
        if "MAST version" in line:
            break
    else:
        # Loop-level else: reached only when the handle ran out without a
        # matching line.
        raise ValueError("Improper input file. Does not begin with a line with 'MAST version'")
    words = line.split()
    if len(words) < 3:
        # Report a malformed version line as a format error rather than
        # letting a bare IndexError escape.
        raise ValueError("Line does not contain a version number:\n%s" % line)
    record.version = words[2]
56
57
def __read_database_and_motifs(record, handle):
    """Read the 'DATABASE AND MOTIFS' section from handle into record.

    Sets record.database to the second word of the DATABASE line and
    record.alphabet according to its third word ('(nucleotide)' or
    '(peptide)'; any other value leaves the alphabet untouched). Then
    appends one MEMEMotif per row of the motif table to record.motifs,
    taking the name from the first column and the length from the second.

    Raises ValueError if an expected separator or header line is missing
    or malformed, including when the input ends early.
    """
    # NOTE(review): the ``def`` line was missing from the reviewed listing.
    # The function name and the (record, handle) signature are reconstructed
    # from the names the body uses -- confirm against the caller.
    for line in handle:
        if line.startswith('DATABASE AND MOTIFS'):
            break
    # next(handle, '') works on any iterator (handle.next() exists only on
    # Python 2 iterators) and turns end-of-input into an empty line, which
    # the checks below report as ValueError instead of leaking StopIteration.
    line = next(handle, '')
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
    line = next(handle, '')
    if 'DATABASE' not in line:
        raise ValueError("Line does not contain 'DATABASE':\n%s" % line)
    words = line.strip().split()
    if len(words) < 3:
        raise ValueError("Line does not contain a database name and type:\n%s" % line)
    record.database = words[1]
    if words[2] == '(nucleotide)':
        record.alphabet = IUPAC.unambiguous_dna
    elif words[2] == '(peptide)':
        record.alphabet = IUPAC.protein
    for line in handle:
        if 'MOTIF WIDTH' in line:
            break
    line = next(handle, '')
    if '----' not in line:
        raise ValueError("Line does not contain '----':\n%s" % line)
    # One motif per table row; the table ends at the first blank line.
    for line in handle:
        if not line.strip():
            break
        words = line.strip().split()
        motif = MEMEMotif()
        motif.alphabet = record.alphabet
        motif.name = words[0]
        motif.length = int(words[1])
        record.motifs.append(motif)
90
91
def __read_section_i(record, handle):
    """Read the sequence names listed in 'SECTION I:' into record.

    Skips ahead to the line starting with 'SECTION I:', then to the
    'SEQUENCE NAME' table header, checks the '---' rule beneath it, and
    appends the first word of every table row to record.sequences. The
    table ends at the first blank line, which must be followed by a line
    starting with '****'.

    Raises ValueError if a separator line is missing or malformed
    (including when the input ends early), or if a table row has fewer
    than two words.
    """
    # NOTE(review): the ``def`` line was missing from the reviewed listing.
    # The function name and the (record, handle) signature are reconstructed
    # from the names the body uses -- confirm against the caller.
    for line in handle:
        if line.startswith('SECTION I:'):
            break
    for line in handle:
        if line.startswith('SEQUENCE NAME'):
            break
    # next(handle, '') works on any iterator (handle.next() exists only on
    # Python 2 iterators) and turns end-of-input into an empty line, which
    # the checks below report as ValueError instead of leaking StopIteration.
    line = next(handle, '')
    if not line.startswith('---'):
        raise ValueError("Line does not start with '---':\n%s" % line)
    for line in handle:
        if not line.strip():
            break
        # Only the name is kept; the rest of the row (description, E-value
        # and length, per the original local's name) is discarded.
        sequence, _rest = line.split(None, 1)
        record.sequences.append(sequence)
    line = next(handle, '')
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
111
112
def __read_section_ii(record, handle):
    """Read the motif diagrams listed in 'SECTION II:' into record.

    Skips ahead to the line starting with 'SECTION II:', then to the
    'SEQUENCE NAME' table header, and checks the '---' rule beneath it.
    Each table row of three words (name, p-value, diagram) stores the
    diagram in record.diagrams under the sequence name. A row that starts
    with a space is a continuation and is appended to the diagram of the
    most recent sequence. The table ends at the first blank line, which
    must be followed by a line starting with '****'.

    Raises ValueError if a separator line is missing or malformed
    (including when the input ends early), if a row does not have exactly
    three words, or if a continuation row appears before any sequence row.
    """
    # NOTE(review): the ``def`` line was missing from the reviewed listing.
    # The function name and the (record, handle) signature are reconstructed
    # from the names the body uses -- confirm against the caller.
    for line in handle:
        if line.startswith('SECTION II:'):
            break
    for line in handle:
        if line.startswith('SEQUENCE NAME'):
            break
    # next(handle, '') works on any iterator (handle.next() exists only on
    # Python 2 iterators) and turns end-of-input into an empty line, which
    # the checks below report as ValueError instead of leaking StopIteration.
    line = next(handle, '')
    if not line.startswith('---'):
        raise ValueError("Line does not start with '---':\n%s" % line)
    sequence = None
    for line in handle:
        if not line.strip():
            break
        elif line.startswith(" "):
            if sequence is None:
                # Previously this hit an unbound local variable; report it
                # as a format error like every other malformed line.
                raise ValueError("Diagram continuation line found before any sequence line:\n%s" % line)
            record.diagrams[sequence] += line.strip()
        else:
            sequence, _pvalue, diagram = line.split()
            record.diagrams[sequence] = diagram
    line = next(handle, '')
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
135
136
150