Package Bio :: Package Motif :: Module _Motif
[hide private]
[frames] | no frames]

Source Code for Module Bio.Motif._Motif

  1  # Copyright 2003-2009 by Bartek Wilczynski.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Implementation of sequence motifs (PRIVATE). 
  6  """ 
  7  from Bio.Seq import Seq 
  8  from Bio.SubsMat import FreqTable 
  9  from Bio.Alphabet import IUPAC 
 10  import math,random 
 11   
12 -class Motif(object):
13 """ 14 A class representing sequence motifs. 15 """
16 - def __init__(self,alphabet=IUPAC.unambiguous_dna):
17 self.instances = [] 18 self.has_instances=False 19 self.counts = {} 20 self.has_counts=False 21 self.mask = [] 22 self._pwm_is_current = False 23 self._pwm = [] 24 self._log_odds_is_current = False 25 self._log_odds = [] 26 self.alphabet=alphabet 27 self.length=None 28 self.background=dict((n, 1.0/len(self.alphabet.letters)) \ 29 for n in self.alphabet.letters) 30 self.beta=1.0 31 self.info=None 32 self.name=""
33
34 - def _check_length(self, len):
35 if self.length==None: 36 self.length = len 37 elif self.length != len: 38 print "len",self.length,self.instances, len 39 raise ValueError("You can't change the length of the motif")
40
41 - def _check_alphabet(self,alphabet):
42 if self.alphabet==None: 43 self.alphabet=alphabet 44 elif self.alphabet != alphabet: 45 raise ValueError("Wrong Alphabet")
46
47 - def add_instance(self,instance):
48 """ 49 adds new instance to the motif 50 """ 51 self._check_alphabet(instance.alphabet) 52 self._check_length(len(instance)) 53 if self.has_counts: 54 for i in range(self.length): 55 let=instance[i] 56 self.counts[let][i]+=1 57 58 if self.has_instances or not self.has_counts: 59 self.instances.append(instance) 60 self.has_instances=True 61 62 self._pwm_is_current = False 63 self._log_odds_is_current = False
64 65
66 - def set_mask(self,mask):
67 """ 68 sets the mask for the motif 69 70 The mask should be a string containing asterisks in the position of significant columns and spaces in other columns 71 """ 72 self._check_length(len(mask)) 73 self.mask=[] 74 for char in mask: 75 if char=="*": 76 self.mask.append(1) 77 elif char==" ": 78 self.mask.append(0) 79 else: 80 raise ValueError("Mask should contain only '*' or ' ' and not a '%s'"%char)
81
82 - def pwm(self,laplace=True):
83 """ 84 returns the PWM computed for the set of instances 85 86 if laplace=True (default), pseudocounts equal to self.background multiplied by self.beta are added to all positions. 87 """ 88 89 if self._pwm_is_current: 90 return self._pwm 91 #we need to compute new pwm 92 self._pwm = [] 93 for i in xrange(self.length): 94 dict = {} 95 #filling the dict with 0's 96 for letter in self.alphabet.letters: 97 if laplace: 98 dict[letter]=self.beta*self.background[letter] 99 else: 100 dict[letter]=0.0 101 if self.has_counts: 102 #taking the raw counts 103 for letter in self.alphabet.letters: 104 dict[letter]+=self.counts[letter][i] 105 elif self.has_instances: 106 #counting the occurences of letters in instances 107 for seq in self.instances: 108 #dict[seq[i]]=dict[seq[i]]+1 109 try: 110 dict[seq[i]]+=1 111 except KeyError: #we need to ignore non-alphabet letters 112 pass 113 self._pwm.append(FreqTable.FreqTable(dict,FreqTable.COUNT,self.alphabet)) 114 self._pwm_is_current=1 115 return self._pwm
116
117 - def log_odds(self,laplace=True):
118 """ 119 returns the logg odds matrix computed for the set of instances 120 """ 121 122 if self._log_odds_is_current: 123 return self._log_odds 124 #we need to compute new pwm 125 self._log_odds = [] 126 pwm=self.pwm(laplace) 127 for i in xrange(self.length): 128 d = {} 129 for a in self.alphabet.letters: 130 d[a]=math.log(pwm[i][a]/self.background[a],2) 131 self._log_odds.append(d) 132 self._log_odds_is_current=1 133 return self._log_odds
134
135 - def ic(self):
136 """Method returning the information content of a motif. 137 """ 138 res=0 139 pwm=self.pwm() 140 for i in range(self.length): 141 res+=2 142 for a in self.alphabet.letters: 143 if pwm[i][a]!=0: 144 res+=pwm[i][a]*math.log(pwm[i][a],2) 145 return res
146
147 - def exp_score(self,st_dev=False):
148 """ 149 Computes expected score of motif's instance and its standard deviation 150 """ 151 exs=0.0 152 var=0.0 153 pwm=self.pwm() 154 for i in range(self.length): 155 ex1=0.0 156 ex2=0.0 157 for a in self.alphabet.letters: 158 if pwm[i][a]!=0: 159 ex1+=pwm[i][a]*(math.log(pwm[i][a],2)-math.log(self.background[a],2)) 160 ex2+=pwm[i][a]*(math.log(pwm[i][a],2)-math.log(self.background[a],2))**2 161 exs+=ex1 162 var+=ex2-ex1**2 163 if st_dev: 164 return exs,math.sqrt(var) 165 else: 166 return exs
167
168 - def search_instances(self,sequence):
169 """ 170 a generator function, returning found positions of instances of the motif in a given sequence 171 """ 172 if not self.has_instances: 173 raise ValueError ("This motif has no instances") 174 for pos in xrange(0,len(sequence)-self.length+1): 175 for instance in self.instances: 176 if instance.tostring()==sequence[pos:pos+self.length].tostring(): 177 yield(pos,instance) 178 break # no other instance will fit (we don't want to return multiple hits)
179
180 - def score_hit(self,sequence,position,normalized=0,masked=0):
181 """ 182 give the pwm score for a given position 183 """ 184 lo=self.log_odds() 185 score = 0.0 186 for pos in xrange(self.length): 187 a = sequence[position+pos] 188 if not masked or self.mask[pos]: 189 try: 190 score += lo[pos][a] 191 except: 192 pass 193 if normalized: 194 if not masked: 195 score/=self.length 196 else: 197 score/=len([x for x in self.mask if x]) 198 return score
199
200 - def search_pwm(self,sequence,normalized=0,masked=0,threshold=0.0,both=True):
201 """ 202 a generator function, returning found hits in a given sequence with the pwm score higher than the threshold 203 """ 204 if both: 205 rc = self.reverse_complement() 206 207 sequence=sequence.tostring().upper() 208 for pos in xrange(0,len(sequence)-self.length+1): 209 score = self.score_hit(sequence,pos,normalized,masked) 210 if score > threshold: 211 yield (pos,score) 212 if both: 213 rev_score = rc.score_hit(sequence,pos,normalized,masked) 214 if rev_score > threshold: 215 yield (-pos,rev_score)
216
217 - def dist_pearson(self, motif, masked = 0):
218 """ 219 return the similarity score based on pearson correlation for the given motif against self. 220 221 We use the Pearson's correlation of the respective probabilities. 222 """ 223 224 if self.alphabet != motif.alphabet: 225 raise ValueError("Cannot compare motifs with different alphabets") 226 227 max_p=-2 228 for offset in range(-self.length+1,motif.length): 229 if offset<0: 230 p = self.dist_pearson_at(motif,-offset) 231 else: #offset>=0 232 p = motif.dist_pearson_at(self,offset) 233 234 if max_p<p: 235 max_p=p 236 max_o=-offset 237 return 1-max_p,max_o
238
239 - def dist_pearson_at(self,motif,offset):
240 sxx = 0 # \sum x^2 241 sxy = 0 # \sum x \cdot y 242 sx = 0 # \sum x 243 sy = 0 # \sum y 244 syy = 0 # \sum x^2 245 norm=max(self.length,offset+motif.length) 246 247 for pos in range(max(self.length,offset+motif.length)): 248 for l in self.alphabet.letters: 249 xi = self[pos][l] 250 yi = motif[pos-offset][l] 251 sx = sx + xi 252 sy = sy + yi 253 sxx = sxx + xi * xi 254 syy = syy + yi * yi 255 sxy = sxy + xi * yi 256 257 norm *= len(self.alphabet.letters) 258 s1 = (sxy - sx*sy*1.0/norm) 259 s2 = (norm*sxx - sx*sx*1.0)*(norm*syy- sy*sy*1.0) 260 return s1/math.sqrt(s2)
261
262 - def dist_product(self,other):
263 """ 264 A similarity measure taking into account a product probability of generating overlaping instances of two motifs 265 """ 266 max_p=0.0 267 for offset in range(-self.length+1,other.length): 268 if offset<0: 269 p = self.dist_product_at(other,-offset) 270 else: #offset>=0 271 p = other.dist_product_at(self,offset) 272 if max_p<p: 273 max_p=p 274 max_o=-offset 275 return 1-max_p/self.dist_product_at(self,0),max_o
276
277 - def dist_product_at(self,other,offset):
278 s=0 279 for i in range(max(self.length,offset+other.length)): 280 f1=self[i] 281 f2=other[i-offset] 282 for n,b in self.background.iteritems(): 283 s+=b*f1[n]*f2[n] 284 return s/i
285
286 - def dist_dpq(self,other):
287 r"""Calculates the DPQ distance measure between motifs. 288 289 It is calculated as a maximal value of DPQ formula (shown using LaTeX 290 markup, familiar to mathematicians): 291 292 \sqrt{\sum_{i=1}^{alignment.len()} \sum_{k=1}^alphabet.len() \ 293 \{ m1[i].freq(alphabet[k])*log_2(m1[i].freq(alphabet[k])/m2[i].freq(alphabet[k])) + 294 m2[i].freq(alphabet[k])*log_2(m2[i].freq(alphabet[k])/m1[i].freq(alphabet[k])) 295 } 296 297 over possible non-spaced alignemts of two motifs. See this reference: 298 299 D. M Endres and J. E Schindelin, "A new metric for probability 300 distributions", IEEE transactions on Information Theory 49, no. 7 301 (July 2003): 1858-1860. 302 """ 303 304 min_d=float("inf") 305 min_o=-1 306 d_s=[] 307 for offset in range(-self.length+1,other.length): 308 #print "%2.3d"%offset, 309 if offset<0: 310 d = self.dist_dpq_at(other,-offset) 311 overlap = self.length+offset 312 else: #offset>=0 313 d = other.dist_dpq_at(self,offset) 314 overlap = other.length-offset 315 overlap = min(self.length,other.length,overlap) 316 out = self.length+other.length-2*overlap 317 #print d,1.0*(overlap+out)/overlap,d*(overlap+out)/overlap 318 #d = d/(2*overlap) 319 d = (d/(out+overlap))*(2*overlap+out)/(2*overlap) 320 #print d 321 d_s.append((offset,d)) 322 if min_d> d: 323 min_d=d 324 min_o=-offset 325 return min_d,min_o#,d_s
326
327 - def dist_dpq_at(self,other,offset):
328 """ 329 calculates the dist_dpq measure with a given offset. 330 331 offset should satisfy 0<=offset<=len(self) 332 """ 333 def dpq (f1,f2,alpha): 334 s=0 335 for n in alpha.letters: 336 avg=(f1[n]+f2[n])/2 337 s+=f1[n]*math.log(f1[n]/avg,2)+f2[n]*math.log(f2[n]/avg,2) 338 return math.sqrt(s)
339 340 s=0 341 for i in range(max(self.length,offset+other.length)): 342 f1=self[i] 343 f2=other[i-offset] 344 s+=dpq(f1,f2,self.alphabet) 345 return s
346
347 - def _read(self,stream):
348 """Reads the motif from the stream (in AlignAce format). 349 350 the self.alphabet variable must be set beforehand. 351 If the last line contains asterisks it is used for setting mask 352 """ 353 354 while 1: 355 ln = stream.readline() 356 if "*" in ln: 357 self.set_mask(ln.strip("\n\c")) 358 break 359 self.add_instance(Seq(ln.strip(),self.alphabet))
360
361 - def __str__(self,masked=False):
362 """ string representation of a motif. 363 """ 364 str = "" 365 for inst in self.instances: 366 str = str + inst.tostring() + "\n" 367 368 if masked: 369 for i in xrange(self.length): 370 if self.mask[i]: 371 str = str + "*" 372 else: 373 str = str + " " 374 str = str + "\n" 375 return str
376
377 - def __len__(self):
378 """return the length of a motif 379 380 Please use this method (i.e. invoke len(m)) instead of refering to the m.length directly. 381 """ 382 if self.length==None: 383 return 0 384 else: 385 return self.length
386
387 - def _write(self,stream):
388 """ 389 writes the motif to the stream 390 """ 391 392 stream.write(self.__str__())
393 394 395
396 - def _to_fasta(self):
397 """ 398 FASTA representation of motif 399 """ 400 if not self.has_instances: 401 self.make_instances_from_counts() 402 str = "" 403 for i,inst in enumerate(self.instances): 404 str = str + ">instance%d\n"%i + inst.tostring() + "\n" 405 406 return str
407
408 - def reverse_complement(self):
409 """ 410 Gives the reverse complement of the motif 411 """ 412 res = Motif() 413 if self.has_instances: 414 for i in self.instances: 415 res.add_instance(i.reverse_complement()) 416 else: # has counts 417 res.has_counts=True 418 res.counts["A"]=self.counts["T"][:] 419 res.counts["T"]=self.counts["A"][:] 420 res.counts["G"]=self.counts["C"][:] 421 res.counts["C"]=self.counts["G"][:] 422 res.counts["A"].reverse() 423 res.counts["C"].reverse() 424 res.counts["G"].reverse() 425 res.counts["T"].reverse() 426 res.length=self.length 427 res.mask = self.mask 428 return res
429 430
431 - def _from_jaspar_pfm(self,stream,make_instances=False):
432 """ 433 reads the motif from Jaspar .pfm file 434 435 The instances are fake, but the pwm is accurate. 436 """ 437 return self._from_horiz_matrix(stream,letters="ACGT",make_instances=make_instances)
438
439 - def _from_vert_matrix(self,stream,letters=None,make_instances=False):
440 """reads a vertical count matrix from stream and fill in the counts. 441 """ 442 443 self.counts = {} 444 self.has_counts=True 445 if letters==None: 446 letters=self.alphabet.letters 447 self.length=0 448 for i in letters: 449 self.counts[i]=[] 450 for ln in stream.readlines(): 451 rec=map(float,ln.strip().split()) 452 for k,v in zip(letters,rec): 453 self.counts[k].append(v) 454 self.length+=1 455 self.set_mask("*"*self.length) 456 if make_instances==True: 457 self.make_instances_from_counts() 458 return self
459
460 - def _from_horiz_matrix(self,stream,letters=None,make_instances=False):
461 """reads a horizontal count matrix from stream and fill in the counts. 462 """ 463 if letters==None: 464 letters=self.alphabet.letters 465 self.counts = {} 466 self.has_counts=True 467 468 for i in letters: 469 ln = stream.readline().strip().split() 470 #if there is a letter in the beginning, ignore it 471 if ln[0]==i: 472 ln=ln[1:] 473 #print ln 474 try: 475 self.counts[i]=map(int,ln) 476 except ValueError: #not integers 477 self.counts[i]=map(float,ln) #map(lambda s: int(100*float(s)),ln) 478 #print counts[i] 479 480 s = sum(self.counts[nuc][0] for nuc in letters) 481 l = len(self.counts[letters[0]]) 482 self.length=l 483 self.set_mask("*"*l) 484 if make_instances==True: 485 self.make_instances_from_counts() 486 return self
487 488
489 - def make_instances_from_counts(self):
490 """Creates "fake" instances for a motif created from a count matrix. 491 492 In case the sums of counts are different for different columnes, the 493 shorter columns are padded with background. 494 """ 495 alpha="".join(self.alphabet.letters) 496 #col[i] is a column taken from aligned motif instances 497 col=[] 498 self.has_instances=True 499 self.instances=[] 500 s = sum(map(lambda nuc: self.counts[nuc][0],self.alphabet.letters)) 501 for i in range(self.length): 502 col.append("") 503 for n in self.alphabet.letters: 504 col[i] = col[i]+ (n*(self.counts[n][i])) 505 if len(col[i])<s: 506 print "WARNING, column too short",len(col[i]),s 507 col[i]+=(alpha*s)[:(s-len(col[i]))] 508 #print i,col[i] 509 #iterate over instances 510 for i in range(s): 511 inst="" #start with empty seq 512 for j in range(self.length): #iterate over positions 513 inst+=col[j][i] 514 #print i,inst 515 inst=Seq(inst,self.alphabet) 516 self.add_instance(inst) 517 return self.instances
518
519 - def make_counts_from_instances(self):
520 """Creates the count matrix for a motif with instances. 521 522 """ 523 #make strings for "columns" of motifs 524 #col[i] is a column taken from aligned motif instances 525 counts={} 526 for a in self.alphabet.letters: 527 counts[a]=[] 528 self.has_counts=True 529 s = len(self.instances) 530 for i in range(self.length): 531 ci = dict((a,0) for a in self.alphabet.letters) 532 for inst in self.instances: 533 ci[inst[i]]+=1 534 for a in self.alphabet.letters: 535 counts[a].append(ci[a]) 536 self.counts=counts 537 return counts
538
539 - def _from_jaspar_sites(self,stream):
540 """ 541 reads the motif from Jaspar .sites file 542 543 The instances and pwm are OK. 544 """ 545 546 while True: 547 ln = stream.readline()# read the header "$>...." 548 if ln=="" or ln[0]!=">": 549 break 550 551 ln=stream.readline().strip()#read the actual sequence 552 i=0 553 while ln[i]==ln[i].lower(): 554 i+=1 555 inst="" 556 while i<len(ln) and ln[i]==ln[i].upper(): 557 inst+=ln[i] 558 i+=1 559 inst=Seq(inst,self.alphabet) 560 self.add_instance(inst) 561 562 self.set_mask("*"*len(inst)) 563 return self
564 565
566 - def __getitem__(self,index):
567 """Returns the probability distribution over symbols at a given position, padding with background. 568 569 If the requested index is out of bounds, the returned distribution comes from background. 570 """ 571 if index in range(self.length): 572 return self.pwm()[index] 573 else: 574 return self.background
575
576 - def consensus(self):
577 """Returns the consensus sequence of a motif. 578 """ 579 res="" 580 for i in range(self.length): 581 max_f=0 582 max_n="X" 583 for n in sorted(self[i]): 584 if self[i][n]>max_f: 585 max_f=self[i][n] 586 max_n=n 587 res+=max_n 588 return Seq(res,self.alphabet)
589
590 - def anticonsensus(self):
591 """returns the least probable pattern to be generated from this motif. 592 """ 593 res="" 594 for i in range(self.length): 595 min_f=10.0 596 min_n="X" 597 for n in sorted(self[i]): 598 if self[i][n]<min_f: 599 min_f=self[i][n] 600 min_n=n 601 res+=min_n 602 return Seq(res,self.alphabet)
603
604 - def max_score(self):
605 """Maximal possible score for this motif. 606 607 returns the score computed for the consensus sequence. 608 """ 609 return self.score_hit(self.consensus(),0)
610
611 - def min_score(self):
612 """Minimal possible score for this motif. 613 614 returns the score computed for the anticonsensus sequence. 615 """ 616 return self.score_hit(self.anticonsensus(),0)
617
618 - def weblogo(self,fname,format="PNG",**kwds):
619 """ 620 uses the Berkeley weblogo service to download and save a weblogo of itself 621 622 requires an internet connection. 623 The parameters from **kwds are passed directly to the weblogo server. 624 """ 625 import urllib 626 import urllib2 627 al= self._to_fasta() 628 url = 'http://weblogo.berkeley.edu/logo.cgi' 629 values = {'sequence' : al, 630 'format' : format, 631 'logowidth' : '18', 632 'logoheight' : '5', 633 'logounits' : 'cm', 634 'kind' : 'AUTO', 635 'firstnum' : "1", 636 'command' : 'Create Logo', 637 'smallsamplecorrection' : "on", 638 'symbolsperline' : 32, 639 'res' : '96', 640 'res_units' : 'ppi', 641 'antialias' : 'on', 642 'title' : '', 643 'barbits' : '', 644 'xaxis': 'on', 645 'xaxis_label' : '', 646 'yaxis': 'on', 647 'yaxis_label' : '', 648 'showends' : 'on', 649 'shrink' : '0.5', 650 'fineprint' : 'on', 651 'ticbits' : '1', 652 'colorscheme' : 'DEFAULT', 653 'color1' : 'green', 654 'color2' : 'blue', 655 'color3' : 'red', 656 'color4' : 'black', 657 'color5' : 'purple', 658 'color6' : 'orange', 659 'color1' : 'black', 660 } 661 for k,v in kwds.iteritems(): 662 values[k]=str(v) 663 664 data = urllib.urlencode(values) 665 req = urllib2.Request(url, data) 666 response = urllib2.urlopen(req) 667 f=open(fname,"w") 668 im=response.read() 669 670 f.write(im) 671 f.close()
672 673
674 - def _to_transfac(self):
675 """Write the representation of a motif in TRANSFAC format 676 """ 677 res="XX\nTY Motif\n" #header 678 try: 679 res+="ID %s\n"%self.name 680 except: 681 pass 682 res+="BF undef\nP0" 683 for a in self.alphabet.letters: 684 res+=" %s"%a 685 res+="\n" 686 if not self.has_counts: 687 self.make_counts_from_instances() 688 for i in range(self.length): 689 if i<9: 690 res+="0%d"%(i+1) 691 else: 692 res+="%d"%(i+1) 693 for a in self.alphabet.letters: 694 res+=" %d"%self.counts[a][i] 695 res+="\n" 696 res+="XX\n" 697 return res
698
699 - def _to_vertical_matrix(self,letters=None):
700 """Return string representation of the motif as a matrix. 701 702 """ 703 if letters==None: 704 letters=self.alphabet.letters 705 self._pwm_is_current=False 706 pwm=self.pwm(laplace=False) 707 res="" 708 for i in range(self.length): 709 res+="\t".join([str(pwm[i][a]) for a in letters]) 710 res+="\n" 711 return res
712
713 - def _to_horizontal_matrix(self,letters=None,normalized=True):
714 """Return string representation of the motif as a matrix. 715 716 """ 717 if letters==None: 718 letters=self.alphabet.letters 719 res="" 720 if normalized: #output PWM 721 self._pwm_is_current=False 722 mat=self.pwm(laplace=False) 723 for a in letters: 724 res+="\t".join([str(mat[i][a]) for i in range(self.length)]) 725 res+="\n" 726 else: #output counts 727 if not self.has_counts: 728 self.make_counts_from_instances() 729 mat=self.counts 730 for a in letters: 731 res+="\t".join([str(mat[a][i]) for i in range(self.length)]) 732 res+="\n" 733 return res
734
735 - def _to_jaspar_pfm(self):
736 """Returns the pfm representation of the motif 737 """ 738 return self._to_horizontal_matrix(normalized=False,letters="ACGT")
739
740 - def format(self,format):
741 """Returns a string representation of the Motif in a given format 742 743 Currently supported fromats: 744 - jaspar-pfm : JASPAR Position Frequency Matrix 745 - transfac : TRANSFAC like files 746 - fasta : FASTA file with instances 747 """ 748 749 formatters={ 750 "jaspar-pfm": self._to_jaspar_pfm, 751 "transfac": self._to_transfac, 752 "fasta" : self._to_fasta, 753 } 754 755 try: 756 return formatters[format]() 757 except KeyError: 758 raise ValueError("Wrong format type")
759
760 - def scanPWM(self,seq):
761 """Matrix of log-odds scores for a nucleotide sequence. 762 763 scans a nucleotide sequence and returns the matrix of log-odds 764 scores for all positions. 765 766 - the result is a one-dimensional list or numpy array 767 - the sequence can only be a DNA sequence 768 - the search is performed only on one strand 769 """ 770 if self.alphabet!=IUPAC.unambiguous_dna: 771 raise ValueError("Wrong alphabet! Use only with DNA motifs") 772 if seq.alphabet!=IUPAC.unambiguous_dna: 773 raise ValueError("Wrong alphabet! Use only with DNA sequences") 774 775 seq = seq.tostring() 776 777 # check if the fast C code can be used 778 try: 779 import _pwm 780 except ImportError: 781 # use the slower Python code otherwise 782 return self._pwm_calculate(seq) 783 784 # get the log-odds matrix into a proper shape 785 # (each row contains sorted (ACGT) log-odds values) 786 logodds=[[y[1] for y in sorted(x.items())] for x in self.log_odds()] 787 return _pwm.calculate(seq, logodds)
788
789 - def _pwm_calculate(self, sequence):
790 logodds = self.log_odds() 791 m = len(logodds) 792 s = len(sequence) 793 n = s - m + 1 794 result = [None] * n 795 for i in xrange(n): 796 score = 0.0 797 for j in xrange(m): 798 c = sequence[i+j] 799 temp = logodds[j].get(c) 800 if temp==None: 801 break 802 score += temp 803 else: 804 result[i] = score 805 return result
806