#!/usr/bin/env python #Author: Shriphani Palakodety (spalakod@cs.purdue.edu) from math import sqrt, factorial def combs(num1, num2): '''Return the value of num1 choose num2''' return float(factorial(num1)) / (factorial(num1-num2) * factorial(num2)) class Protein: '''Describes the protein''' def __init__(self, vector): self.vector = vector #stores the vector representation of the protein's functions def __str__(self): return str(self.vector) def similarityMeasure(self, protein2): '''Takes a protein for input and determines the similarity between this protein and that''' similarity = 0 for i in self.vector: if i in protein2.vector: similarity += 1 return float(similarity)/(sqrt(len(self.vector)) * sqrt(len(protein2.vector))) class ProteinSet: '''Describes a set of proteins''' def __init__(self): self.proteins = [] self.coherence_scr = 0 #self.setCoherenceScore() self.core = [] self.similarityCritereon1(self) def __str__(self): print "List of proteins in this set: " for prot in self.proteins: print "-> ", prot return "" def addProtein(self, protein): '''Add a protein to this set''' self.proteins.append(protein) self.setCoherenceScore() self.similarityCritereon1(self) def removeProtein(self, protein): '''Remove a protein from this set''' self.proteins.pop(self.proteins.index(protein)) self.setCoherenceScore() self.similarityCritereon1(self) def setCoherenceScore(self): '''Set the coherence score of the set''' self.coherence_scr = 0 if len(self.proteins) <= 1: self.coherence_scr = 1 return for i in xrange(len(self.proteins)): for j in xrange(i+1, len(self.proteins)): self.coherence_scr += self.proteins[i].similarityMeasure(self.proteins[j]) self.coherence_scr = 2 * float(self.coherence_scr)/(len(self.proteins) * (len(self.proteins) - 1)) def similarityMeasureSet(self, protein): '''Given a protein and a set, this function returns the similarity between the protein and the set''' similarity = 0 set_prot_list = self.proteins for prot in set_prot_list: similarity += protein.similarityMeasure(prot) return float(similarity)/len(set_prot_list) def similarityCritereon1(self, set): '''Given a set, use the first criterion to figure out the similarity''' prots = 0 if self.proteins == set.proteins: self.core = [] for prot in set.proteins: if self.similarityMeasureSet(prot) >= self.coherence_scr: prots += 1 self.core.append(prot) else: for prot in set.proteins: if self.similarityMeasureSet(prot) >= self.coherence_scr: prots += 1 return prots def similarityCritereon2(self, set): '''Given a set, use the second criterion to figure out the similarity''' prots = 0 for prot in set.proteins: for set_prot in self.core: print str(prot) + "," + str(set_prot) if set_prot.similarityMeasure(prot) >= self.coherence_scr: prots += 1 return prots def similarityCritereon3(self, set): '''Given a set, use the third critereon to figure out the similarity''' prots = 0 for prot in set.proteins: for set_prot in self.proteins: if set_prot.similarityMeasure(prot) >= self.coherence_scr: prots += 1 return prots def getPValue(num_prots, set1_num, set2_num): p_value = 0 for i in xrange(set1_num): p_value += combs(num_prots) * combs((set2_num - num_prots), (set1_num -i)) / float(combs(set2_num, set1_num)) return p_value #Creating a bunch of proteins and so on for "fun" and "entertainment" prot1 = Protein(['f1', 'f2', 'f3']) prot2 = Protein(['f1', 'f2', 'f3']) #print prot1.similarityMeasure(prot2) S = ProteinSet() S.addProtein(prot1) S.addProtein(prot2) print S.similarityMeasureSet(prot2) #print S #print S.coherence_scr prot3 = Protein(['f1', 'f2']) #compare the similarities between prot3 and the set S #print S.similarityMeasureSet(prot3) #here onwards we begin implementing the core and so on. print len(S.core) for prot in S.core: print prot R = ProteinSet() R.addProtein(prot3) prot4 = Protein(['f1', 'f2', 'f3']) prot5 = Protein(['f1', 'f5', 'f9']) prot6 = Protein(['f1', 'f3', 'f7']) R.addProtein(prot4) R.addProtein(prot5) R.addProtein(prot6) print R print S.similarityCritereon1(R) print S.similarityCritereon2(R) print S.similarityCritereon3(R)