# Code and data for the MATH1222 course project


# Simple class representing a phylogenetic tree (node).
# To visualise a tree rooted at `root`, call root.toNewick() to generate a
# Newick representation of the tree, which can then be displayed, for
# example, on https://beta.phylo.io/
class PTNode:
    """Node of a binary phylogenetic tree.

    Attributes:
        name: label of the node, or None for an unnamed node.
        length: length of the branch above this node, or None.
        parent: parent node; None until the node is passed as a child to
            another PTNode.
        left: left child, or None.
        right: right child, or None.
    """

    def __init__(self, name, length, treeleft=None, treeright=None):
        """Create a node and attach the optional subtrees as its children.

        Args:
            name: label of the node (may be None).
            length: branch length above the node (may be None).
            treeleft: optional PTNode that becomes the left child.
            treeright: optional PTNode that becomes the right child.
        """
        self.length = length
        self.name = name
        self.parent = None
        self.left = None
        self.right = None
        if treeleft is not None:
            self.left = treeleft
            self.left.parent = self
        if treeright is not None:
            self.right = treeright
            self.right.parent = self

    def toNewick(self):
        """Return a Newick representation of the subtree rooted here.

        A node without children is rendered as ``name:length`` (each part
        omitted when it is None). An internal node is rendered as
        ``(child,...)name:length``; when it has no parent, a terminating
        ``;`` is appended.
        """
        label = ""
        if self.name is not None:
            label += self.name
        if self.length is not None:
            label += ":" + str(self.length)

        # Only existing children are rendered, so a node with a single
        # child (left or right) neither crashes nor loses its subtree.
        children = [c for c in (self.left, self.right) if c is not None]
        if not children:
            return label

        if self.parent is None:
            label += ";"
        return "(" + ",".join(c.toNewick() for c in children) + ")" + label


# Data for the different sub-questions

# Part 1
# ======

# 2) Transition matrix of a reversible chain

Prev = [
    [0.8860516969572614, 0.04737184130589218, 0.03104758075742727, 0.03552888097941914],
    [0.011842960326473046, 0.91799183092612, 0.023685920652946092, 0.04647928809446081],
    [0.01552379037871363, 0.04737184130589217, 0.901575487335975, 0.035528880979419135],
    [0.011842960326473044, 0.061972384125947765, 0.02368592065294609, 0.9024987348946332],
]

# Part 2
# ======

# Question 1

tree_P2Q1a = PTNode("ROOT", None, PTNode("S1", 1), PTNode("S2", 4))
print("tree_P2Q1a: ", tree_P2Q1a.toNewick())

tree_P2Q1b = PTNode("ROOT", None, PTNode("S1", 3), PTNode("S2", 2))
print("tree_P2Q1b: ", tree_P2Q1b.toNewick())

sequences_P2Q1 = {"S1": "ccat", "S2": "ccgt"}

# Question 2

tree_P2Q2 = PTNode(
    "ROOT",
    None,
    PTNode(
        "N3",
        2,
        PTNode(
            "N2", 6, PTNode("N1", 2, PTNode("S1", 1), PTNode("S2", 4)), PTNode("S3", 1)
        ),
        PTNode("S4", 9),
    ),
    PTNode("S5", 9),
)
print("tree_P2Q2: ", tree_P2Q2.toNewick())

sequences_P2Q2 = {
    "S1": "aatta",
    "S2": "aatca",
    "S3": "aacca",
    "S4": "aaaca",
    "S5": "aacca",
}

# Question 3

d = 4
tree_P2Q3a = PTNode(
    None,
    None,
    PTNode("Chimp", 25),
    PTNode(None, d, PTNode("Human", 25 - d), PTNode("Gorilla", 25 - d)),
)
print("tree_P2Q3a: ", tree_P2Q3a.toNewick())

tree_P2Q3b = PTNode(
    None,
    None,
    PTNode("Gorilla", 25),
    PTNode(None, d, PTNode("Human", 25 - d), PTNode("Chimp", 25 - d)),
)
print("tree_P2Q3b: ", tree_P2Q3b.toNewick())

sequences_P2Q3 = {
    "Gorilla": "tttttttaggtcttcctgacacaaaaaccgcgccacctggtattgtgcgttaactcgctgggtagttaactgacgcactacgctcacatggttttgtaaa",
    "Human": "ctggatgatgccacacctaaattaaagatgagttgttgagtaaactggttcagttccccacgcggttatctagtgcccaactttcgagagggacgattgg",
    "Chimp": "gagttttagggcctagctaggctaggtccgaattgacgagcgaactggctcacttaatcagggaataatccagtgcccaaccctcgagaggtagggtgag",
}

# Part 3
# ======

# The outgroup is the sequence "Virus0". Each sequence is named after the
# city in which the virus variant was found.

sequences_P3 = {
    "Virus0": "cattgagattgccttataccgctgcccccacttgttttgctcccttttgagtcaacgtagttgacgagggtggtatacgatgtacgagccgagtgcgaccgtcctttctgtccctagccacaatggttgaagcatcagcatactggaggtcaatagaaccatccaacgcaatcgctagttcttgtatacgtcgacgataa",
    "Alger": "aatccgaagagctctactgcaccactgctacttatattgcgccagtctggatggactttaattccggagtacctatgctgcgtaccagacgagtccggcggtacctttgatcaccgggcgcgatggtcgaagcattactggagcgaaggactacgggattaccttatgctcttgttggagaccggagggtcagccgacta",
    "Amsterdam": "cattgggatggcatgataccggttccttcacttattctgcgccaatgtggatcgacgaagaattcggcatcagtctgtgccgtgccagccgtgtgcggcagttcttcccgtccctagacacatcgcttgaagcatctccgtaacgaacgaccatagaatcaacctgagcccgcgctagttcctagagatttcgacgataa",
    "Belfast": "cattcctattgcttgagggcggatcccccccttcgggcccgcccgcgcaaaccgacttaggtgtaggaggtaggaagtgacgcgccatacgtgtgaggcagatctttcggggcttacgccaaatggctggttcctcccagcatcgaaggggaatagtctcaccgagaactcgcgctactttccagatatatcggcgatta",
    "Bologne": "cacgagaatggctttttatgggctctgctacttattttgcgcaagtgcagatggattcggatgccggagtcagtatgcagcgcactagcggagtgcggcggcccatgcgagcaccgaacgcaatggtcgaagcactagcgaaaagaaggaatccacgataatctattccccgcgctaatctctagagattgcgtcgataa",
    "Bruxelles": "cattgggatgactttataccggttccgccacttattttgcgccaatgtgggtctacgtagatgtcggcgtcagtctgtgacgtgccagccgtctgcggcgctcctttccgtccctaggcgcaatgattgaagcatcaccgtaacgaaggaccatagaatcatgcagagcccgcgctagttcctggatatttcgacgataa",
    "Casablanca": "aatgctagaacctctatagcacctctgctagtgataatgcgtctctctcgatggactttaatgcgggagaattgatgcgacggacccgacgagtgccagggtccctagcagaaccgaacgcgatggtcgagacattaccacgaggaagtagaatccggtaatccagtgctttcggtcgcgagtgggaagtcagccgatga",
    "Cork": "cattcaaattgcctgagggagggtcccccccttcgggcccgtcagcgcaacccgagttgggtgtcagaggtgggaagtgatgcgccatacgtgtggggcggatctttagggccttacgccaaatggtaggatcaggccagtatcggaaggggaccgtcccaccgagaactcgcgctactttccagatatttcgctgatta",
    "Dublin": "cattcaaattgcttgagaccggatcccctccttcgggcccgctagcgcaactcgacttgggtgtcggaggtgggaagtgacgcgccatacgtgtgaggcggggctttcgggccttacgccaaatgattggatcagcccagtatcggagggggatagactcaccgagaactcgtgctactttccagatatatcggcgatta",
    "Edimbourg": "cattcagattgttttatatcgaatcccctccctcggtcccgctcgggatggtcgacttgggtgtctgggcgagtccgtgacgcgccatacgtgtgatgctgtgctttcgggtcccatgtgaaatgcgttaatcattacagtacctgggagcgatagacccgtcgagaacctgcgctagttcccaggtatattggcgataa",
    "Glasgow": "cattgagagtgctttgtatcggatcccctccttaggtcccgctcatgaaggtcaacttgggtttctgggcgagtccgtgacgtgccatacgtgtggtgctgtgcgttcgagtcctatgtaaaatgagtgaatcatcacagtaactgggagcgatagaatcgttgagaacctgcacttgttcttaggtatatcggcgatag",
    "Lilles": "cattgggatggttgtctgtgggtttctccatttgttttgcgctcgtatgggtccacgtagaggccgcaatcagcctgtgacgtgccagccgtgcgtggtggtcgaggacgtccatggacgcaaaggccgaagcgccaccgaaacgaaggaccatagcaccatccagtacccgcgctagttcccagatatttaggggacaa",
    "Ljubljana": "catgcaaatggctttatcccggctctgcagcttattgtgcgcaaatgcagatggactaggatgcccgagtctgtatgtcgagtactagcgaagtgcggcggaccttgcgagccccaaacacaatggtcgaagcataagcggaacgctggaacacaggatcatccgctccccgcgctaatctctagagattgcgtcgatta",
    "Londres": "cactaagatcactttaccttggttccttcacttagaaacaccgcatgtatgtcgacttagttgtcgggggaggtcagcgatccgccagtactgcgcggccagtctttcagttcccaggggcaatgattgtagcgtgactgtaacggaggtccacggaatcctccagagcccgcgctctttccttgatatagcggcgataa",
    "Manchester": "tattgagattactttacatcggttccctcacttaagaaccacccatgcagacaaacttagttggcgggggtggtctgcgataggccagtcgtgtgcggccgttatctcagttctgaggagcaaagagtggagcgataccatactgtagcgtcacagaatcatccaccgcccgcgctaactccttgatatagcggcgaaaa",
    "Marrakech": "aatactagaacctctatcgcacctctgctagtggtaatgcgtctctctcgatggtctttaatgcgggagaattgatgcgtcggacccgacgcgtgccagggtccctaggagaaccgaactcgatggtcgagacatcaccaggaggaagtagaatccggtactccagagctttccgttgcgagtggggagtctgccgatga",
    "Marseille": "cattggaacggctttatatcacctcggctactcattttgcgcccttggggatggacgtggatgccggtgtcactatgtgacgtgcgagccgagtgcggcggtcgttgccgtctccggacgcaatggtggaagcatcaccgaaacgaaggaatataggactatccagtccccgcgcaagtttccagagatttcgccgatta",
    "Milan": "catgggaatggctttatatcggctctaccacttattttgcgtcagtgcagatggacttggatgccggagtcagtatgcggcgtactagccgagcgcgccggtccctgcgatcaccggacgctatggtcggagcattaccgaaacgaaggaatataggatcatctactccctgcgctagcctctatacattacgccgatta",
    "Paris": "catcggaatggctttctattacctcggctacttattttgcgcccgtgcggatagacgtggatgccggagtcagtatgtgacgtgccagccgagttcggcagtccttagcgtcaccgaacgcaatggtcgaagcatcaccgaaacgaagggatataggatgatccagtcaccgcgttagttcctagagatttcgccgatta",
    "Tunis": "aatgcgagaacctttatagcatctctgttagtgataacgcgtcactctggatgggctctaatgcgcgagaattgatagggcggaccagacgagtgcaagggtcccttcgagagcggaacgcaatggtccagacgttaccgaaacgaaggaaaattggggaattcaatgctttcggtgtttagcggagagtccgccgatga",
    "Venise": "cacgagaatggcttgatatgggctctgctacttattttgcgccagtgcagatggactaggctgccggagtcagtatgcagcgtactagcggagtgcggcggcccgtgcgcgcactgaacgcaatgttcgaagcactagcgaaaccaaggaataaaggataatctattccccgcgctaatctctagagattgcgtcgatta",
}