Source code for bg.tree

# -*- coding: utf-8 -*-
from collections import deque
from copy import deepcopy

from ete3 import Tree

from bg.genome import BGGenome
from bg.multicolor import Multicolor

__author__ = "Sergey Aganezov"
__email__ = "aganezov(at)cs.jhu.edu"
__status__ = "production"

# defines a default edge length in phylogenetic tree
DEFAULT_EDGE_LENGTH = 1


[docs]class BGTree(object): """ Class that is designed to store information about phylogenetic information and relations between multiple genomes Class utilizes a ete3.Tree object as an internal storage This tree can store information about: * edge lengths * tree topology """ # class defined variables that are used as keys when storing edge specific data in the edge attribute dicts edge_length_attribute_name = "edge_length"
[docs] def __init__(self, newick=None, newick_format=1, dist=DEFAULT_EDGE_LENGTH, leaf_wrapper=BGGenome): self.tree = Tree(newick=newick, format=newick_format, dist=dist) self.__root = self.tree self.__leaf_wrapper = leaf_wrapper # a callable, that would be called with leaf name as an argument for Multicolor class self.multicolors_are_up_to_date = False self.__tree_consistent_multicolors_set = {Multicolor().hashable_representation} self.__tree_consistent_multicolors = [Multicolor()] self.__vtree_consistent_multicolors_set = {Multicolor().hashable_representation} self.__vtree_consistent_multicolors = [Multicolor()]
[docs] def nodes(self): """ Proxies iteration to the underlying Tree.iter_descendants iterator, but first yielding a root element :return: iterator over all descendants of a root, starting with a root, in current tree :rtype: iterator """ yield self.__root for entry in self.__root.iter_descendants(): yield entry
[docs] def edges(self): """ :return: iterator over edges in current tree. :rtype: iterator """ if self.__root is None: return nodes = deque([self.__root]) while len(nodes) > 0: current_node = nodes.popleft() if current_node.is_leaf(): continue else: for child in current_node.children: yield current_node, child if not child.is_leaf() else self.__leaf_wrapper(child.name) if not child.is_leaf(): nodes.append(child)
[docs] def add_edge(self, node1_name, node2_name, edge_length=DEFAULT_EDGE_LENGTH): """ Adds a new edge to the current tree with specified characteristics Forbids addition of an edge, if a parent node is not present Forbids addition of an edge, if a child node already exists :param node1_name: name of the parent node, to which an edge shall be added :param node2_name: name of newly added child node :param edge_length: a length of specified edge :return: nothing, inplace changes :raises: ValueError (if parent node IS NOT present in the tree, or child node IS already present in the tree) """ if not self.__has_node(name=node1_name): raise ValueError("Can not add an edge to a non-existing node {name}".format(name=node1_name)) if self.__has_node(name=node2_name): raise ValueError("Can not add an edge to already existing node {name}".format(name=node2_name)) self.multicolors_are_up_to_date = False self.__get_node_by_name(name=node1_name).add_child(name=node2_name, dist=edge_length)
[docs] def get_node_by_name(self, name): """ Proxies the call to the __get_node_by_name method """ return self.__get_node_by_name(name=name)
def __get_node_by_name(self, name): """ Returns a first TreeNode object, which name matches the specified argument :raises: ValueError (if no node with specified name is present in the tree) """ try: for entry in filter(lambda x: x.name == name, self.nodes()): return entry except StopIteration: raise ValueError("Attempted to retrieve a non-existing tree node with name: {name}" "".format(name=name)) def __has_edge(self, node1_name, node2_name, account_for_direction=True): """ Returns a boolean flag, telling if a tree has an edge with two nodes, specified by their names as arguments If a account_for_direction is specified as True, the order of specified node names has to relate to parent - child relation, otherwise both possibilities are checked """ try: p1 = self.__get_node_by_name(name=node1_name) wdir = node2_name in (node.name for node in p1.children) if account_for_direction: return wdir else: p2 = self.__get_node_by_name(name=node2_name) return wdir or node1_name in (node.name for node in p2.children) except ValueError: return False
[docs] def has_edge(self, node1_name, node2_name, account_for_direction=True): """ Proxies a call to the __has_edge method """ return self.__has_edge(node1_name=node1_name, node2_name=node2_name, account_for_direction=account_for_direction)
def __has_node(self, name): """ Check is the current Tree has a node, matching by name to the specified argument """ result = self.__get_node_by_name(name=name) return result is not None
[docs] def has_node(self, name): """ Proxies a call to __has_node method """ return self.__has_node(name=name)
@property def root(self): """ A property based call for the root pointer in current tree """ return self.__root
[docs] def get_distance(self, node1_name, node2_name): """ Returns a length of an edge / path, if exists, from the current tree :param node1_name: a first node name in current tree :param node2_name: a second node name in current tree :return: a length of specified by a pair of vertices edge / path :rtype: `Number` :raises: ValueError, if requested a length of an edge, that is not present in current tree """ return self.__root.get_distance(target=node1_name, target2=node2_name)
def __vertex_is_leaf(self, node_name): """ Checks if a node specified by its name as an argument is a leaf in the current Tree :raises: ValueError (if no node with specified name is present in the tree) """ return self.__get_node_by_name(name=node_name).is_leaf() def __get_v_tree_consistent_leaf_based_hashable_multicolors(self): """ Internally used method, that recalculates VTree-consistent sets of leaves in the current tree """ result = [] nodes = deque([self.__root]) while len(nodes) > 0: current_node = nodes.popleft() children = current_node.children nodes.extend(children) if not current_node.is_leaf(): leaves = filter(lambda node: node.is_leaf(), current_node.get_descendants()) result.append(Multicolor(*[self.__leaf_wrapper(leaf.name) for leaf in leaves])) else: result.append(Multicolor(self.__leaf_wrapper(current_node.name))) result.append(Multicolor()) return result
[docs] def get_tree_consistent_multicolors(self): """ Returns a copy of the list of T-consistent multicolors from current tree """ return deepcopy(self.tree_consistent_multicolors)
[docs] def get_vtree_consistent_multicolors(self): """ Returns a copy of the list of VT-consistent multicolors from current tree """ return deepcopy(self.vtree_consistent_multicolors)
def __update_consistent_multicolors(self): """ Internally used method, that recalculates T-consistent / VT-consistent multicolors for current tree topology """ v_t_consistent_multicolors = self.__get_v_tree_consistent_leaf_based_hashable_multicolors() hashed_vtree_consistent_leaves_multicolors = {mc.hashable_representation for mc in v_t_consistent_multicolors} self.vtree_consistent_multicolors_set = hashed_vtree_consistent_leaves_multicolors self.vtree_consistent_multicolors = [Multicolor(*hashed_multicolor) for hashed_multicolor in hashed_vtree_consistent_leaves_multicolors] result = [] # T-consistent multicolors can be viewed as VT-consistent multicolors united with all of their complements full_multicolor = v_t_consistent_multicolors[0] for multicolor in v_t_consistent_multicolors: result.append(multicolor) result.append(full_multicolor - multicolor) hashed_tree_consistent_leaves_multicolors = {mc.hashable_representation for mc in result} self.tree_consistent_multicolors_set = hashed_tree_consistent_leaves_multicolors self.tree_consistent_multicolors = [Multicolor(*hashed_multicolor) for hashed_multicolor in hashed_tree_consistent_leaves_multicolors] self.multicolors_are_up_to_date = True @property def tree_consistent_multicolors(self): """ Property based getter, that checks for consistency in terms of precomputed T-consistent multicolors, recomputes all consistent multicolors if tree topology has changed and returns internally stored list of T-consistent multicolors """ if not self.multicolors_are_up_to_date: self.__update_consistent_multicolors() return self.__tree_consistent_multicolors @property def tree_consistent_multicolors_set(self): """ Property based getter, that checks for consistency in terms of precomputed T-consistent multicolors, recomputes all consistent multicolors if tree topology has changed and returns internally stored set of hashable representation of T-consistent multicolors """ if not self.multicolors_are_up_to_date: self.__update_consistent_multicolors() return self.__tree_consistent_multicolors_set @tree_consistent_multicolors.setter def tree_consistent_multicolors(self, value): self.__tree_consistent_multicolors = value @tree_consistent_multicolors_set.setter def tree_consistent_multicolors_set(self, value): self.__tree_consistent_multicolors_set = value @property def vtree_consistent_multicolors(self): """ Property based getter, that checks for consistency in terms of precomputed VT-consistent multicolors, recomputes all consistent multicolors if tree topology has changed and returns internally stored list of VT-consistent multicolors """ if not self.multicolors_are_up_to_date: self.__update_consistent_multicolors() return self.__vtree_consistent_multicolors @property def vtree_consistent_multicolors_set(self): """ Property based getter, that checks for consistency in terms of precomputed VT-consistent multicolors, recomputes all consistent multicolors if tree topology has changed and returns internally stored set of hashable representation of VT-consistent multicolors """ if not self.multicolors_are_up_to_date: self.__update_consistent_multicolors() return self.__vtree_consistent_multicolors_set @vtree_consistent_multicolors.setter def vtree_consistent_multicolors(self, value): self.__vtree_consistent_multicolors = value @vtree_consistent_multicolors_set.setter def vtree_consistent_multicolors_set(self, value): self.__vtree_consistent_multicolors_set = value
[docs] def multicolor_is_tree_consistent(self, multicolor): """ Checks is supplied multicolor is T-consistent """ return multicolor.hashable_representation in self.tree_consistent_multicolors_set
[docs] def multicolor_is_vtree_consistent(self, multicolor): """ Checks is supplied multicolor is VT-consistent """ return multicolor.hashable_representation in self.vtree_consistent_multicolors_set
[docs] def bgedge_is_vtree_consistent(self, bgedge): """ Checks is supplied BGEdge (from the perspective of its multicolor is VT-consistent) """ return self.multicolor_is_vtree_consistent(bgedge.multicolor)
[docs] def bgedge_is_tree_consistent(self, bgedge): """ Checks is supplied BGEdge (from the perspective of its multicolor is T-consistent) """ return self.multicolor_is_tree_consistent(bgedge.multicolor)
[docs] def append(self, node_name, tree, copy=False): """ Append a specified tree (represented by a root TreeNode element) to the node, specified by its name :param copy: a flag denoting if the appended tree has to be added as is, or is the deepcopy of it has to be used :type copy: Boolean :raises: ValueError (if no node with a specified name, to which the specified tree has to be appended, is present in the current tree) """ self.multicolors_are_up_to_date = False tree_to_append = tree.__root if not copy else deepcopy(tree.__root) self.__get_node_by_name(node_name).add_child(tree_to_append)