Phylogenetic tree of CYP enzymes in gammaproteobacteria

The data were published in: N.W. Msomi et al., “In Silico Analysis of P450s and Their Role in Secondary Metabolism in the Bacterial Class Gammaproteobacteria”, Molecules 2021, 26(6):1538.

from browser import document, html
from javascript import JSON

from visualife.core import Plot, HtmlViewport
from visualife.widget import AjaxCommunication, TooltipWidget
from visualife.data import tree_from_dictionary
from visualife.core.styles import make_darker, make_brighter
from visualife.diagrams import CircularDendrogram, TreeNode


# ---------- Size of the drawing; handed to both the viewport and the dendrogram
width = 500
height = 500

# ---------- Tree data, filled in by receive_tree() once the JSON file has arrived
root = None     # --- root node of the tree that is drawn
leaves = []     # --- leaf nodes, i.e. the actual proteins

# ---------- Generated with http://vrl.cs.brown.edu/color
palette = [
    '#56ebd3', '#074d65', '#bfd6fa', '#2b72e7', '#710c9e',
    '#f79dcc', '#834c77', '#ab5cf6', '#9ae871', '#056e12',
    '#2da0a1', '#cadba5', '#84241a', '#fb2d4c', '#f97930',
    '#bd854a', '#dfd945', '#52461f', '#36e515', '#75859d',
]

# ---------- Lookup tables, also filled in by receive_tree()
color_for_family = {}   # --- CYP family name (a string, e.g. "1157") -> color (a string)
family_for_index = {}   # --- leaf node.id -> CYP family name (a string)


def receive_tree(evt):
    """Build, color and draw the tree once its JSON description has been downloaded.

    The response text is parsed into a tree, every leaf gets a CYP family assigned,
    the most populated families (at most one per ``palette`` entry) receive a color,
    and then the circular dendrogram and its legend are drawn. Finally the mouse-over
    handler is bound to the leaves of the drawing.

    :param evt: response object handed over by ``AjaxCommunication``; only its ``text``
        attribute (the tree in JSON format) is read here
    """

    global root, leaves, color_for_family, family_for_index
    # ---------- Parse the tree that came in JSON format (you can use VL to convert newick to json)
    root = tree_from_dictionary(JSON.parse(evt.text))

    # ---------- Assign family ID based on CYP id, e.g. CYP153A13 => 153,
    # ---------- and count how many times each family appears in the tree (single pass)
    leaves = TreeNode.collect_leaves(root)
    family_counts = {}
    for node in leaves:
        node.group_id = cyp_family(node.value)
        family = node.group_id
        family_for_index[node.id] = family
        family_counts[family] = family_counts.get(family, 0) + 1

    # ---------- Rank families from the most to the least populated; ties are broken
    # ---------- by the family name, also in descending order
    ranked = sorted(((count, family) for family, count in family_counts.items()), reverse=True)
    # --- slicing (rather than indexing a fixed range) also works for trees
    # --- that hold fewer families than there are colors in the palette
    top_families = [family for _, family in ranked[:len(palette)]]
    # ---------- Assign a color to each of the most populated families
    color_for_family = dict(zip(top_families, palette))

    vp = HtmlViewport(document['svg-tree'], width, height, download_button=True)
    drawing = CircularDendrogram("tree", vp, width, height, width=3, height=3, separation=1)
    drawing.x_margin = 10
    drawing.y_margin = 10
    drawing.draw(root, scale_edges=True, node_size=2, node_stroke_width=0.2, node_color="grey", node_stroke="black",
                 leaf_size=3, leaf_color=color_leaves, leaf_stroke="brighter", leaf_stroke_width=0.3,
                 leaves_on_axis=False, draw_labels=False,
                 edge_width=0.3, arc=350, rotation=270, edge_color="black")
    draw_legend(vp, width*0.75, 50, 4)
    vp.close()

    # --- the handler is attached only now, i.e. after ``root`` has been assigned
    document["tree:leaves"].bind("mouseover", mouse_over_node)


def draw_legend(viewport, x, y, r):
    """Draw the legend: one colored dot with a ``CYP<family>`` label per colored family.

    Entries follow the order of ``color_for_family`` and are stacked downwards,
    each one ``2.5 * r`` below the previous one.

    :param viewport: drawing surface; its ``circle()`` and ``text()`` methods are called
    :param x: x coordinate passed to ``circle()`` for every dot; labels start ``1.5 * r`` further right
    :param y: y coordinate of the first dot
    :param r: radius of a dot; the label offsets and the row spacing are derived from it
    """
    label_x = x + 1.5 * r
    label_dy = 0.4 * r
    row_step = r * 2.5

    row_y = y
    for family, fill in color_for_family.items():
        outline = make_brighter(fill, 0.6)
        viewport.circle(family + "-color", x, row_y, r, fill=fill, stroke_width=0.5, stroke=outline)
        viewport.text(family + "-label", label_x, row_y + label_dy, "CYP" + family,
                      text_anchor="start", font_size=6)
        row_y += row_step

def color_leaves(leaf_element_id):
    """Returns the color for a leaf of the tree.

    The family of the leaf is looked up in ``family_for_index``; a family listed in
    ``color_for_family`` gets its own color, every other family is drawn grey.

    :param leaf_element_id: key identifying the leaf; it must be one of the keys of
        ``family_for_index``, which ``receive_tree()`` fills with ``node.id`` values.
        NOTE(review): this was previously documented as an element ID such as ``tree-123``;
        confirm what ``CircularDendrogram`` actually passes to this callback
    :return: (``string``) node color
    :raises KeyError: when ``leaf_element_id`` is not a key of ``family_for_index``
    """
    family = family_for_index[leaf_element_id]
    return color_for_family.get(family, "#aaaaaa")


def cyp_family(cyp_code):
    """Returns a CYP family ID from a full CYP name, e.g. ``"105"`` from ``"CYP105A12"``

    :param cyp_code: (``string``) full name; its first three characters are skipped
    :return: (``string``) the characters from index 3 up to the first letter found
        at index 4 or later, or everything from index 3 on when no such letter exists
    """
    family_start = 3
    # --- the character at index 3 always belongs to the family, so the scan starts one further
    scan_start = family_start + 1
    for position, character in enumerate(cyp_code[scan_start:], scan_start):
        if character.isalpha():
            return cyp_code[family_start:position]
    return cyp_code[family_start:]


def mouse_over_node(ev):
    """Prints the ID and the value of the tree node the mouse pointer has moved over.

    Events whose target is not a tree node, i.e. whose element ID does not end with
    an integer after the 5-character ``tree-`` prefix, are silently ignored.

    :param ev: mouse event; only ``ev.target.id`` is read
    """

    # --- each node's id is "tree-%d" where %d is the integer ID of that node
    try:
        node_id = int(ev.target.id[5:])
    except ValueError:
        # --- e.g. the "tree:leaves" container this handler is bound to: nothing to report
        return

    for node in root:
        if node.id == node_id:
            print(node.id, node.value)
            break

# ---------- Request the tree with GET; receive_tree() is the callback given to AjaxCommunication
AjaxCommunication(
    "../_static/gammaproteobacterial_p450.fixed.fasttree.json",
    receive_tree,
    "GET",
)()

#tooltip = TooltipWidget("tooltip", "menu", "", 200, 30)