Source code for majortrack.similarities

from __future__ import absolute_import, \
        print_function, unicode_literals, division


def node_l_index(node_id, *groupings, **kwargs):
    """
    Return the node L-index for a node and a set of groupings.

    For every other node the function counts ``+1`` for each grouping in
    which that node shares a group with the focal node and ``-1`` otherwise.
    The per-node sum is divided by the number of groupings, its absolute
    value is taken, and the result is averaged over ``nbr_nodes - 1``.

    .. todo::

        Add option to only consider nodes that ever were in the same group
        as the focal node.

    Parameters
    ----------
    node_id: int
        Identifier of the focal node.
    groupings: list
        Each grouping must be a list of lists where each element is a node.
        So a grouping must be of the form:

        .. code-block:: python

            [[n0, node_id, n2], [n3, n4, ...], ...]
            # <    group 1   >  < group 2    > ...

        .. note::

            Each grouping must contain the same set of nodes. The set of
            other nodes is read from the first grouping only. Nodes must
            be hashable.

    nbr_groupings: int (keyword, default=len(groupings))
        Normalisation constant used for the per-node sum.
    nbr_nodes: int (keyword, default=number of nodes in the first grouping)
        Total number of nodes; the final sum is divided by ``nbr_nodes - 1``.
    all_nodes: list (keyword)
        Accepted for backward compatibility but currently ignored.

    Returns
    -------
    float
        The L-index of ``node_id``.

    Raises
    ------
    IndexError
        If no grouping is provided.
    ValueError
        If ``node_id`` is not part of the first grouping.
    StopIteration
        If ``node_id`` is missing from one of the groupings.
    ZeroDivisionError
        If ``nbr_nodes`` is 1 or ``nbr_groupings`` is 0.
    """
    nbr_groupings = kwargs.get('nbr_groupings', len(groupings))
    nbr_nodes = kwargs.get(
        'nbr_nodes',
        sum(len(_group) for _group in groupings[0])
    )
    # every node of the first grouping except the focal node
    _others = [_node for _group in groupings[0] for _node in _group]
    _others.remove(node_id)
    # the group holding the focal node in each grouping, stored as a set so
    # that the membership tests below do not rescan the group for each node
    _foc_groups = []
    for grouping in groupings:
        _foc_groups.append(
            frozenset(
                next(_group for _group in grouping if node_id in _group)
            )
        )
    # per node: sum of +1 / -1 over the groupings, normalised by the number
    # of groupings, absolute value
    per_node_e = [
        abs(
            sum(
                1 if _o_node in _foc_group else -1
                for _foc_group in _foc_groups
            ) / nbr_groupings
        )
        for _o_node in _others
    ]
    # sum and normalize by the number of other nodes
    return 1 / (nbr_nodes - 1) * sum(per_node_e)
def l_index(*groupings):
    """
    Placeholder for the L-index of a set of groupings.

    .. todo::

        This is not implemented.

    The function only flattens the first grouping into a local list of
    nodes; nothing is computed from it and ``None`` is returned.
    """
    # collect every node of the first grouping
    all_members = []
    for members in groupings[0]:
        all_members.extend(members)
def group_similarity_fraction(from_group, to_group):
    """
    Fraction of the nodes in `to_group` that are also in `from_group`.

    Parameters
    ----------
    from_group: set
        Group from which nodes might come.
    to_group: set
        Group in which the nodes are.

    Returns
    -------
    float
        ``|to_group & from_group| / |to_group|``, or ``0.0`` if `to_group`
        is empty.
    """
    target_size = len(to_group)
    # an empty target group has no members that could come from anywhere
    if target_size == 0:
        return 0.0
    shared = to_group.intersection(from_group)
    return len(shared) / target_size
def group_similarity_jaccard(group_1, group_2, N=None):
    """
    Compute the fraction of agreeing nodes between two groups.

    The similarity is 1 minus the fraction of nodes that lie in the
    symmetric difference of the two groups. A node agrees when it has the
    same status (member or non-member) towards both groups, and each
    agreeing node contributes 1/N.

    If the network size `N` is given, nodes that belong to neither group
    also count as agreeing. With `N=None` the size of the union of both
    groups is used as `N`, which makes the result the Jaccard index.

    Parameters
    ----------
    group_1: set
        Members of a group.
    group_2: set
        Members of another group.
    N: int (default=None)
        Number of nodes in the network. If not provided, then the Jaccard
        index is computed.

    Returns
    -------
    float
        ``(N - |group_1 ^ group_2|) / N``, or ``0.0`` if `N` is zero.
    """
    total = len(group_1.union(group_2)) if N is None else N
    # nothing to compare against
    if not total:
        return 0.0
    disagreeing = len(group_1.symmetric_difference(group_2))
    return (total - disagreeing) / total