#!/usr/bin/env python
from __future__ import division
import logging
import networkx as nx

#==============================================================================
#                              READ INPUT
#==============================================================================
def read_network(filename):
    """ Reads oriented, directed, weighted graph from file.

        The file is tab-separated with the columns
            Source, Type, Target, Oriented, Weight.
        The first line must be a header that starts with "Source".
        Blank lines are skipped.  Every data row must have Type == "pp"
        and Oriented == "true".

        Returns a nx.DiGraph with one edge Source -> Target per data row;
        the edge attribute "weight" is set to 1 - Weight.

        Raises ValueError if the header or a data row is malformed.
    """
    G = nx.DiGraph()
    with open(filename) as f:
        for line_num, line in enumerate(f):
            if line_num == 0:
                if not line.startswith("Source"):
                    raise ValueError(
                        "%s: expected a header line starting with 'Source', got %r"
                        % (filename, line))
                continue

            stripped = line.strip()
            if not stripped:
                continue  # tolerate blank lines (e.g. a trailing newline).
            cols = stripped.split("\t")

            # Explicit checks instead of assert: asserts vanish under `python -O`.
            if len(cols) < 5:
                raise ValueError(
                    "%s, line %d: expected 5 tab-separated columns, got %d"
                    % (filename, line_num + 1, len(cols)))
            if cols[1] != "pp":
                raise ValueError(
                    "%s, line %d: unsupported edge type %r (expected 'pp')"
                    % (filename, line_num + 1, cols[1]))
            if cols[3] != "true":
                raise ValueError(
                    "%s, line %d: edge is not oriented (Oriented=%r)"
                    % (filename, line_num + 1, cols[3]))

            u, v, weight = cols[0], cols[2], float(cols[4])
            G.add_edge(u, v, weight=1 - weight)  # 1-weight bc we optimize for SHORT paths.
    return G


def read_network_unweighted(filename):
    """ Reads oriented, directed graph from file, without edge weights.

        The file is tab-separated with the columns
            Source, Type, Target, Oriented.
        Any further columns (such as a weight) are ignored.
        The first line must be a header that starts with "Source".
        Blank lines are skipped.  Every data row must have Type == "pp"
        and Oriented == "true".

        Returns a nx.DiGraph with one edge Source -> Target per data row.

        Raises ValueError if the header or a data row is malformed.
    """
    G = nx.DiGraph()
    with open(filename) as f:
        for line_num, line in enumerate(f):
            if line_num == 0:
                if not line.startswith("Source"):
                    raise ValueError(
                        "%s: expected a header line starting with 'Source', got %r"
                        % (filename, line))
                continue

            stripped = line.strip()
            if not stripped:
                continue  # tolerate blank lines (e.g. a trailing newline).
            cols = stripped.split("\t")

            # Explicit checks instead of assert: asserts vanish under `python -O`.
            if len(cols) < 4:
                raise ValueError(
                    "%s, line %d: expected at least 4 tab-separated columns, got %d"
                    % (filename, line_num + 1, len(cols)))
            if cols[1] != "pp":
                raise ValueError(
                    "%s, line %d: unsupported edge type %r (expected 'pp')"
                    % (filename, line_num + 1, cols[1]))
            if cols[3] != "true":
                raise ValueError(
                    "%s, line %d: edge is not oriented (Oriented=%r)"
                    % (filename, line_num + 1, cols[3]))

            G.add_edge(cols[0], cols[2])
    return G


def read_potential_edges(G,filename="../data/YEAST_protein.links.v9.0.txt",
                         taxon_prefix="4932."):
    """ Reads weights for edges that are not in G (potential edges).

        Each data line of the file holds whitespace-separated columns
            protein1 protein2 combined_score
        where both protein ids start with `taxon_prefix` (default "4932.")
        and the score is divided by 1000 before use.

        G            -- existing network; only `u in G` and G.has_edge(u,v)
                        are used.
        filename     -- path of the links file.
        taxon_prefix -- prefix removed from every protein id
                        (4932.Q0010 --> Q0010).

        Returns a nx.DiGraph named "STRING Potential" that contains an edge
        u -> v with weight = 1 - score/1000 for every listed pair whose two
        endpoints are both nodes of G and which is not already an edge of G.
        Pairs not listed in the file are NOT added (see the note below).

        Raises ValueError on a malformed line or an id without the prefix.
    """
    Gp = nx.DiGraph(name="STRING Potential") # Potential directed, weighted edges.
    prefix_len = len(taxon_prefix)

    with open(filename) as f:
        for line_num, line in enumerate(f):
            cols = line.split()
            if not cols:
                continue  # tolerate blank lines.
            if line_num == 0 and cols[0] == "protein1":
                continue  # some STRING releases start with a header row.
            if len(cols) < 3:
                raise ValueError(
                    "%s, line %d: expected 3 columns, got %d"
                    % (filename, line_num + 1, len(cols)))

            u, v = cols[0], cols[1]
            # Explicit check instead of assert: asserts vanish under `python -O`.
            if not (u.startswith(taxon_prefix) and v.startswith(taxon_prefix)):
                raise ValueError(
                    "%s, line %d: protein ids %r, %r do not start with %r"
                    % (filename, line_num + 1, u, v, taxon_prefix))
            weight = float(cols[2]) / 1000
            u, v = u[prefix_len:], v[prefix_len:] # 4932.Q0010 --> Q0010

            if G.has_edge(u, v): continue
            if u not in G or v not in G: continue # ignores nodes not in G.
            Gp.add_edge(u, v, weight=1 - weight)

            # The file is expected to list every pair in both directions
            # (u v and v u), so there is no need to also check
            # G.has_edge(v,u) here.


    # For all remaining potential edges, set their weight to DEFAULT_EDGE_WEIGHT.
    # Commented out for now because it adds too many edges and produces memory issues.
    #for u in G:
    #    for v in G:
    #        if u >= v: continue
    #        if not G.has_edge(u,v) and not Gp.has_edge(u,v):
    #            Gp.add_edge(u,v,weight=1-DEFAULT_EDGE_WEIGHT)
    #        if not G.has_edge(v,u) and not Gp.has_edge(v,u):
    #            Gp.add_edge(v,u,weight=1-DEFAULT_EDGE_WEIGHT)
    #assert G.size() + Gp.size() == G.order()*(G.order()-1)

    # Lazy %-args: the message is only formatted if INFO logging is enabled.
    logging.info("Potential network: %s", filename)
    logging.info(" #nodes=%i, #edges=%i", Gp.order(), Gp.size())

    return Gp


def read_nodes(filename):
    """ Reads sources / targets from file.

        The file holds one node name per line.  Surrounding whitespace is
        stripped and blank lines are ignored, so a trailing empty line does
        not add an empty-string node.

        Returns the set of node names.
    """
    X = set()
    with open(filename) as f:
        for line in f:
            node = line.strip()
            if node:
                X.add(node)
    return X
