import sys
import math
import fractions
import string

# NOTE: This is to calculate mapping from j2me -> android. Note the direction of mapping. It is important.

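# Calculates the CPD of all the count factors
# Usage: python count_factor_CPD.py <android list file> <j2me list file> <node_list> <mat_code file> <prior numbers file> <code_file_nodes> <code_file_cpd> <code_file_num> <android BB file> <j2me BB file>
# <android list file> : input file having one line per BB ( <BB number> <relative count of BB> <absolute count of BB> ... <predecessor list of BB> ppp <whom_am_i_pred list of BB> sss <successor list of BB> <position> )
# <j2me list file> : input file having the same format as above
# <node_list> : output file in which the node list is written ( <node number> <j2me node> <android node> )
# <mat_code file> : output file in which matlab code (the tabular_CPD lines) will be written
# <prior numbers file> : input file having mappings in terms of node numbers (j2me node, android node, prob. of false, prob. of true), obtained as the result of the first pass
# <code_file_nodes> : output file in which the "G(n,n) = 1;" lines are written
# <code_file_cpd> : output file reserved for the tabular_CPDs of the "ordering factor"
# <code_file_num> : output file in which "nfactors" and "nvars" are written
# <android BB file>, <j2me BB file> : input files having the text of the BBs
# NOTE: an earlier description of this script also listed a <Version> argument
#	      "frequent" : for frequently occurring calls
#	      "rest" : for rest of the lot
# The argument handling in this file does not read a <Version> argument.
#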

def LowerOfTheTwo(num1, num2):
    """Return the smaller of the two arguments.

    When ``num1`` is not strictly smaller than ``num2`` (for example when
    the two are equal), ``num2`` is returned.
    """
    return num1 if num1 < num2 else num2


def isNullBB(j, countList):
    """Tell whether entry ``j`` of ``countList`` is a null BB.

    Each entry is laid out as (BB number, relative count, absolute count,
    ...).  The BB is null when its absolute count (index 2) equals 0.

    j         -- index of the entry to inspect
    countList -- list of count entries
    Returns True for a null BB, False otherwise.
    """
    # Indented with spaces only: the previous body mixed tabs and spaces,
    # which Python 3 rejects with a TabError.
    return countList[j][2] == 0

# Function to compute how "similar" the two counts are.

def computeCountSimilarity(c1, c2):
    """Return how similar two counts are, as the ratio smaller / larger.

    c1, c2 -- numeric counts (int, float or fractions.Fraction); they are
              expected to be non-negative.

    The ratio is rounded to four decimal places.  Equal counts give 1.0.
    Two zero counts are treated as identical (1.0) rather than raising
    ZeroDivisionError.
    """
    if c1 < c2:
        smaller, larger = c1, c2
    else:
        smaller, larger = c2, c1

    if smaller == 0 and larger == 0:
        return 1.0

    # Convert *before* dividing.  float(a / b) floor-divides plain ints under
    # Python 2, which collapsed every unequal pair of integer counts to 0.0.
    return round(float(smaller) / float(larger), 4)

# Given : entry from the list
# Output : true if (BB count) is high
#          false otherwise        

def isHigh(entry):
    """Tell whether the relative count of a list entry is high.

    entry -- a list entry whose relative count is stored at index 1
    Returns True when that count, converted with float(), is strictly
    greater than 0.1, and False otherwise.
    """
    threshold = 0.1
    return float(entry[1]) > threshold

def areSimilarlyLow(j2me_entry, android_entry):
    """Tell whether the absolute counts of two entries are close to each other.

    The absolute count is read from index 2 of each entry.  The counts are
    considered similar when they differ by at most 10, or when
    computeCountSimilarity() rates them at 0.5 or more.
    """
    count_j2me = j2me_entry[2]
    count_android = android_entry[2]

    # Cheap absolute-difference test first; the ratio is only computed when
    # the counts are more than 10 apart.
    if math.fabs(count_j2me - count_android) <= 10:
        return True
    return computeCountSimilarity(count_j2me, count_android) >= 0.5

def computePositionSimilarity(pos1, pos2):
    """Return how similar two positions are, as the ratio smaller / larger.

    pos1, pos2 -- numeric positions; they are expected to be non-negative.

    The ratio is rounded to four decimal places.  Equal positions give 1.0.
    Two positions of 0 are treated as identical (1.0) rather than raising
    ZeroDivisionError.
    """
    if pos1 < pos2:
        smaller, larger = pos1, pos2
    else:
        smaller, larger = pos2, pos1

    if smaller == 0 and larger == 0:
        return 1.0

    # Convert before dividing so integer positions are not floor-divided
    # under Python 2.
    return round(float(smaller) / float(larger), 4)
    

def computePositionDifference(pos1, pos2):
    """Return the absolute difference of the two positions.

    Each position is rounded to two decimal places before subtracting; the
    result is returned as a float.
    """
    return math.fabs(round(pos1, 2) - round(pos2, 2))
    

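# Note that the format of the "list file" i.e. android list file and j2me list file is:
# <BB number> <relative count of BB> <absolute count of BB> <first position> <predecessor list of BB> ppp <whom_am_i_pred list of BB> sss <successor list of BB> <position>
# (older, abbreviated example of such a line:  33 164/333 328 ppp 30 34 )
# The parsing code in this file starts reading the predecessor list at index 4, expects both the "ppp" and the "sss" markers, and takes the last value on the line as the position.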

# Function:
#   returns the index of the pred_list in the list file. 

def get_predListIndex_ListFile():
    """Return the index at which the predecessor list starts in a list-file line."""
    pred_list_column = 4
    return pred_list_column

# Function:
#   returns the index of the succ_list in the list file.
# Note that this index is for the *internal data structures* - j2me_list and android_list -
# whose format is different than the j2me_list file and android_list file

def get_succListIndex_InternalList(list):
    """Return the index of the successor list inside an internal list entry.

    The ``list`` argument is accepted but not consulted; the index is fixed.
    """
    succ_list_slot = 6
    return succ_list_slot

def get_posIndex_InternalList():
    """Return the index of the position value inside an internal list entry."""
    position_slot = 7
    return position_slot

# Function: 
# input: a pair of (BB, BB)
# output: index in the array node_list (node number of the variable in G) corresponding to the pair (BB, BB)

def getNodeNum(bb1, bb2):
    """Return the 1-based node number of the pair (bb1, bb2).

    The pair is looked up in the module-level ``node_list``; the stored BB
    numbers are converted with int() before comparing.  Node numbers start
    at 1 because matlab counts nodes from 1, so the entry at array index k
    is reported as k + 1.  Returns -1 when the pair is not present.
    """
    for position, pair in enumerate(node_list):
        if int(pair[0]) == bb1 and int(pair[1]) == bb2:
            return position + 1
    return -1

# Function to check member in the frequent list
#   returns index of (abb, jbb) in the list
#   returns -1 if not found

def checkMemberInList(list, abb, jbb):
    """Return the index of the first entry of ``list`` that matches (abb, jbb).

    An entry matches when its element 0 equals ``abb`` and its element 1
    equals ``jbb``.  Returns -1 when no entry matches.
    """
    for index, item in enumerate(list):
        if item[0] == abb and item[1] == jbb:
            return index
    return -1

# Function that returns index of a bb in list

def findBB(list, bb):
    """Return the index of the first entry of ``list`` whose element 0 equals ``bb``.

    Returns -1 when no entry matches.
    """
    for index, item in enumerate(list):
        if item[0] == bb:
            return index
    return -1

# Function that takes as input : a BB number and internal list having BB strings
# returns : string corresponding to the give BB number

def getString(bbNum, bbStrList):
    """Return the BB string stored for ``bbNum`` in ``bbStrList``.

    bbNum     -- BB number to look up
    bbStrList -- internal list whose entries are [bb number, bb string]

    Returns the string of the first matching entry.  When the BB is not in
    the list, an error line is printed and the placeholder "000" is returned.
    """
    for entry in bbStrList:
        if entry[0] == bbNum:
            return entry[1]

    # This function is used for both the j2me and the android lists, so the
    # message no longer claims that the missing BB is an android one.
    print("Error: BB " + str(bbNum) + " Not found")
    return "000"

# Function that takes as input : file name having the BB strings
# returns : an internal list containing bb numbers and bb strings
# list[k][0] = bb number
# list[k][1] = bb string

def readBBs(bbFile):
    """Read the numbered BB strings from ``bbFile``.

    bbFile -- name of the file having the BB strings

    The file is a sequence of BBs.  Each BB is a run of text lines and is
    terminated by a delimiter line, i.e. a line whose last character (before
    the line ending) is '*'.  Blank lines are ignored.

    Returns a list of [bb number, bb string] entries; BBs are numbered from 0
    in file order and the string keeps the original line endings.  A
    delimiter with no text before it creates no BB.  Trailing text that is
    not followed by a delimiter is not returned.
    """
    bbList = []
    entry = ''
    # 'with' closes the file even when reading fails.
    with open(bbFile, 'r') as handle:
        for line in handle:
            # Strip the line ending before testing, so that a final delimiter
            # with no newline and delimiters with CRLF endings are recognised.
            content = line.rstrip('\r\n')
            if not content:
                continue
            if not content.endswith('*'):
                entry += line
            elif entry != '':
                bbList.append([len(bbList), entry])
                entry = ''
    return bbList

def summarizeBB(bbStr):
    """Summarize a BB string as [type, uniformity].

    bbStr -- text of a BB, one call per line

    Returns a two-element list:
      [0] "get", "set" or "draw" when *every* line of the BB starts with
          "*get", "*set" or "*draw" respectively, otherwise "".
      [1] "uniform" when all the lines of the BB are identical, otherwise
          "non-uniform".
    An empty BB string yields ["", "non-uniform"], so that nothing is
    inferred from a block without calls.
    """
    descrpt = ["", "non-uniform"]

    # calls[i] = i th line in the BB string.
    calls = bbStr.splitlines()
    if not calls:
        return descrpt

    # Every line is examined for the type.  The previous loop stopped scoring
    # at the first line that differed from the first one, so a non-uniform BB
    # made only of "get" calls was never declared a "get" BB.
    for kind in ("get", "set", "draw"):
        prefix = "*" + kind
        if all(line.startswith(prefix) for line in calls):
            descrpt[0] = kind
            break

    # A "uniform BB" is one in which all the lines equal the first line.
    firstCall = calls[0]
    if all(line == firstCall for line in calls):
        descrpt[1] = "uniform"

    return descrpt

def methodTypesSimilarity(type1, type2):
    """Score how well two BB types ("get", "set", "draw" or "") agree.

    Returns  1 when both types are the same one of "get", "set", "draw";
            -1 for the pairs get/set, set/get, get/draw and draw/get;
             0 for every other combination.
    """
    # NOTE(review): set/draw and draw/set score 0, not -1 -- confirm intended.
    if type1 == type2 and type1 in ("get", "set", "draw"):
        return 1

    opposing = (("get", "set"), ("set", "get"), ("get", "draw"), ("draw", "get"))
    if (type1, type2) in opposing:
        return -1

    return 0

def computeTextSimilarity(bb1Str, bb2Str):
    """Score the textual similarity of two BB strings.

    bb1Str -- text of the j2me BB
    bb2Str -- text of the android BB

    Returns
      1    when both BBs are uniform and the first word of their first
           lines is the same,
      0.8  otherwise, when both BBs have the same type (get/set/draw),
      0    in every other case (opposite or unknown types).
    """
    # First word of the first call of each BB.
    j2meMethod = bb1Str.splitlines()[0].split()
    androidMethod = bb2Str.splitlines()[0].split()

    j2meType, j2meShape = summarizeBB(bb1Str)
    androidType, androidShape = summarizeBB(bb2Str)

    # Two uniform BBs made of the very same call have the highest score.
    both_uniform = (j2meShape == "uniform") and (androidShape == "uniform")
    if both_uniform and j2meMethod[0] == androidMethod[0]:
        return 1

    # Same kind of call on both sides: high, but not perfect, similarity.
    if methodTypesSimilarity(j2meType, androidType) == 1:
        return 0.8

    return 0


# error checking
# Ten arguments are required; sys.argv[0] is the script name.
if len(sys.argv) != 11:
    print("# Usage: python count_factor_CPD.py <android list file> <j2me list file> <node_list> <mat_code file> <prior numbers file> <code_file_nodes> <code_file_cpd> <code_file_num> <android BB file> <j2me BB file>")
    # Exit with a non-zero status so that a calling script can tell a usage
    # error from a successful run.
    sys.exit(1)

# sys.argv[1] : android_list
# sys.argv[2] : j2me_list
# sys.argv[3] : node_list
# sys.argv[4] : code1
# sys.argv[5] : num0*.txt 
# sys.argv[6] : code2 i.e. code_file_nodes
# sys.argv[7] : code3 i.e. code_file_cpd
# sys.argv[8] : code4 i.e. code_file_num

# start
frequentList = []
node_list = []


def _read_count_list(list_file):
    """Parse a list file into internal entries, sorted by absolute count.

    Each non-blank line has the form
      <BB number> <relative count> <absolute count> ... <pred list> ppp
      <whom_am_i_pred list> sss <succ list> <position>
    where the predecessor list starts at get_predListIndex_ListFile() and
    the last value on the line is the position.

    Every returned entry has the format:
      [BB number, relative count (Fraction), absolute count,
       flag which when turned ON would indicate that the BB is frequent,
       pred list, whom_am_i_pred list, succ list, position (float)]

    Raises ValueError when a line lacks the "ppp" or "sss" marker.
    """
    entries = []
    pred_start = get_predListIndex_ListFile()
    # 'with' closes the file; blank lines are skipped instead of crashing.
    with open(list_file, 'r') as handle:
        for line in handle:
            values = line.split()
            if not values:
                continue
            # The lengths of the three lists are not known in advance; the
            # "ppp" and "sss" markers delimit them.
            ppp_at = values.index("ppp", pred_start)
            sss_at = values.index("sss", ppp_at + 1)
            entries.append([
                int(values[0]),
                fractions.Fraction(values[1]),
                int(values[2]),
                0,
                [int(v) for v in values[pred_start:ppp_at]],
                [int(v) for v in values[ppp_at + 1:sss_at]],
                # Everything after "sss" except the last value (the position).
                [int(v) for v in values[sss_at + 1:-1]],
                float(values[-1]),
            ])
    # Ascending order of absolute counts (temporary; the lists are later
    # sorted again by BB number).
    entries.sort(key=lambda entry: entry[2])
    return entries


# countList1 reads from android list
countList1 = _read_count_list(sys.argv[1])

# countList2 reads from j2me list
countList2 = _read_count_list(sys.argv[2])

# keep in mind that the lists are sorted temporarily in ascending order of relative/absolute counts, but later on, we sort them in ascending order of BB numbers while filling up the "cpd" table.

# Only columns 0, 1 and 3 of each line are kept; column 2 is not read.
# 'with' closes the file, and blank lines are skipped instead of crashing.
with open(sys.argv[5], 'r') as prior_file:
    for line in prior_file:
        values = line.split()
        if not values:
            continue
        frequentList.append([int(values[0]), int(values[1]), float(values[3])])

# frequentList stores the mappings of frequently occurring BBs that are present in the input num* file. (obtained as the result of the first pass)
# Format is : BB number in j2me trace, BB number in android trace, Probability of mapping being false, Probability of mapping being true


# initialize output list
cpd = []


def _flag_frequent(count_list):
    """Turn ON the frequent flag of the top BBs and return how many were flagged.

    count_list -- internal list sorted in ascending order of absolute count
                  (index 2), so that its last entry holds the maximum count.

    Walking down from the highest count, an entry is flagged (index 3 set
    to 1) while its relative count (index 1) is at least 0.1 and the maximum
    count is less than 10 times its absolute count.  The walk stops at the
    first entry that fails the test.  An empty list flags nothing.
    """
    if not count_list:
        return 0
    max_count = count_list[-1][2]
    flagged = 0
    for entry in reversed(count_list):
        # max_count < 10 * count is the division-free form of
        # max_count / count < 10, so a zero count cannot raise.
        if entry[1] >= 0.1 and max_count < 10 * entry[2]:
            entry[3] = 1
            flagged += 1
        else:
            break
    return flagged


# Top n value: Find the "right" value of "n". We will consider only the top "n" BBs from the list.
top_2 = _flag_frequent(countList2)
top_1 = _flag_frequent(countList1)

# Bring back the lists sorted in the ascending order of BB numbers.

# We sort the android and j2me node lists in ascending order of BB numbers. Reason is that, when writing the CPD tablein the matlab code file, we want to follow a particular order of BBs, so that it later becomes easy for interpretation of probabilities. (I don't know whether a particular order is requirement of matlab). 
# Order both lists by BB number (element 0 of every entry), in place.
countList1.sort(key=lambda bb_entry: bb_entry[0])
countList2.sort(key=lambda bb_entry: bb_entry[0])

# Open output file in which node list is written. [<node number> <j2me node> <android node>]
with open(sys.argv[3], 'w') as nodeListFile:
    # Read the android BB numbers once, instead of re-opening the android
    # list file for every line of the j2me list file.
    with open(sys.argv[1], 'r') as android_file:
        android_bbs = [aline.split()[0] for aline in android_file if aline.split()]

    # One node per (j2me BB, android BB) pair, numbered from 1 in file order.
    var_index = 1
    with open(sys.argv[2], 'r') as j2me_file:
        for jline in j2me_file:
            jvalues = jline.split()
            if not jvalues:
                continue
            for android_bb in android_bbs:
                node_list.append([jvalues[0], android_bb])
                s = str(var_index) + " " + str(jvalues[0]) + " " + str(android_bb) + "\n"
                nodeListFile.write(s)
                var_index += 1

# for each node in node_list
#       spit out "G(node, node) = 1"

# Output file in which nodes are written: G(34, 56) = 1
codeFileNodes = open(sys.argv[6], 'w')

# Output file in which tabular_CPDs for the "ordering factor" are written:
#                 tabular_CPD(bnet, fc78, 'CPT', [1 1 1 0]), ...
# Both files stay open here; they are closed at the end of the script.
codeFileCPD = open(sys.argv[7], 'w')

# One "G(n,n) = 1;" line per node, with nodes numbered from 1.
node_count = len(node_list)
codeFileNodes.writelines(
    "G(" + str(n) + "," + str(n) + ") = 1;\n" for n in range(1, node_count + 1)
)

# nodeNum ends one past the last node number; factor numbering starts there.
nodeNum = node_count + 1
factorNum = nodeNum

# Algo:
# (Loop the provided "prior information" list)
# Copy prior list to a running inferred_mappings list
# for each (A, alpha) in prior list do
#       Find (B, beta) such that : B is successor of A AND beta is successor of alpha
#       Since A -> alpha, by the implies rule, B -> beta. Add (B, beta to the inferred_mappings list
#       Mark (A, alpha) pair as "Done" in the inferred_mappings list
#       NOTE: As you loop through the inferred_mappings list, you will encounter (B, beta) some time. And then
#        you will find predecessors to (B, beta) and infer mappings. and so on.

# You need to have a successor list: A list of BBs which appear after me most of the time.
# Question: What if you have 2 successorss, B and C? Would both (B, beta), (C, beta) mappings be correct? One of them got to be wrong!
# So my suggestion is, don't proceed if you have two or more successors.

#print "frequent :" , frequentList

# Running list of mappings; each entry is [j2me BB, android BB, done flag].
# The prior mappings are copied with a fresh done flag of 0.  Previously the
# prior probability sat in that slot, so a probability of exactly 1.0 was
# mistaken for "done", and marking an entry also modified frequentList.
inferred_maps = [[prior[0], prior[1], 0] for prior in frequentList]

# Pairs that are already in inferred_maps.  Without this check, a cycle of
# single-successor BBs kept appending the same pairs for ever.
seen_pairs = set((entry[0], entry[1]) for entry in inferred_maps)

i = 0
while i < len(inferred_maps):
    A = inferred_maps[i][0]
    alpha = inferred_maps[i][1]
    index_A = findBB(countList2, A)
    index_alpha = findBB(countList1, alpha)

    # findBB returns -1 for an unknown BB; indexing with -1 would silently
    # pick the last entry of the list, so such mappings are not followed.
    if index_A != -1 and index_alpha != -1:
        # Find (B, beta): the successors of A and of alpha.
        A_succ_list = countList2[index_A][get_succListIndex_InternalList(countList2[index_A])]
        alpha_succ_list = countList1[index_alpha][get_succListIndex_InternalList(countList1[index_alpha])]

        # Proceed only if both, A and alpha, have a single successor.
        if len(A_succ_list) == 1 and len(alpha_succ_list) == 1:
            B = A_succ_list[0]
            beta = alpha_succ_list[0]
            if (B, beta) not in seen_pairs:
                seen_pairs.add((B, beta))
                inferred_maps.append([B, beta, 0])

    # Flag (A, alpha) pair as done
    inferred_maps[i][2] = 1
    i += 1

cpd_for_inferred = "0.2 0.8"


########### For reading BBs into internal bb list
# sys.argv[9] is the android BB file, sys.argv[10] the j2me BB file.
androidBBList, j2meBBList = map(readBBs, sys.argv[9:11])
###########


# Algo:
# for each bb i of countList1
#   for each bb j of countList2
#	create a variable 'X' + 'i' + 'j'
#	calculate f(rel_count1, rel_count2) 
# 	calculate [P[fc = false], P[fc = true]]
#	store the above values in a list


# loop index
# For every (j2me BB, android BB) pair, compute the probability that the
# mapping is true and append [variable number, variable name,
# P(false), P(true)] to cpd.
#
#   trueProb = (x * textSimilarity + y * posDifference + z * countSimilarity) / (x + z)
#
# The position term is the *difference* given by computePositionDifference,
# hence y = -1 and the normalization constant (x + z).  If
# computePositionSimilarity is used instead, set y = 1 and normalize by
# (x + y + z).  A negative result is clamped to 0.
# (This section is indented with spaces only; it previously mixed tabs and
# spaces, which Python 3 rejects.)
x = z = 1
y = -1
posIndex = get_posIndex_InternalList()

# variable number
k = 1
for j2me_entry in countList2:
    bb1 = j2me_entry[0]
    for j, android_entry in enumerate(countList1):
        bb2 = android_entry[0]
        # NOTE(review): plain concatenation is ambiguous (1,23 and 12,3 both
        # give 'X123'); the name is only stored in cpd here.
        var = 'X' + str(bb1) + str(bb2)
        # A null android BB keeps a true probability of 0.
        trueProb = 0

        if not isNullBB(j, countList1):
            # Note: c1 is from countList2 (j2me), c2 is from countList1 (android)
            c1 = j2me_entry[1]
            c2 = android_entry[1]
            pos1 = j2me_entry[posIndex]
            pos2 = android_entry[posIndex]

            countSimilarity = computeCountSimilarity(c1, c2)
            posSimilarity = computePositionSimilarity(pos1, pos2)
            posDifference = computePositionDifference(pos1, pos2)

            # Domain specific factor: similarity of the text of the API calls.
            bb1Str = getString(bb1, j2meBBList)
            bb2Str = getString(bb2, androidBBList)
            textSimilarity = computeTextSimilarity(bb1Str, bb2Str)

            # Debug trace; the items are separated by single spaces.
            print(" ".join(str(item) for item in (
                "(", bb1, bb2, ")", "countSim = ", countSimilarity,
                "posSim =", posSimilarity, "posDifference = ", posDifference,
                "(", pos1, pos2, ")", "textSim = ", textSimilarity)))

            trueProb = (x * textSimilarity + y * posDifference + z * countSimilarity) / (x + z)
            if trueProb < 0:
                trueProb = 0

        print(round(trueProb, 2))

        cpd.append([k, var, 1 - trueProb, trueProb])
        k += 1

# One tabular_CPD line per entry of cpd: [P(false), P(true)].
with open(sys.argv[4], 'w') as codeFile:
    for row in cpd:
        codeFile.write("\t\ttabular_CPD(bnet, x78, 'CPT', [" + str(row[2]) + ", " + str(row[3]) + "]), ...\n")

# Output file in which only two lines are written:
#               nfactors = 331;
#               nvars = 325;
with open(sys.argv[8], 'w') as codeFileNum:
    codeFileNum.write("nfactors = " + str(factorNum-1) + ";\n")
    codeFileNum.write("nvars = " + str(nodeNum-1) + ";\n")

# Close the two files that were opened earlier in the script.
codeFileNodes.close()
codeFileCPD.close()

print("Count factor done")
