import sys
import re
import fractions

# Refer to the file Tests/README for an explanation of the data structures.

# traces_from_bb.py: Given a trace and a list of BBs, this script rewrites the original trace in terms of BBs. It also produces a list
# (<bb>, <rel_count>), ... which is written into a file. This list contains only those BBs that appear in the given trace.
#
# Usage: python traces_from_bb.py <tracefile> <BB file> <list file>
# <tracefile> : input file having the trace 
# <BB file> : input file having the building blocks
# <list file> : output file which will contain the list 
#                            [(<BB number> <relative count of BB> <absolute count of BB> <predecessor list of BB> ppp <whom_am_i_pred list of BB>), ...]

###########################
# Nov. 20:
# I am introducing one more piece of information about a BB that we should keep track of: the position of the first occurrence of the BB in the trace. This information will be used for the disambiguation factor.

###########################
# Dec. 16:
# Adding one more piece of information: "average" position of a BB in the trace


###########################
# Algorithm:
#
# read trace into string
# open the file containing the BBs
# Read file such that the block appearing in between the two "****" is read as a single string.
# For each such block bb, do
#	indices = [m.start() for m in re.finditer('bb', 'trace')]
#	add (bb, indices) to an array C
#	make pairs (bb, idx1) (bb, idx2) etc.
# close the BB file
# Sort the entries (bbi, idxj) based on idxj.
# Print the bb in this sorted array

# For each BB X in the list, you have to compute a list of BBs such that X appears as predecessor for these building blocks.
#
#
#
#

def getNullBB():
    """Return the marker string used for the "null" building block."""
    null_marker = "*null"
    return null_marker

def isNullBB(i):
    """Tell whether building block number ``i`` is the "null" building block.

    Compares entry ``i`` of the module-level ``bb`` list with the marker
    returned by getNullBB().

    Returns True when they are equal, False otherwise. Raises IndexError
    when ``i`` is not a valid index into ``bb``.
    """
    # Space-only indentation: the original mixed tabs and spaces in the two
    # branches, which Python 3 rejects with TabError.
    return bb[i] == getNullBB()

def isMember(array, ibb, ipred):
    """Find the entry for ``ipred`` in the neighbour list of block ``ibb``.

    ``array[ibb]`` is a list of entries whose first element is a building
    block number (the callers in this file store ``[bb_number, count]``).

    Returns the position of the first entry whose first element equals
    ``ipred``, or -1 when there is no such entry.
    """
    for position, entry in enumerate(array[ibb]):
        if entry[0] == ipred:
            return position
    return -1

# Instead of a single BB, this function returns an array of BBs.
def LikelyPredSucc(array, ibb):
    """Return the likely neighbours (predecessors or successors) of block ``ibb``.

    ``array[ibb]`` is a list of ``[bb_number, count]`` entries. An entry is
    selected when its count is not 1 and is at least 75% of the number of
    occurrences of ``ibb``, taken as ``len(indices[ibb])`` from the
    module-level ``indices`` list.

    Returns a list of building block numbers, in the order they appear in
    ``array[ibb]``. The list may be empty.
    """
    # The threshold does not depend on the entry, so compute it once.
    threshold = 0.75 * len(indices[ibb])
    # The result list is no longer called "set", which shadowed the builtin.
    return [entry[0] for entry in array[ibb]
            if entry[1] != 1 and entry[1] >= threshold]

# array: input array having a list of (<bb>, <list of BBs which are predecessors of given bb>)
# output array: list of BBs for which I am a predecessor
#
def toWhomAmPred(array, bb):
    """List the building blocks that have ``bb`` among their predecessors.

    ``array`` is a list of entries ``[block, predecessor_list]``.

    Returns a list holding ``entry[0]`` once for every occurrence of ``bb``
    in ``entry[1]``, in the order of ``array``. A block therefore appears
    more than once if ``bb`` is repeated in its predecessor list.
    """
    # Note: the parameter ``bb`` shadows the module-level list of the same
    # name inside this function; only the parameter is used here.
    return [entry[0]
            for entry in array
            for candidate in entry[1]
            if candidate == bb]

def printArray(array, st):
    """Format ``array`` as text; despite the name, nothing is printed.

    Returns the string ``st`` followed by ``str(item) + ", "`` for every
    item of ``array``, so a non-empty array yields a trailing ", ".
    """
    # join() builds the result in one pass instead of repeated "+=".
    return st + "".join(str(item) + ", " for item in array)

def addMember(array, ibb, ipred):
    """Record ``ipred`` as a newly seen neighbour of block ``ibb``.

    Appends the entry ``[ipred, 1]`` (block number, initial count of one)
    to ``array[ibb]`` in place. Returns None.
    """
    new_entry = [ipred, 1]
    array[ibb].append(new_entry)

def getAvgPos(trace, bbi):
    """Return the average relative position of block ``bbi`` in ``trace``.

    ``trace`` is a list of building block numbers, so ``len(trace)`` is the
    number of BBs in the trace. Positions are 1-based. The mean position is
    truncated to an integer and then divided by the trace length.

    Returns a float, or -1 when ``bbi`` never appears in ``trace``.
    """
    positions = [pos for pos, block in enumerate(trace, 1) if block == bbi]
    if not positions:
        # This BB never appeared in the trace.
        return -1

    # Explicit floor division keeps the original Python 2 integer-division
    # result; with "/" on Python 3 this becomes a float, which
    # Fraction(float, int) rejects with TypeError.
    avg_pos = sum(positions) // len(positions)

    # Relative position = average position / total length of the trace.
    return float(fractions.Fraction(avg_pos, len(trace)))

#######################################################3  start

# Check the number of arguments: either the three mandatory ones, or those
# plus the optional <num**.txt> file and its <android/j2me> selector.
if len(sys.argv) not in (4, 6):
    print("# Usage: python traces_from_bb.py <tracefile> <BB file> <output list file> OR")
    print("# Usage: python traces_from_bb.py <tracefile> <BB file> <output list file> <num**.txt> <android/j2me>")
    sys.exit()

# Read the whole trace into one string; the with-block closes the file even
# if the read fails.
with open(sys.argv[1], 'r') as f:
    trace = f.read()

# Error checking: the trace must begin with '*'. startswith() also handles an
# empty trace file, where trace[0] would raise IndexError.
if not trace.startswith('*'):
    print("******************************************************************")
    print("Files not in the expected format. Strings don't have a '*' prepended to them.")
    print("******************************************************************")
    sys.exit()

# blkList holds the BB numbers, for the selected platform, of the mappings that
# were output in the first pass. In the phased approach these BBs are ignored
# when the trace is re-constructed in terms of BBs, so the new trace will NOT
# contain them.
# NOTE(review): the mapping file was described as "(<androidBB> <j2meBB>)" pairs,
# but the android number is read from the second column and the j2me number
# from the first -- confirm the actual column order of num**.txt.
blkList = []

# The num**.txt file is optional, so process it only if it is given.
if len(sys.argv) == 6:
    # Validate the platform selector once, up front, so that a wrong value is
    # reported even when the mapping file is empty.
    if sys.argv[5] == "android":
        column = 1
    elif sys.argv[5] == "j2me":
        column = 0
    else:
        print("# Usage: python traces_from_bb.py <tracefile> <BB file> <output list file> <num**.txt> <android/j2me>")
        print("Last argument given was wrong")
        sys.exit()

    with open(sys.argv[4], 'r') as mapping_file:
        for line in mapping_file:
            values = line.split()
            if not values:
                # Blank line: nothing to record.
                continue
            blkList.append(int(values[column]))

# An unordered set in which membership testing becomes easier
blkListSet = set(blkList)

# Read the building-block file into bb. A line whose last character (ignoring
# the newline) is '*' is a delimiter; the lines accumulated since the previous
# delimiter form one building block. Blank lines are ignored. Lines collected
# after the final delimiter are not stored.
bb = []
entry = ''
with open(sys.argv[2], 'r') as bb_file:
    for line in bb_file:
        # Strip only the newline, so the check below looks at the real last
        # character even when the final line of the file has no newline.
        text = line.rstrip('\n')
        if not text:
            continue
        if not text.endswith('*'):
            # Keep the line verbatim (newline included); trailing newlines are
            # stripped from each block later.
            entry += line
        elif entry != '':
            bb.append(entry)
            entry = ''

# For every building block, record where it occurs in the text trace.
#   indices[i] : list of character offsets at which bb[i] occurs in trace
#   order      : list of [character offset, BB number] for every occurrence
#   traceLen   : total number of occurrences recorded so far
# str.find() returns the number of characters that precede the match, so these
# are character offsets into the trace, not positions counted in BBs.
traceLen = 0
indices = []
order = []
for i, block in enumerate(bb):
    block = block.rstrip('\n')
    bb[i] = block

    hits = []
    indices.append(hits)
    # BBs listed in blkListSet are skipped entirely: their list stays empty.
    if i not in blkListSet:
        index = trace.find(block)
        # find() returns -1 once there are no further matches.
        while index != -1:
            hits.append(index)
            order.append([index, i])
            # Restart one character later, so overlapping matches are found.
            index = trace.find(block, index + 1)
    traceLen += len(hits)

######### Append the last "null" BB to indices manually. It is not added by the
# occurrence search, since this "null" BB is not present in the "text trace".
# But we want it to be present in the numbered trace, i.e. in the "android_list".
# Make sure this is done only for android and not for j2me.
# For android, there is a "null" BB in the list of BBs, so the "bb" data
# structure should have "null" as its last entry.
#
# The "bb" check avoids an IndexError when the BB file produced no blocks.
if bb and isNullBB(len(bb) - 1):
    # I am not sure whether I should change either "indices" or "order" data
    # structures for this NULL BB. Only "indices" is changed: -1 is recorded
    # as the position of the null BB.
    indices[len(bb) - 1].append(-1)
    traceLen += 1

# The array "order" is nothing but the trace re-written in terms of BB numbers.
# order: list of [<index of building block in the trace>, <building block number>]

# Sort "order" on the first key, i.e. the index of the building block in the
# trace (ties are broken by the building block number).
order = sorted(order)

# trace_BB is the trace in terms of BBs: the BB numbers in order of occurrence.
# Each BB number is also printed to standard output, one per line.
# (sorted() always returns a list, so no None check is needed.)
trace_BB = []
for _, block_no in order:
    print(block_no)
    trace_BB.append(block_no)
#print "----------------------- Trace end --------------------- "


# how to find the successor and predecessor
# for each bb "i" do
#   Go over the array "order"
#   for every occurrence at index "j" of bb "i", do 
#      check order[j-1] and order[j+1]
# 
# There are two arrays, pred and succ.


# pred and succ hold, for every BB, its immediate neighbours in the BB trace.
# Format of the pred array (succ is analogous, for the following BB):
#   pred[i]       = list of entries [k, count]
#   pred[i][j][0] = k, where BB k appeared immediately before BB i
#   pred[i][j][1] = number of times BB k appeared immediately before BB i
# A BB may be recorded as its own predecessor or successor.
#
# Sept 9 note: a wider "nearby predecessor" window was considered,
#   w = traceLen * 1/10, measured in number of BBs and NOT in characters,
# but only the immediate neighbour (window of one) is used here.
pred = [[] for _ in bb]
succ = [[] for _ in bb]

# One pass over the trace. Visiting the occurrences in trace order gives each
# BB's neighbours in the same order as scanning the whole trace once per BB.
last = len(order) - 1
for j, (_, current) in enumerate(order):
    if j > 0:
        before = order[j - 1][1]
        idx = isMember(pred, current, before)
        if idx != -1:
            pred[current][idx][1] += 1
        else:
            addMember(pred, current, before)

    if j < last:
        after = order[j + 1][1]
        idx = isMember(succ, current, after)
        if idx != -1:
            succ[current][idx][1] += 1
        else:
            addMember(succ, current, after)

# total_count is the total number of BB occurrences: the sum of the lengths of
# all per-BB occurrence lists (empty lists contribute nothing).
total_count = sum(len(occurrences) for occurrences in indices)

# final_preds stores the final predecessors of each BB that has at least one
# recorded occurrence:
#   final_preds[n][0] = m
#   final_preds[n][1] = [j, k, l], meaning j, k and l are likely predecessors of m
# This array is the input to the function toWhomAmPred().
final_preds = []
for i in range(len(indices)):
    if len(indices[i]) != 0:
        final_preds.append([i, LikelyPredSucc(pred, i)])

# Write the list into the output file, one line per BB that occurs in the
# trace (plus the null BB). Each line has the form:
#   <bb> <rel_count> <count> <first position> <preds...> ppp <whom_am_i_pred...> sss <succs...> <avg position>
# The with-block closes the file even if an error occurs while writing.
with open(sys.argv[3], 'w') as listFile:
    for i in range(len(indices)):
        null_bb = isNullBB(i)
        # The null BB is always reported with a count of 0.
        count = 0 if null_bb else len(indices[i])

        # count != 0 means this BB appears at least once in the trace.
        if count == 0 and not null_bb:
            continue

        bb_pred = LikelyPredSucc(pred, i)
        bb_succ = LikelyPredSucc(succ, i)
        whom_am_pred = toWhomAmPred(final_preds, i)
        avg_pos = getAvgPos(trace_BB, i)
        rel_count = fractions.Fraction(count, total_count)
        # A null BB with no recorded position gets -1, the same value that is
        # recorded for the trailing null BB, instead of raising IndexError.
        first_pos = indices[i][0] if indices[i] else -1

        # Assemble the line from the same pieces, in the same order, as the
        # original sequence of write() calls, so the output is unchanged.
        fields = [str(i), " ", str(rel_count), " ", str(count), " ",
                  str(first_pos), " "]
        fields.extend("%s " % item for item in bb_pred)
        fields.extend([" ", "ppp", " "])
        fields.extend("%s " % item for item in whom_am_pred)
        fields.append("sss ")
        fields.extend("%s " % item for item in bb_succ)
        fields.extend([" ", str(avg_pos), "\n"])
        listFile.write("".join(fields))


# sort the array on 2nd column. 
# bbs_and_counts = sorted(bbs_and_counts, key=lambda entry: entry[1]);
# print bbs_and_counts

# Walk pairs (i, j) of BBs that have recorded occurrences and, for each pair,
# count in r the combinations of a likely predecessor of i with a likely
# successor of j.
# NOTE(review): nothing computed here is printed or written (the only print
# statements are commented out), so this loop nest appears to have no effect
# on the script's output.
# NOTE(review): j, x and y are initialised once below and never reset inside
# the loops, so each inner loop makes at most one full pass -- confirm whether
# a reset per outer iteration was intended.
i = 0
j = 0
x = 0
y = 0
r = 0
# Both loops stop at the first BB whose occurrence list is empty.
while ((i < len(indices)) and (len(indices[i]) != 0)):
    while ((j < len(indices)) and (len(indices[j]) != 0)): 
    # count_i and count_j are assigned but not read anywhere in this loop.
	count_i = len(indices[i])
	count_j = len(indices[j])
	bb_pred_i = LikelyPredSucc(pred, i)
	bb_succ_j = LikelyPredSucc(succ, j)
	while (x < len(bb_pred_i)):
	    while (y < len(bb_succ_j)):
#		print "(", i, j, ")", "(", x , y, ")", r
		r += 1					
		y += 1
	    x += 1
	j += 1
    # NOTE(review): i is advanced here and again at the end of the loop body,
    # i.e. by two per outer iteration -- confirm that this is intended.
    i +=1  

#    print i, printArray(bb_pred, "")
    i += 1
