// Copyright (c) 2003 Raghavan Komondoor and Susan Horwitz

// This file is part of DUP.

// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software DUP and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:

// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include <fstream.h>
#include <string.h>
#include <unistd.h>
#include <limits.h>
#include <stk.h>
#include <set.h>
#include <list.h>
#include <vector.h>
#include <pair.h>
#include <algorithm>
#include <hash_map.h>

// Selects one of the two components of a clone pair.
enum WhichComponent {first, second};

// A <from, to> pair of component selectors. An object of this type is
// passed by reference to ClonePair::SharesClone as its "how" argument.
struct WhichComponentPair {
    WhichComponent from;
    WhichComponent to;

    // Default construction yields the well-defined pair <first, first>
    // rather than leaving both members with indeterminate values, so
    // that reading an object that was never filled in is not undefined
    // behaviour.
    WhichComponentPair() : from(first), to(first) {}

    // Builds the pair <from_a, to_a>.
    WhichComponentPair(WhichComponent from_a, WhichComponent to_a) :
        from(from_a), to(to_a) {}
};

// A directed edge of a program dependence graph (PDG). The endpoints are
// stored as integers; "kind" records the sort of edge.
struct PDGEdge {
  enum EdgeKind {Control, Data, Ignore};

  int parent;  // the source of the pdg edge
  int child;   // the target of the pdg edge
  EdgeKind kind;

  PDGEdge(int parent_a, int child_a, EdgeKind kind_a) :
    parent(parent_a), child(child_a), kind(kind_a) {}

  // Two edges compare equal when they have the same child and the same
  // parent. It is ok to ignore "kind".
  bool operator== (const PDGEdge &other) const {
    return (child == other.child) && (parent == other.parent);
  }

  // Orders edges by child first and, for equal children, by parent.
  // Like operator==, this ignores "kind".
  bool operator< (const PDGEdge &other) const {
    return
      (child < other.child)
      ||
      (child == other.child && parent < other.parent);
  }
};

// Binary predicate that holds when two edges have the same child (target).
//
// This is defined at namespace scope rather than inside a friend
// declaration in PDGEdge: standard C++ does not allow a type to be defined
// in a friend declaration, and since every member of PDGEdge is public no
// friendship is required to read "child".
struct SameChild {
  bool operator()(const PDGEdge &edge1, const PDGEdge &edge2) const {
    return edge1.child == edge2.child;
  }
};

// One component of a clone pair. A Clone records a file name, a function
// name, a root id, a collection of PDG edges (kept in three different
// orders, see below) and a set of nodes.
class Clone {
    char *fileName;
    char *funcName;
    int root_id;

    // Every method relies on "edges" being ordered by PDGEdge's <
    // relation.
    set<PDGEdge> edges;
    // The edges in the order in which they were given.
    vector<PDGEdge> givenEdgeList;
    // Left uninitialized until the clone groups are created.
    vector<PDGEdge> finalEdgeList;

    set<int> nodes;

    // Private on purpose: only this class and the comparator friends
    // declared at the end of the class can read the root id.
    int Root() const {
        return root_id;
    }

  public:
    // An opaque set of nodes. It exists so that node sets can be handed
    // around without exposing that a node is just its vertex ID; no member
    // function of this class should let outsiders see that nodes are
    // integers. Only Clone (a friend) can construct one or reach the
    // privately inherited set<int> interface.
    class NodeSet : private set<int> {
        NodeSet(set<int>::const_iterator begin_a,
                set<int>::const_iterator end_a) :
            set<int>(begin_a, end_a) {}

        friend class Clone;
    };

    Clone(SCM clonePair, WhichComponent);

    const char *FileName() const {
        return fileName;
    }

    const char *FuncName() const {
        return funcName;
    }

    // Number of nodes in this clone.
    int Size() const {
        return nodes.size();
    }

    // Two clones are equal when their file names, function names, root ids
    // and edge sets all agree.
    bool operator==(const Clone &other) const {
        if (strcmp(fileName, other.fileName) != 0) {
            return false;
        }
        if (strcmp(funcName, other.funcName) != 0) {
            return false;
        }
        if (root_id != other.root_id) {
            return false;
        }
        if (edges.size() != other.edges.size()) {
            return false;
        }
        // An element-by-element comparison is enough because both edge
        // sets are assumed to be sorted.
        return equal(edges.begin(), edges.end(), other.edges.begin());
    }

    void WriteSTkMapping(ostream &out) const;
    friend void WriteSTkMapping(const Clone &clone1, const Clone &clone2,
                                ostream &out);

    // Returns a newly allocated copy of this clone's node set.
    NodeSet *GetNodes() const {
        NodeSet *nodesCopy = new NodeSet(nodes.begin(), nodes.end());
        return nodesCopy;
    }

    // Returns the number of nodes in the intersection of "other" and
    // "*this".
    int IntersectionSize(const NodeSet &other) const;

    // Destructively updates "other" by intersecting it with "*this".
    void Intersect(NodeSet &other) const;

    // Makes finalEdgeList a copy of givenEdgeList.
    void initializeFinalEdgeList() {
        finalEdgeList = givenEdgeList;
    }

    // <"this", otherClone> is assumed to be a clone pair, i.e. the ith
    // PDGEdge of givenEdgeList is mapped to the ith PDGEdge of
    // otherClone.givenEdgeList. canonicalOther is assumed to be == to
    // otherClone, although the PDGEdges in canonicalOther.finalEdgeList may
    // appear in a different order than in otherClone.givenEdgeList.
    //
    // For each j: let p be the jth PDGEdge of givenEdgeList and q the jth
    // PDGEdge of otherClone.givenEdgeList. If q sits at position i of
    // canonicalOther.finalEdgeList, then givenEdgeList[j] is copied to
    // finalEdgeList[i]. The net effect is that finalEdgeList is
    // initialized to a permutation of givenEdgeList.
    void reorderEdgeList(const Clone &otherClone,
                         const Clone &canonicalOther);

    bool Subsumes(const Clone &other) const;

    enum WhichClone {This, Other, Neither};
    WhichClone MoreControlPreds(const Clone &other) const;

    friend struct CompareFirstRoots;
    friend struct CompareSecondRoots;
    friend class CompareFirstOvRoots;
    friend class CompareSecondOvRoots;
};


class FunctionPair;

// Hash functor for FunctionPair; operator() is only declared here.
//
// This is defined at namespace scope and then named in a friend
// declaration inside FunctionPair, because standard C++ does not allow a
// type to be defined within a friend declaration.
struct HashFunctionPair {
    size_t operator()(const FunctionPair &x) const;
};

// Identifies an ordered pair of functions, each given by a file name and a
// function name. The four strings are not copied: the pointers handed to
// the constructor are stored as they are.
class FunctionPair {
    const char *firstFile;
    const char *firstFunc;
    const char *secondFile;
    const char *secondFunc;

  public:
    // Lets the hash functor read the private name pointers.
    friend struct HashFunctionPair;

    FunctionPair(const char *firstFile_a, const char *firstFunc_a,
                 const char *secondFile_a, const char *secondFunc_a) :
        firstFile(firstFile_a), firstFunc(firstFunc_a),
        secondFile(secondFile_a), secondFunc(secondFunc_a) {}

    bool operator==(const FunctionPair &) const;
};

// How two clone pairs overlap, as reported by ClonePair::IsOverlapping.
enum OverlapOrientation {
    NoOverlap,  // neither of the two checks below succeeded
    Straight,   // first overlaps first and second overlaps second
    Crossed     // first overlaps second and second overlaps first
};

class ClonePair {
    Clone *firstClone;
    Clone *secondClone;
    const PRIMITIVE givenList;

  public:
    ClonePair(SCM clonePair) :
	firstClone(new Clone(clonePair, first)),
	secondClone(new Clone(clonePair, second)),
	givenList(clonePair) {} // Assumption: Let f1 and f2 be any two
                                // distinct functions. There must exist an
				// ordering of these two functions,  say
				// <f1, f2> (wolog), such that in every
                                // clonePair that involves f1 and f2 the first
				// component is from f1 and the second
				// component is from f2.
    
    bool SharesClone(const ClonePair &otherPair,
		     WhichComponentPair &how) const;
    Clone *FirstClone() const {return firstClone;}
    Clone *SecondClone() const {return secondClone;}
    ~ClonePair() {
	delete firstClone;
	delete secondClone;
    }
    FunctionPair GetFunctionPair() const {
	return
	  FunctionPair(firstClone->FileName(), firstClone->FuncName(),
		       secondClone->FileName(), secondClone->FuncName());
    }
    PRIMITIVE GetSTkList() const {return givenList;}
    int Size() const {return firstClone->Size();}
    bool Subsumes(const ClonePair &other, bool sameFunc) const {
	// We assume that first components of (*this) and "other" are from the
	// same function. We also assume the same about the second
	// components.

	// sameFunc should be true if all four components are from the same
	// function. Although this can be determined right here, receiving it
	// as a parameter makes things more efficient.
	
	return
	  (firstClone->Subsumes(*other.firstClone) &&
	   secondClone->Subsumes(*other.secondClone))
	  ||
	  (sameFunc &&
	   firstClone->Subsumes(*other.secondClone) &&
	   secondClone->Subsumes(*other.firstClone));
    }
    bool SameFunc() const {
	return
	  !strcmp(firstClone->FileName(), secondClone->FileName()) &&
	  !strcmp(firstClone->FuncName(), secondClone->FuncName());
    }

    OverlapOrientation
    IsOverlapping(const Clone::NodeSet &otherFirst,
		  const Clone::NodeSet &otherSecond,
		  int minIntersectionSize) const;
    // Assumption: otherFirst & the first component of "*this" are from the
    // same function f1, and that otherSecond and the second component of
    // "*this" are from the same function f2. f1 may or may not be equal to
    // f2.
    //
    // Return Straight if in the intersection of this->FirstClone() &
    // otherFirst (this->SecondClone() & otherSecond) there are atleast
    // minIntersectionSize nodes. If the above condition is false, and if
    // f1==f2, then also check the intersection sizes the other way --
    // intersecting this->FirstClone() with otherSecond and
    // this->SecondClone() with otherFirst. If this condition holds return
    // Crossed, else return NoOverlap.

};

// Hash functor for ClonePair pointers: the hash value is the pointer value
// itself, converted to size_t. The pointee is never examined.
struct HashClonePairPtr {
    size_t operator()(const ClonePair *x) const {
        const size_t pointerValue = reinterpret_cast<size_t>(x);
        return pointerValue;
    }
};

// Ordering functor over clone pairs that overlap a common "reference" clone
// pair r. It keeps a reference to (not a copy of) the orientations map, so
// that map must stay alive for as long as the functor is used.
class CompareFirstOvRoots {
    typedef hash_map<const ClonePair *, OverlapOrientation, HashClonePairPtr>
    OrientationsMap;
    const OrientationsMap &orientations;

  public:
    CompareFirstOvRoots(const OrientationsMap &orientations_a) :
        orientations(orientations_a) {}

    // The assumption here is that there exists some "reference" clone
    // pair r such that each component of "first" ("second") overlaps some
    // component of r. The orientations map specifies the orientation of
    // the overlap. I.e., orientations[first] should be equal to Straight
    // if the first (second) component of "first" overlaps the first
    // (second) component of r, and should be equal to Crossed otherwise.
    // orientations[second] should have a similar meaning. Let first.f
    // (second.f) be the component of "first" ("second") that overlaps with
    // the first component of r. This operator first uses orientations to
    // determine first.f and second.f. It then returns true iff the root of
    // first.f is less than the root of second.f.
    //
    // Unpredictable things will happen if orientations[first] and
    // orientations[second] are not already defined: the result of find()
    // is dereferenced without being checked.
    //
    // The operator is const because it modifies nothing, which lets the
    // functor be used where a const comparator is required.
    bool operator()(const ClonePair *first, const ClonePair *second) const {
        const Clone *firstDotf =
          (*orientations.find(first)).second == Straight ?
          first->FirstClone() :
          first->SecondClone();

        const Clone *secondDotf =
          (*orientations.find(second)).second == Straight ?
          second->FirstClone() :
          second->SecondClone();

        return firstDotf->Root() < secondDotf->Root();
    }
};

// Ordering functor over clone pairs that overlap a common "reference" clone
// pair r. It keeps a reference to (not a copy of) the orientations map, so
// that map must stay alive for as long as the functor is used.
class CompareSecondOvRoots {
    typedef hash_map<const ClonePair *, OverlapOrientation, HashClonePairPtr>
    OrientationsMap;
    const OrientationsMap &orientations;

  public:
    CompareSecondOvRoots(const OrientationsMap &orientations_a) :
        orientations(orientations_a) {}

    // Each component of "first" ("second") is assumed to overlap some
    // component of the reference pair r, with the orientations map giving
    // Straight when the components overlap first-to-first and
    // second-to-second, and Crossed otherwise. Let first.s (second.s) be
    // that component of "first" ("second") that overlaps the second
    // component of r: the second component under Straight, the first
    // component otherwise. Returns true iff the root of first.s is less
    // than the root of second.s.
    //
    // Unpredictable things will happen if the map has no entry for
    // "first" or "second": the result of find() is dereferenced without
    // being checked.
    //
    // The operator is const because it modifies nothing, which lets the
    // functor be used where a const comparator is required.
    bool operator()(const ClonePair *first, const ClonePair *second) const {
        const Clone *firstDots =
          (*orientations.find(first)).second == Straight ?
          first->SecondClone() :
          first->FirstClone();

        const Clone *secondDots =
          (*orientations.find(second)).second == Straight ?
          second->SecondClone() :
          second->FirstClone();

        return firstDots->Root() < secondDots->Root();
    }
};

// Hash functor for untyped pointers: the hash value is the pointer value
// itself, converted to size_t.
struct HashVoidPtr {
    size_t operator()(const void *x) const {
        const size_t pointerValue = reinterpret_cast<size_t>(x);
        return pointerValue;
    }
};

// Equality predicate for untyped pointers: two pointers are equal when they
// hold the same address. The pointees are never examined.
struct EqualToVoidPtr {
    // Returns bool, as an equality predicate should. The result still
    // converts implicitly to an integer wherever one was expected.
    bool operator()(const void *x, const void *y) const {
        return x == y;
    }
};
	    
// A group of clones, held as a sequence of pointers to const Clone
// objects. The vector base class is inherited as protected, so its
// interface is not available to outside code.
class CloneGroup : protected vector<const Clone *> {
  public:
    // Position of a clone within a group.
    typedef size_type PosInGroup;

    CloneGroup() {}
    CloneGroup(SCM groupsAsList);

    void WriteSTkSexpr(ofstream &out);

    // p is a clone pair. One of the two clones in p is added to "this",
    // and the position it occupies within "this" is returned.
    // whichCloneToAdd (= first / second) selects the clone of p that gets
    // added. The other clone of p must already be present in "this" (for
    // example, when whichCloneToAdd is "second" the other clone is the
    // first one), and otherClonePos gives that other clone's position in
    // "this".
    PosInGroup addOneClone(ClonePair *p, WhichComponent whichCloneToAdd,
                           PosInGroup otherClonePos);

    // Takes a clone pair and returns a pair of positions. Presumably both
    // clones of p are added and their positions returned -- confirm
    // against the definition.
    pair<PosInGroup, PosInGroup> addBothClones(ClonePair *p);
};
