/******************************************************************************
 *  This software module is part of the AT&T Self-Service Cloud (SSC) distribution.
 *
 *  Copyright (c) 2010-2013 by AT&T Intellectual Property. All rights reserved.
 *
 *  AT&T and the AT&T logo are trademarks of AT&T Intellectual Property.
 *
 *  Use of this software is permitted only as described by the Research
 *  Collaboration Agreement (RCA) in effect between AT&T, Rutgers, and 
 *  the organization to which this module has been delivered.  
 *  This software module may not be copied or otherwise distributed except 
 *  as described in the (RCA). 
 *
 *  This software is considered Confidential Information as described in section 7 of the RCA.
 *
 *  Information and Software Systems Research
 *  AT&T Labs
 *  Florham Park, NJ
 * patagonix.c                                                                *
 *                                                                            *
 * Author: Jeffrey Bickford                                                   *
 *                                                                            *
 * Purpose: Checks code pages of executables against known code in a          *
 *          database.                                                         * 
 *                                                                            *    
 * Usage: ./patagonix <domid>                                                 *
 ******************************************************************************/

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <time.h>

#include <map>
#include <utility>
#include <vector>

#include <gcrypt.h>
#include <sqlite3.h>
extern "C" {
#include <xenctrl.h>
}
using namespace std;

/* Upper bound on the file page number that cr3_file_match() will look up in
 * the hash database; larger page numbers are skipped without a query. */
#define MAX_PAGE_NUM 4745

/* Read the CPU time-stamp counter into the 64-bit lvalue `val`.
 * The two halves are fetched from EAX and EDX explicitly: the "=A" constraint
 * only denotes the EDX:EAX pair on 32-bit x86, while on x86-64 it allows the
 * compiler to pick a single register, which drops half of the counter. */
#define rdtscll(val) do { \
     unsigned int rdtsc_lo_, rdtsc_hi_; \
     __asm__ __volatile__("rdtsc" : "=a" (rdtsc_lo_), "=d" (rdtsc_hi_)); \
     (val) = ((unsigned long long)rdtsc_hi_ << 32) | rdtsc_lo_; \
} while (0)

/* Helpers whose definitions are not part of this section of the file. */

/* Result is stored in memory_map by main() and indexed as
 * memory_map + pfn * 0x1000, i.e. presumably a mapping of the guest's
 * memory addressed by pseudo-physical frame number — TODO confirm. */
unsigned char *build_memory_map(uint32_t domain);
/* Result is stored in live_p2m by main(); not otherwise used in the code shown here. */
xen_pfn_t *get_p2m_table(int xc_handle, uint32_t dom);
/* Result is stored in live_m2p by main(); the checkers index it with a
 * machine frame number (mfn) to obtain the frame used to index memory_map. */
xen_pfn_t *get_m2p_table(int xc_handle, uint32_t dom);

/* Handle returned by xc_patagonix_prepare(); passed to start/wait/destroy. */
void *patagonix_handle;
/* xc_handle: hypervisor interface from xc_interface_open().
 * dom: id of the monitored domain, parsed from argv[1]. */
int xc_handle, dom;
/* db: file/hash database (/dev/shm/test.db).
 * kernel_db: kernel page hash database (/dev/shm/kernel.db). */
sqlite3 *db, *kernel_db;
/* Mappings of the two frames reported by xc_patagonix_get_shared_page();
 * main() reads entries/timestamps from them and resets their counts. */
struct patagonix_info *info;
struct patagonix_time *times;
/* Frame numbers of the shared pages above, filled in by
 * xc_patagonix_get_shared_page(). */
uint64_t patagonix_info_frame;
uint64_t patagonix_time_frame;
/* Tables returned by get_p2m_table() / get_m2p_table(). */
static xen_pfn_t *live_p2m;
static xen_pfn_t *live_m2p;
/* XENMEM_maximum_gpfn + 1 as computed in main(); not read elsewhere in the code shown. */
unsigned long p2m_size;	
/* Result of build_memory_map(); indexed as memory_map + pfn * 0x1000. */
unsigned char *memory_map;
/* dcount: index of the info->entries[] element currently being processed
 *         (read by the SQLite callbacks as implicit input).
 * entrypoint_pagenum: scratch value set by the SQLite callbacks. */
int dcount, entrypoint_pagenum;
/* Scratch values set by the SQLite callbacks: parsed entry point and
 * the same value shifted right by 12 (its page frame). */
unsigned long entry_point_long;
unsigned long entry_point_pfn;
/* Set to 1 by cr3_file_match() on a hash match; reset and returned by
 * find_working_entry(). */
int found;
/* Kernel virtual addresses whose page hash has already been validated. */
map<unsigned long, int> kernel_set;
/* log: general log (/dev/shm/log.txt); log_time: timestamp log (/dev/shm/log_time.txt). */
FILE *log, *log_time;
/* Set to 1 by find_file() before its query; cleared by callback() when the
 * entry-point page hash matches. */
int no_entry_point;
/* Wall-clock timing of each processing round and the running total. */
struct timeval start_exec;
struct timeval end_exec;
struct timeval total_time;
/* Number of completed processing rounds. */
int round;
/* Number of kernel page hashes computed. */
int kernel_checks;
/* Number of process (user) page hashes computed. */
int proc_checks;

/* One (address space, binary) association tracked by the monitor. */
typedef struct {
	/* cr3 value copied from the shared-page entry; also used as the map key */
	unsigned long cr3;
	/* entry point address (from the database on a match, or the probed va) */
	unsigned long entry_point;
	/* file_id from the files table, or -1 for entries in no_file_map */
	int file_id;
	/* page numbers of the file whose hash has already matched (value is always 1) */
	map<int,int> checked_pages;
}entry_t;

/* Binaries identified by a matching entry-point page, keyed by cr3. */
multimap<unsigned long,entry_t> working_set_map;
/* (cr3, va) pairs already found not to be a known entry point, keyed by cr3. */
multimap<unsigned long, entry_t> no_file_map;

/* Entry being checked; set by find_working_entry() before it runs the query
 * whose row callback (cr3_file_match) reads it. */
entry_t *current_entry;

/* One reported event in the shared info page. */
struct patagonix_entry {
	/* machine frame number; used to index live_m2p */
	unsigned long mfn;
	/* cr3 value used as the key of working_set_map / no_file_map */
	unsigned long cr3;
	/* virtual address associated with the event */
	unsigned long va;
};

/* Layout of the shared info page that main() maps (0x1000 bytes).
 * NOTE(review): 341 entries presumably makes the struct fill one 4 KiB page
 * when unsigned long is 4 bytes — confirm against the hypervisor side. */
struct patagonix_info {
	/* number of valid entries; reset to 0 by main() after each round */
	unsigned long count;
	struct patagonix_entry entries[341];
};

/* Layout of the shared timestamp page; time[i] is logged alongside
 * entries[i] of the info page. */
struct patagonix_time {
	/* reset to 0 by main() after each round */
	unsigned long count;
	/* timestamp compared against a local rdtsc reading in main() */
	unsigned long long time[341];
};

int get_shared_page(int dom,int xc_handle){
	unsigned long patagonix_info_frame;

	xc_dominfo_t info;

	if(xc_domain_getinfo(xc_handle,dom,1,&info)!=1) {
		printf("Unable to get info\n");
		exit(-1);
	}

	return 0;
}

/*
 * Compute the SHA-1 digest of one page of memory and return it as a
 * lowercase hexadecimal string.
 *
 * data: start of the page; PAGE_SIZE bytes are read from it.
 *
 * Returns a NUL-terminated, malloc()-allocated string of two hex digits per
 * digest byte. The caller owns the buffer and must free() it.
 * Terminates the process with exit(1) if the buffer cannot be allocated.
 */
char *hash_page(unsigned char *data)
{
    static const char hex_digits[] = "0123456789abcdef";

    /* Length in bytes of a binary SHA-1 digest */
    int hash_len = gcry_md_get_algo_dlen( GCRY_MD_SHA1 );

    /* binary digest */
    unsigned char hash[ hash_len ];

    /* hex representation: 2 characters per byte + trailing \0 */
    char *out = (char *) malloc( sizeof(char) * ((hash_len*2)+1) );
    if ( out == NULL ) {
        /* previously the NULL pointer was written through */
        fprintf(stderr, "hash_page: out of memory\n");
        exit(1);
    }

    /* One-shot digest; avoids creating a gcrypt hash handle */
    gcry_md_hash_buffer( GCRY_MD_SHA1, hash, data, PAGE_SIZE );

    int i;
    for ( i = 0; i < hash_len; i++ ) {
        out[2*i]     = hex_digits[hash[i] >> 4];
        out[2*i + 1] = hex_digits[hash[i] & 0x0f];
    }
    out[2*hash_len] = '\0';

    return out;
}

/* Called when we need to check a hash on the first page of a binary.   */
/* It is called when we find a match on an entry point in the database. */
int callback(void *NotUsed, int argc, char **argv, char **azColName)
{	
	char sqlbuf[0x1000];	
	int rc;
	sqlite3_stmt *pStatement;
	int match = 0;

	/* if there's a match, check hash of entry point page */
	char *file_id = argv[0];
	char *name = argv[1];
	char *entry_point = argv[2];
	char *page_num = argv[3];
	char *entry_point_mod = argv[4];
	
	fprintf(log,"\nfile_id = %s\n", file_id);	
	fprintf(log,"name = %s\n", name);			
	fprintf(log,"entry_point = %s\n", entry_point);
	fprintf(log,"cr3: 0x%x\n",info->entries[dcount].cr3);
	entrypoint_pagenum = atoi(page_num);

	entry_point_long = strtol(entry_point, NULL, 0);	
	entry_point_pfn = entry_point_long >> 12;
	
	sprintf(sqlbuf, "SELECT * FROM hashes where file_id = %s",file_id);

	/* find hash, and call check_hash to check against memory pages */
	
	rc = sqlite3_prepare_v2(db,sqlbuf,-1, &pStatement, NULL);
	while (rc == SQLITE_OK && sqlite3_step(pStatement) == SQLITE_ROW) {
		char *database_hash = (char *)sqlite3_column_text(pStatement,2);
		int current_pagenum = atoi((const char *)sqlite3_column_text(pStatement,1));
	
		/* check first page of the file */
		fprintf(log,"entry pagenum: %d current %d\n",entrypoint_pagenum,current_pagenum);
		
		if(entrypoint_pagenum == current_pagenum) {
			unsigned long pfn = live_m2p[info->entries[dcount].mfn];
			char *current_hash = hash_page(memory_map + (pfn * 0x1000));
			
			proc_checks++;
	
			/* if the first page is a match, set flag to continue */
			if(strcmp(current_hash,database_hash) == 0){
				fprintf(log,"first page match\n");
				match = 1;
				
				entry_t node;

				node.cr3 = info->entries[dcount].cr3;
				node.entry_point = entry_point_long; 
				node.file_id = atoi(file_id);

				/* add node to a working set of running binaries */
				working_set_map.insert(make_pair(node.cr3,node));
		
				fprintf(log,"%s %s MATCH on entry point %s\n",file_id,name,entry_point);				
				no_entry_point = 0;
				break;	
			}
			else { 
				fprintf(log,"nomatch current: %s database: %s\n",current_hash,database_hash);
				match = 0;
				break;
			}
		}
	}
	
	sqlite3_finalize(pStatement);

	return 0;
}

/* Checks to see if this cr3 is related to a process */
/* not in our database.                              */
int check_no_file(unsigned long cr3, unsigned long va) 
{
	multimap<unsigned long, entry_t>::iterator iter = no_file_map.find(cr3);
	
	if(iter != no_file_map.end()) {
		pair<multimap<unsigned long,entry_t>::iterator,multimap<unsigned long,entry_t>::iterator> entries;
		entries = no_file_map.equal_range(cr3);
		multimap<unsigned long,entry_t>::iterator itr;

		for(itr=entries.first; itr!=entries.second; itr++) {			
			entry_t *entry = &(itr->second);

			if(entry->entry_point == va)
				return 1;
		}
	}
	
	return 0;
}

/* The cr3 corresponding to this va has not been checked yet.         */
/* We know that the va passed is the entry point of the binary.       */
/* Identify the binary corresponding to this va and check first page. */
/*
 * Try to identify the binary whose entry point is `va` for the address
 * space of the current entry (info->entries[dcount].cr3).
 *
 * The files table is queried for rows with this entry point; callback()
 * checks the entry-point page hash for each row and clears no_entry_point
 * on a match. If no row matched, (cr3, va) is remembered in no_file_map so
 * the lookup is not repeated.
 *
 * Returns 0 if (cr3, va) was already known not to be an entry point,
 * otherwise the sqlite3_exec() result code (SQLITE_OK on success).
 */
int find_file(unsigned long va) {
	int rc;
	char sqlbuf[0x1000];
	char *zErrMsg = 0;

	/* check to see if we already know this address is not an entry point */
	if(check_no_file(info->entries[dcount].cr3,va) == 1) {
		/* the original call had a conversion but no argument */
		fprintf(log,"va: 0x%lx already found to be not an entry_point\n",va);
		return 0;
	}

	snprintf(sqlbuf, sizeof sqlbuf, "SELECT * FROM files WHERE entry_point = %lu;",va);

	no_entry_point = 1;

	/* check to see if entry point is in database */
	/* if so, check page of memory in callback function */
	fprintf(log,"check for entry point 0x%lx\n",va);
	rc = sqlite3_exec(db, sqlbuf, callback, 0, &zErrMsg);

	if(no_entry_point == 1) {

		fprintf(log,"not an entry point\n");

		entry_t node;

		node.cr3 = info->entries[dcount].cr3;
		node.entry_point = va; 
		node.file_id = -1;

		/* NOTE(review): this also caches the address when the query itself failed */
		no_file_map.insert(make_pair(node.cr3,node));
	}

	if(rc != SQLITE_OK) {
		fprintf(stderr, "SQL error: %s\n", zErrMsg);
		sqlite3_free(zErrMsg);
	}

	return rc;
}

/* Called when we get a fault on a binary already identified by a cr3 */ 
int cr3_file_match(void *NotUsed, int argc, char **argv, char **azColName)
{	
	char sqlbuf[0x1000];	
	int rc;
	sqlite3_stmt *pStatement;
	int vaddr_page_num;	

	/* if there's a match, check hash of entry point page */
	char *file_id = argv[0];
	char *name = argv[1];
	char *entry_point = argv[2];
	char *page_num = argv[3];
	char *entry_point_mod = argv[4];

	unsigned long vaddr = info->entries[dcount].va;
	
	entrypoint_pagenum = atoi(page_num);
	entry_point_long = strtol(entry_point, NULL, 0);	
	entry_point_pfn = entry_point_long >> 12;

	vaddr_page_num = (vaddr >> 12) - entry_point_pfn + entrypoint_pagenum;

	map<int, int>::iterator iter = current_entry->checked_pages.find(vaddr_page_num);

	if(iter == current_entry->checked_pages.end()) {

		fprintf(log,"check match name: %s vaddr: 0x%x vadd_page_num: %d\n",name,vaddr,vaddr_page_num);

		/* ugly for now must figure out what to do instead */
		if(vaddr_page_num <= MAX_PAGE_NUM) {

			sprintf(sqlbuf, "SELECT * FROM hashes WHERE file_id = %s and page_num = %d;", file_id, vaddr_page_num);

			rc = sqlite3_prepare_v2(db,sqlbuf,-1, &pStatement, NULL);
			while (rc == SQLITE_OK && sqlite3_step(pStatement) == SQLITE_ROW) {

				char *database_hash = (char *)sqlite3_column_text(pStatement,2);

				unsigned long mach = info->entries[dcount].mfn;			
				uint32_t pfn = live_m2p[mach];
				char *current_hash = hash_page(memory_map + (pfn*0x1000));
		
				proc_checks++;
			
				if(strcmp(current_hash,database_hash) == 0){
					fprintf(log,"%s %s MATCH on vaddr: 0x%x page num: %d\n",file_id,name,vaddr,vaddr_page_num);
					current_entry->checked_pages.insert(make_pair(vaddr_page_num,1));
					found = 1;
				}
				else	
					fprintf(log,"WARNING %s %s NO NATCH on vaddr: 0x%x page num: %d\n",file_id,name,vaddr,vaddr_page_num);
			
				/* should be only 1 entry from database so break */
				break;
			}

			sqlite3_finalize(pStatement);
		}
	} else {
		fprintf(log,"%s %s vaddr: 0x%x ALREADY VALIDATED\n",file_id,name,vaddr,vaddr_page_num);	
	}
	
	return 0;
}

/* Check if the faulting cr3 is in our working set of binaries. */
/* If so, check the corresponding page of that binary.          */
int find_working_entry(unsigned long cr3) 
{

	multimap<unsigned long, entry_t>::iterator iter = working_set_map.find(cr3);
	found = 0;
	
	if(iter != working_set_map.end()) {
		fprintf(log,"found working set entry for cr3 0x%x\n",cr3);
	
		pair<multimap<unsigned long,entry_t>::iterator,multimap<unsigned long,entry_t>::iterator> entries;
		entries = working_set_map.equal_range(cr3);
		multimap<unsigned long,entry_t>::iterator itr;

		for(itr=entries.first; itr!=entries.second; itr++) {			
        		int rc;
        		char sqlbuf[0x1000];
        		char *zErrMsg = 0;			

			current_entry = &(itr->second);

			sprintf(sqlbuf, "SELECT * FROM files WHERE file_id = %d;",itr->second.file_id);

			rc = sqlite3_exec(db, sqlbuf, cr3_file_match, 0, &zErrMsg);

			if(rc != SQLITE_OK) {
				fprintf(stderr, "SQL error: %s\n", zErrMsg);
				sqlite3_free(zErrMsg);
			}			
		}
	}

	/* if there is a file match return 1 else return 0 */
	return found;
}

void my_handler(int s) {
	printf("caught signal %d\n",s);
	printf("kernel checks: %d user checks: %d\n",kernel_checks,proc_checks); 
	xc_patagonix_destroy(&patagonix_handle);
	sqlite3_close(db);
	sqlite3_close(kernel_db);
	fclose(log);
	exit(1);
}

/*
 * Report whether `entry_point` is one of a fixed set of known values.
 *
 * NOTE(review): the values are presumably entry-point addresses of binaries
 * expected to run as root processes — confirm; the only call site in
 * main() is currently commented out.
 *
 * Returns true if entry_point is in the list, false otherwise.
 */
bool in_rootproc_list(unsigned long entry_point)
{
	/* built once at compile time instead of a heap-allocated vector per call */
	static const unsigned long rootproc_entry_points[] = {
		134518816UL,
		6720UL,
		134530624UL,
		134517600UL,
		134516080UL,
		134519248UL,
		134596544UL,
		27232UL,
		134517280UL
	};
	const size_t entry_count =
		sizeof(rootproc_entry_points) / sizeof(rootproc_entry_points[0]);

	for(size_t i = 0; i < entry_count; i++)
	{
		if(rootproc_entry_points[i] == entry_point)
		{
			return(true);
		}
	}
	return(false);
}


int is_domain_paused(int xc_handle,int dom){

	xc_dominfo_t info;

	if(xc_domain_getinfo(xc_handle,dom,1,&info)!=1) {
		printf("Unable to get info\n");
		exit(-1);
	}

	return info.paused;
}

int main(int argc, char **argv) 
{

	int frc,errno,rc;

    if(argc != 2) {
        printf("usage: %s dom_id\n",argv[0]);
        return 0;
    }
	signal(SIGINT,my_handler);

	dom = atoi(argv[1]);

	live_p2m = NULL;
	live_m2p = NULL;

	total_time.tv_sec = 0;
	total_time.tv_usec = 0;
	round = 0;
	proc_checks = 0;
	kernel_checks = 0;

	rc = sqlite3_open("/dev/shm/test.db",&db);
	//rc = sqlite3_open("/root/test.db",&db);
	//rc = sqlite3_open("/home/shakeelb/research/mobisys/patagonix/database/test.db",&db);

	if(rc) {
		fprintf(stderr, "Can't open database: %s",sqlite3_errmsg(db));
		sqlite3_close(db);
		exit(1);
	}
	
	//rc = sqlite3_open("/home/shakeelb/research/mobisys/patagonix/database/kernel.db",&kernel_db);
	//rc = sqlite3_open("/root/kernel.db",&kernel_db);
	rc = sqlite3_open("/dev/shm/kernel.db",&kernel_db);

	if(rc) {
		fprintf(stderr, "Can't open database: %s",sqlite3_errmsg(kernel_db));
		sqlite3_close(kernel_db);
		exit(1);
	}

	//log = fopen("/home/shakeelb/research/mobisys/patagonix/log/log.txt","w");
	log = fopen("/dev/shm/log.txt","w");
	
	if(log == NULL) {
		fprintf(stderr, "Can't open log file.\n");
		exit(1);
	}	
	
	//log_time = fopen("/home/shakeelb/research/mobisys/patagonix/log/log_time.txt","w");
	log_time = fopen("/dev/shm/log_time.txt","w");
	
	if(log_time == NULL) {
		fprintf(stderr, "Can't open log_time file.\n");
		exit(1);
	}	

	xc_handle = xc_interface_open();

	if(xc_handle == -1) {
		fprintf(stderr, "Unable to get handle to hypervisor\n");
		exit(-1);
	}
	
	memory_map = (unsigned char *)build_memory_map(dom);
	live_p2m = (xen_pfn_t *) get_p2m_table(xc_handle, dom);
	live_m2p = (xen_pfn_t *) get_m2p_table(xc_handle, dom);

	p2m_size = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom) + 1;


	if(xc_patagonix_get_shared_page(xc_handle,dom,&patagonix_info_frame, &patagonix_time_frame) < 0) {
		fprintf(stderr, "Unable to get patagonix shared page\n");
		exit(-1);
	}

	fprintf(log,"shared patagonix page addr: 0x%lx\n",patagonix_info_frame);
	fprintf(log,"shared patagonix time addr: 0x%lx\n",patagonix_time_frame);

	info = (struct patagonix_info *)xc_map_foreign_range(xc_handle, dom,
						0x1000, 
						PROT_READ | PROT_WRITE,
						patagonix_info_frame);

	times = (struct patagonix_time *)xc_map_foreign_range(xc_handle, dom,
						0x1000, 
						PROT_READ | PROT_WRITE,
						patagonix_time_frame);

	/* Live suspend. Enable log-dirty mode. */
	if ( xc_shadow_control(xc_handle, dom,
							XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
							NULL, 0, NULL, 0, NULL) < 0 )
	{
		/* log-dirty already enabled? There's no test op,
		 * so attempt to disable then reenable it */
		frc = xc_shadow_control(xc_handle, dom, 
					XEN_DOMCTL_SHADOW_OP_OFF,
					NULL, 0, NULL, 0, NULL);
		if ( frc >= 0 )
		{
			frc = xc_shadow_control(xc_handle, dom,
									XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
									NULL, 0, NULL, 0, NULL);
		}

		if ( frc < 0 )
		{
			fprintf(stderr,"Couldn't enable shadow mode (rc %d) (errno %d)", frc, errno );
			exit(-1);
		}
	}

	patagonix_handle = (void *)xc_patagonix_prepare(xc_handle,dom);
	
	if(patagonix_handle == NULL)
		printf("Unable to get patagonix handle\n");

	int start = xc_patagonix_start(patagonix_handle);
	if (start < 0)
		printf("Unable to start patagonix\n");

	dcount = 0;
	
	fprintf(log,"start patagonix\n");	

    while(is_domain_paused(xc_handle,dom));

	while(1)
	{
		xc_patagonix_wait(patagonix_handle);
//printf("info->count %d\n",info->count);return 0;		
		xc_domain_pause(xc_handle, dom);
    /*    if(info->count == 0) {
            xc_domain_unpause(xc_handle, dom);
            continue;
        }*/
		gettimeofday(&start_exec,NULL);
	//	printf("cr3: 0x%lx mfn: 0x%lx va: 0x%lx\n",
	//		info->entries[dcount].cr3,
	//		info->entries[dcount].mfn,
	//		info->entries[dcount].va);			
		
		while(dcount < info->count){	
			unsigned long va = info->entries[dcount].va;		
				
			unsigned long long verify_time;
			rdtscll(verify_time);
			unsigned long long diff = verify_time - times->time[dcount];	
		
			//if(verify_time < times->time[dcount])
			//	printf("wrong\n");			
		
	//		printf("verify: %llu times: %llu\n",verify_time, times->time[dcount]);
		
			fprintf(log_time,"%llu %llu %llu\n",verify_time, times->time[dcount],diff);

			/* there are no virtual addresses in our database starting with 0xb and 0xa */
			if((va >> 28) == 0xc || (va >> 28) == 0xb || (va >> 28) == 0xa) {
				/* if the va is within the kernel range that contains code */
				if((va >= 0xc0100000 && va < 0xc0373000) || (va >= 0xc03bd000 && va < 0xc03d600)) {
					char sqlbuf[0x1000];	
					int rc;
					sqlite3_stmt *pStatement;
					map<unsigned long, int>::iterator iter = kernel_set.find(va);

					/* if we did not check this kernel address yet */
					if(iter == kernel_set.end()) {
								
						sprintf(sqlbuf, "SELECT * FROM hashes WHERE vaddr = '0x%lx'",(va >> 12) << 12);
						rc = sqlite3_prepare_v2(kernel_db,sqlbuf,-1, &pStatement, NULL);

						while (rc == SQLITE_OK && sqlite3_step(pStatement) == SQLITE_ROW) {
							char *database_hash = (char *)sqlite3_column_text(pStatement,1);
	
							unsigned long mach = info->entries[dcount].mfn;			
							uint32_t pfn = live_m2p[mach];
							char *current_hash = hash_page(memory_map + (pfn*0x1000));
							fprintf(log,"current: %s database: %s\n",current_hash, database_hash);
			
							kernel_checks++;
						
							if(strcmp(current_hash,database_hash) == 0){
								fprintf(log, "kernel validated at vaddr: 0x%x\n",va);
								kernel_set.insert(make_pair(va,1)); // add to checked kernel addresses
							}
							else
								fprintf(log, "kernel NOT validated at vaddr: 0x%x\n",va);
			
						}		
	
						sqlite3_finalize(pStatement);
					}// else {
					//	fprintf(log,"addr: 0x%x already validated\n",va);
					//}
				}
		 	} else {	
				if(find_working_entry(info->entries[dcount].cr3) == 0){
		//			if(in_rootproc_list(va))
						find_file(va);
				}
			}

			dcount++;	
		}	
		
		info->count = 0;
		times->count = 0;
		dcount = 0;

		gettimeofday(&end_exec,NULL);
		int total = (end_exec.tv_sec - start_exec.tv_sec)*1000000 + end_exec.tv_usec - start_exec.tv_usec;
		total_time.tv_usec += total;
		total_time.tv_sec += total_time.tv_usec / 1000000;
		total_time.tv_usec = total_time.tv_usec % 1000000;	
		
		round++;
		fprintf(log,"round %d done\n",round);
		fprintf(log,"round sec: %d round usec: %d\n",end_exec.tv_sec-start_exec.tv_sec,end_exec.tv_usec-start_exec.tv_usec);
		fprintf(log,"total sec: %d total usec: %d\n",total_time.tv_sec,total_time.tv_usec);
		xc_domain_unpause(xc_handle,dom);
	}	
}

