
	/*
	** This program parses an EMA response CSV file
	** and a dietary data CSV file.
	** It correlates them to create output:
	**
	** per meal:
	** (1) PID (participant ID)
	** (2) date
	** (3) time
	** (4) meal type (planned, lapse)
	** (5) delta match (hr from closest EMA to the dietary report)
	** (6) survey ID (EMA identifier used to report planned/lapse)
	** (7)-(17) info from dietary report about the meal (copied verbatim)
	**
	** Delta match time is the signed time difference between a
	** dietary report and the EMA meal matched to it; it can be
	** positive or negative (printed in hours, stored in the
	** program in seconds).
	**
	** In the EMA file, the subject ID# is taken from column B.
	** Lapse meal days/times are taken from column N.  Planned meal
	** days/times are taken from column T.  (Counting fields from
	** zero, these are fields 1, 13 and 19 in the code.)
	**
	** The date range to be processed is the union of all dates
	** covered in both the EMA and dietary data files.
	*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define	DEBUG		0
#define	MAX_EMA		1000
#define	MAX_DIET	100
#define	MAX_MEALS	1000
#define	MAX_CALENDAR	365	/* units are days */
#define	DELTA_W		7200	/* units are seconds */
#define	SEPARATOR	','

/*
** Swap helpers.  Each one needs a scratch variable with a fixed name
** to be in scope where the macro is used: swap_int (int),
** swap_double (double) or swap_str (char array large enough to hold
** either string).  They are wrapped in do { } while (0) so that each
** behaves as a single statement, including as the body of an if
** that has an else.  Arguments are evaluated more than once, so they
** must not have side effects.
*/
#define	SWAP_INT(x,y)	do { swap_int=(x); (x)=(y); (y)=swap_int; } while (0)
#define	SWAP_DOUBLE(x,y) do { swap_double=(x); (x)=(y); (y)=swap_double; } while (0)
#define	SWAP_STR(x,y)	do { strcpy(swap_str,(x)); strcpy((x),(y)); strcpy((y),swap_str); } while (0)

	/*
	** Conversions between calendar date/time and a count of seconds
	** since Jan 1 2000 12:00am (both are defined later in this file).
	** Full prototypes are given so that calls are type-checked.
	*/
void DateFromTimestamp(unsigned int timestamp,
		int *Year, int *Month, int *Day,
		int *Hour, int *Min, int *Sec);
int TimeStamp(unsigned char RTCYEARL, unsigned char RTCMON,
		unsigned char RTCDAY, unsigned char RTCHOUR,
		unsigned char RTCMIN, unsigned char RTCSEC);


enum { DIET_FIELDS=11, DIET_FIELD_LEN=64 };
	/*
	** DIET_FIELDS    number of detail columns kept per diet record
	** DIET_FIELD_LEN bytes reserved per detail column (incl. NUL);
	**                longer values are truncated
	*/


	/*
	** Read one CSV field from fpt into text (at most size-1 bytes are
	** kept, the rest of an over-long field is discarded).  Carriage
	** returns are dropped.  Returns the character that ended the
	** field: ',' , '\n' , or EOF.
	*/
static int ReadField(FILE *fpt,char *text,int size)
{
int	ch,length;

length=0;
while ((ch=fgetc(fpt)) != EOF  &&  ch != ','  &&  ch != '\n')
  {
  if (ch == '\r')
    continue;
  if (length < size-1)
    text[length++]=(char)ch;
  }
text[length]=0;
return(ch);
}


	/* parse "M/D/Y"; returns 1 if all three numbers were found */
static int ParseDate(const char *s,int *month,int *day,int *year)
{
return(sscanf(s,"%d/%d/%d",month,day,year) == 3);
}


	/*
	** parse "H:M" or "H:M:S"; seconds default to 0 when absent;
	** returns 1 if at least hour and minute were found
	*/
static int ParseClock(const char *s,int *hour,int *minute,int *second)
{
*second=0;
return(sscanf(s,"%d:%d:%d",hour,minute,second) >= 2);
}


	/*
	** parse "M/D/Y H:M" or "M/D/Y H:M:S"; seconds default to 0 when
	** absent; returns 1 if at least the first five numbers were found
	*/
static int ParseDateTime(const char *s,int *month,int *day,int *year,
		int *hour,int *minute,int *second)
{
*second=0;
return(sscanf(s,"%d/%d/%d %d:%d:%d",
	month,day,year,hour,minute,second) >= 5);
}


	/*
	** Usage:  match-ema-diet [subID] [ema.csv] [diet.csv] [output]
	**
	** Reads the EMA meals and the diet records of one subject, matches
	** each EMA meal to its closest diet record (at most 4 hours apart,
	** one meal per diet record, closest meal wins) and prints one CSV
	** line per EMA meal (output=1) or per diet record (output=2) to
	** stdout.  Date/time fields that cannot be parsed are skipped.
	** Error messages also go to stdout and the exit status is 0 in
	** every case, as it always has been for this tool.
	*/
int main(int argc, char *argv[])

{
FILE		*fpt;
int		i,j,k,field,delim;
int		subID;
int		TotalEMA,ValidEMA;
char		text[320];
int		ema_year,ema_month,ema_day;
int		ema_hour,ema_minute,ema_second;
int		ema_timestamp[MAX_EMA];
int		smallest;
char		swap_str[320];	/* scratch for SWAP_STR */
int		swap_int;	/* scratch for SWAP_INT */
int		meal_year,meal_month,meal_day;
int		meal_hour,meal_minute,meal_second;
int		meal_timestamp[MAX_MEALS];
char		meal_name[MAX_MEALS][80];
int		meal_surv_num[MAX_MEALS],SurveyNumber;
int		TotalMeals;

int		TotalDiet;
int		diet_year,diet_month,diet_day;
int		diet_hour,diet_minute,diet_second;
int		diet_timestamp[MAX_DIET];
char		diet_details[MAX_DIET][DIET_FIELDS][DIET_FIELD_LEN];
int		HaveDate;	/* this diet row had a parsable date */
int		RowStored;	/* this diet row created a diet record */

			/* meal index matched to this diet record */
int		match_diet_to_ema[MAX_DIET];
			/* time (sec) between matched records */
int		delta_diet_to_ema[MAX_DIET];
			/* diet index matched to this EMA meal record */
int		match_ema_to_diet[MAX_MEALS];
			/* time (sec) between matched records */
int		delta_ema_to_diet[MAX_MEALS];
int		closest_index,closest_diff,time_diff;
double		hr_diff;
int		output;


if (argc != 5)
  {
  printf("Usage:  match-ema-diet [subID] [ema.csv] [diet.csv] [output]\n");
  exit(0);
  }

subID=atoi(argv[1]);
if (subID < 101  ||  subID > 141)
  {
  printf("Expected subject ID >= 101 and <= 141\n");
  exit(0);
  }

output=atoi(argv[4]);
if (output < 1  ||  output > 2)
  {
  printf("output must be 1 (ema->diet match) or 2 (diet->ema match)\n");
  exit(0);
  }


	/* read the EMA file */
if ((fpt=fopen(argv[2],"r")) == NULL)
  {
  printf("Unable to open %s for reading\n",argv[2]);
  exit(0);
  }
TotalMeals=0;
TotalEMA=0;
field=0;
ValidEMA=0;
SurveyNumber=0;
while (1)
  {
  delim=ReadField(fpt,text,(int)sizeof(text));
  if (DEBUG)
    printf("%d => %s\n",field,text);
  if (field == 0)
    {
    if (atoi(text) > 0)	/* survey number */
      {
      ValidEMA=1;
      SurveyNumber=atoi(text);
      }
    else
      ValidEMA=0;
    }
  if (field == 1)
    {
    if (atoi(text) != subID)
      ValidEMA=0;	/* wrong person */
    }
  if (ValidEMA == 1  &&  field == 9	/* EMA session end day/time */
      &&  strlen(text) > 1
      &&  ParseDateTime(text,&ema_month,&ema_day,&ema_year,
		&ema_hour,&ema_minute,&ema_second))
    {
		/* convert date to seconds since Jan 1 2000 */
    ema_timestamp[TotalEMA]=TimeStamp((unsigned char)(ema_year-2000),
		(unsigned char)(ema_month),
		(unsigned char)(ema_day),
		(unsigned char)ema_hour,
		(unsigned char)ema_minute,
		(unsigned char)ema_second);
    TotalEMA++;
    if (TotalEMA >= MAX_EMA)
      {
      printf("MAX_EMA (%d) exceeded\n",MAX_EMA);
      exit(0);
      }
    }
  if (ValidEMA == 1  &&
      (field == 13  ||  field == 19)	/* 13=lapse, 19=planned */
      &&  strlen(text) > 1
      &&  ParseDateTime(text,&meal_month,&meal_day,&meal_year,
		&meal_hour,&meal_minute,&meal_second))
    {
    if (TotalMeals >= MAX_MEALS)
      {
      printf("MAX_MEALS (%d) exceeded\n",MAX_MEALS);
      exit(0);
      }
    meal_second=0;	/* meal times are kept to the minute */
    if (field == 13)
      strcpy(meal_name[TotalMeals],"lapse");
    else
      strcpy(meal_name[TotalMeals],"planned");
		/* convert date to seconds since Jan 1 2000 */
    meal_timestamp[TotalMeals]=TimeStamp((unsigned char)(meal_year-2000),
		(unsigned char)(meal_month),
		(unsigned char)(meal_day),
		(unsigned char)meal_hour,
		(unsigned char)meal_minute,
		(unsigned char)meal_second);
    meal_surv_num[TotalMeals]=SurveyNumber;
    TotalMeals++;
    }
  field++;
  if (delim == EOF)
    break;
  if (delim == '\n')
    field=0;
  }
fclose(fpt);

        /* sort EMA timestamps */
for (i=0; i<TotalEMA; i++)
  {
  smallest=i;
  for (j=i+1; j<TotalEMA; j++)
    if (ema_timestamp[j] < ema_timestamp[smallest])
      smallest=j;
  if (smallest != i)
    {
    SWAP_INT(ema_timestamp[i],ema_timestamp[smallest]);
    }
  }


if (0)	/* print out all EMAs */
  {
  for (i=0; i<TotalEMA; i++)
    {
    DateFromTimestamp(ema_timestamp[i],&ema_year,&ema_month,&ema_day,
	&ema_hour,&ema_minute,&ema_second);
    printf("%d-%d-%d\t%d\n",
	ema_year+2000,ema_month+1,ema_day+1,
	ema_timestamp[i]);
    }
  }

        /* sort meal timestamps (disabled: meals stay in file order) */
if (0)
  {
  for (i=0; i<TotalMeals; i++)
    {
    smallest=i;
    for (j=i+1; j<TotalMeals; j++)
      if (meal_timestamp[j] < meal_timestamp[smallest])
        smallest=j;
    if (smallest != i)
      {
      SWAP_INT(meal_timestamp[i],meal_timestamp[smallest]);
      SWAP_INT(meal_surv_num[i],meal_surv_num[smallest]);
      SWAP_STR(meal_name[i],meal_name[smallest]);
	/* need to swap more fields here if I want to use this */
      }
    }
  }

if (0)	/* print out all EMA meals */
  {
  for (i=0; i<TotalMeals; i++)
    {
    DateFromTimestamp(meal_timestamp[i],&meal_year,&meal_month,&meal_day,
	&meal_hour,&meal_minute,&meal_second);
    printf("%d-%d-%d\t%d\t%d\t%s\n",
	meal_year+2000,meal_month+1,meal_day+1,
	meal_timestamp[i],meal_surv_num[i],meal_name[i]);
    }
  }



	/* read the diet data file */
if ((fpt=fopen(argv[3],"r")) == NULL)
  {
  printf("Unable to open %s for reading\n",argv[3]);
  exit(0);
  }
TotalDiet=0;
field=0;
ValidEMA=0;
HaveDate=0;
RowStored=0;
diet_year=diet_month=diet_day=0;
while (1)
  {
  delim=ReadField(fpt,text,(int)sizeof(text));
  if (DEBUG)
    printf("%d => %s\n",field,text);
  if (field == 0)
    {
    if (atoi(text) == subID)
      ValidEMA=1;	/* right person */
    else
      ValidEMA=0;	/* wrong person */
    HaveDate=0;		/* new row: nothing parsed yet */
    RowStored=0;
    }
  if (ValidEMA == 1  &&  field == 1  &&  strlen(text) > 1)	/* diet day */
    HaveDate=ParseDate(text,&diet_month,&diet_day,&diet_year);
  if (ValidEMA == 1  &&  field == 2  &&  HaveDate		/* diet time */
      &&  strlen(text) > 1
      &&  ParseClock(text,&diet_hour,&diet_minute,&diet_second))
    {
		/* convert date to seconds since Jan 1 2000 */
    diet_timestamp[TotalDiet]=TimeStamp((unsigned char)(diet_year-2000),
		(unsigned char)(diet_month),
		(unsigned char)(diet_day),
		(unsigned char)diet_hour,
		(unsigned char)diet_minute,
		(unsigned char)diet_second);
		/* rows with fewer columns leave empty details */
    memset(diet_details[TotalDiet],0,sizeof(diet_details[TotalDiet]));
    RowStored=1;
    TotalDiet++;
    if (TotalDiet >= MAX_DIET)
      {
      printf("MAX_DIET (%d) exceeded\n",MAX_DIET);
      exit(0);
      }
    }
	/* details belong only to a record created by this same row */
  if (RowStored  &&  field >= 3  &&  field < 3+DIET_FIELDS)
    {
    snprintf(diet_details[TotalDiet-1][field-3],DIET_FIELD_LEN,"%s",text);
    }
  field++;
  if (delim == EOF)
    break;
  if (delim == '\n')
    field=0;
  }
fclose(fpt);

        /* sort diet timestamps */
for (i=0; i<TotalDiet; i++)
  {
  smallest=i;
  for (j=i+1; j<TotalDiet; j++)
    if (diet_timestamp[j] < diet_timestamp[smallest])
      smallest=j;
  if (smallest != i)
    {
    SWAP_INT(diet_timestamp[i],diet_timestamp[smallest]);
    for (k=0; k<DIET_FIELDS; k++)
      SWAP_STR(diet_details[i][k],diet_details[smallest][k]);
    }
  }

if (0)	/* print out all diet records */
  {
  for (i=0; i<TotalDiet; i++)
    {
    DateFromTimestamp(diet_timestamp[i],&diet_year,&diet_month,&diet_day,
	&diet_hour,&diet_minute,&diet_second);
    printf("%d-%d-%d\t%2d:%02d:%02d\t%d\t%s\n",
	diet_year+2000,diet_month+1,diet_day+1,
	diet_hour,diet_minute,diet_second,
	diet_timestamp[i],diet_details[i][2]);
    }
  }



	/*
	** For each EMA meal, find its closest diet record.  If that record
	** already is connected to another EMA meal, determine if the new
	** EMA meal is closer.  If it is closer, unmatch the previous EMA
	** meal (it will be left unmatched) and match the new EMA meal.
	*/

for (i=0; i<TotalMeals; i++)
  match_ema_to_diet[i]=-1;	/* unmatched */
for (i=0; i<TotalDiet; i++)
  match_diet_to_ema[i]=-1;	/* unmatched */
for (i=0; i<TotalMeals; i++)
  {
  closest_index=-1;
  closest_diff=0;
  for (j=0; j<TotalDiet; j++)
    {
    time_diff=abs(meal_timestamp[i]-diet_timestamp[j]);
    if (closest_index == -1  ||  time_diff < closest_diff)
      {
      closest_index=j;
      closest_diff=time_diff;
      }
    }
  if (closest_index == -1)	/* there are no diet records at all */
    continue;
  if (closest_diff > 60*60*4)	/* 4 hours max */
    continue;	/* no possible match */
  if (match_diet_to_ema[closest_index] != -1)
    {		/* this diet record already matched to another EMA meal */
    if (closest_diff < abs(delta_diet_to_ema[closest_index]))
      {	/* but this match is even closer */
      match_ema_to_diet[match_diet_to_ema[closest_index]]=-1;	/* unmatch */
		/* new match */
      match_ema_to_diet[i]=closest_index;
      delta_ema_to_diet[i]=closest_diff;
      match_diet_to_ema[closest_index]=i;
      delta_diet_to_ema[closest_index]=closest_diff;
      }
    }
  else	/* new match */
    {
    match_ema_to_diet[i]=closest_index;
    delta_ema_to_diet[i]=closest_diff;
    match_diet_to_ema[closest_index]=i;
    delta_diet_to_ema[closest_index]=closest_diff;
    }
  }


	/* print out each EMA meal with corresponding match */
if (output == 1)
  {
  for (i=0; i<TotalMeals; i++)
    {
    DateFromTimestamp(meal_timestamp[i],&meal_year,&meal_month,&meal_day,
	&meal_hour,&meal_minute,&meal_second);
    printf("%d%c%d-%d-%d%c%2d:%02d:%02d%c%d%c%s",
	subID,SEPARATOR,
	meal_year+2000,meal_month+1,meal_day+1,SEPARATOR,
	meal_hour,meal_minute,meal_second,SEPARATOR,
	meal_surv_num[i],SEPARATOR,meal_name[i]);
    if (match_ema_to_diet[i] != -1)
      {
      hr_diff=(double)(diet_timestamp[match_ema_to_diet[i]]-meal_timestamp[i])
	/(double)(60*60);	/* units are hr */
      DateFromTimestamp(diet_timestamp[match_ema_to_diet[i]],
	&diet_year,&diet_month,&diet_day,
	&diet_hour,&diet_minute,&diet_second);
      printf("%c%2d:%02d:%02d%c%.1lf",
	SEPARATOR,diet_hour,diet_minute,diet_second,
	SEPARATOR,hr_diff);
      for (j=0; j<DIET_FIELDS; j++)
        printf("%c%s",SEPARATOR,diet_details[match_ema_to_diet[i]][j]);
      printf("\n");
      }
    else
      {
      printf("%cunmatched",SEPARATOR);
		/* one NA for the delta plus one per detail column */
      for (j=0; j<DIET_FIELDS+1; j++)
        printf("%cNA",SEPARATOR);
      printf("\n");
      }
    }
  }


	/* print out each diet record with corresponding match */
if (output == 2)
  {
  for (i=0; i<TotalDiet; i++)
    {
    DateFromTimestamp(diet_timestamp[i],&diet_year,&diet_month,&diet_day,
	&diet_hour,&diet_minute,&diet_second);
    printf("%d%c%d-%d-%d%c%2d:%02d:%02d",
	subID,SEPARATOR,
	diet_year+2000,diet_month+1,diet_day+1,SEPARATOR,
	diet_hour,diet_minute,diet_second);
    for (j=0; j<DIET_FIELDS; j++)
      printf("%c%s",SEPARATOR,diet_details[i][j]);
    if (match_diet_to_ema[i] != -1)
      {
      hr_diff=(double)(meal_timestamp[match_diet_to_ema[i]]-diet_timestamp[i])
	/(double)(60*60);	/* units are hr */
      DateFromTimestamp(meal_timestamp[match_diet_to_ema[i]],
	&ema_year,&ema_month,&ema_day,
	&ema_hour,&ema_minute,&ema_second);
      printf("%c%2d:%02d:%02d%c%.1lf%c%d%c%s\n",SEPARATOR,
	ema_hour,ema_minute,ema_second,
	SEPARATOR,hr_diff,
	SEPARATOR,meal_surv_num[match_diet_to_ema[i]],
	SEPARATOR,meal_name[match_diet_to_ema[i]]);
      }
    else
      printf("%cunmatched%cNA%cNA%cNA\n",
	SEPARATOR,SEPARATOR,SEPARATOR,SEPARATOR);
    }
  }

return(0);
}
	






	/*
	** Break a seconds-since-Jan-1-2000 timestamp into calendar parts.
	** Every fourth year, starting with year 0, is counted as a leap
	** year.  Month and day come back zero-based, so callers add 1
	** before printing them.
	*/
void DateFromTimestamp(unsigned int timestamp,
                        int     *Year,		  /* years since 2000 */
                        int     *Month,		  /* 0...11 */
                        int     *Day,		  /* 0...30 */
                        int     *Hour,		  /* 0...23 */
                        int     *Min,		  /* 0...59 */
                        int     *Sec)		  /* 0...59 */

{
const unsigned int	day_len=86400;		/* seconds per day */
const unsigned int	year_len=31536000;	/* seconds per 365-day year */
unsigned int		month_days[12]={31,28,31,30,31,30,31,31,30,31,30,31};
unsigned int		left,span;
int			yr,mon;

left=timestamp;

	/* peel off whole years; a leap year is one day longer */
for (yr=0; ; yr++)
  {
  span=year_len;
  if (yr%4 == 0)
    span+=day_len;
  if (left < span)
    break;
  left-=span;
  }

	/* peel off whole months of the year we ended up in */
if (yr%4 == 0)
  month_days[1]=29;
for (mon=0; left >= month_days[mon]*day_len; mon++)
  left-=month_days[mon]*day_len;

*Year=yr;
*Month=mon;
*Day=(int)(left/day_len);
left%=day_len;
*Hour=(int)(left/3600);
*Min=(int)((left%3600)/60);
*Sec=(int)(left%60);
}



	/*
	** Compute the time in seconds since Jan 1 2000 12:00am.
	** The month and day are 1-indexed.  Every fourth year, starting
	** with year 0, is counted as a leap year.
	**
	** A month above 12 is treated as if all twelve months of the year
	** had already passed; it never indexes past the month table.
	** The result is accumulated in an int, so where int is 32 bits
	** the sum overflows for years of 69 and above.
	*/
int TimeStamp(unsigned char RTCYEARL,		/* 0...255 (years since 2000) */
			  unsigned char	RTCMON,			/* 1...12 */
			  unsigned char	RTCDAY,			/* 1...31 */
			  unsigned char	RTCHOUR,		/* 0...23 */
			  unsigned char	RTCMIN,			/* 0...59 */
			  unsigned char	RTCSEC)			/* 0...59 */
{
const int   days[12]={31,28,31,30,31,30,31,31,30,31,30,31};
int         j;
int			months;
int			elapsed;

elapsed=0;
	/* whole years, counted as 365 days each */
for (j=0; j<RTCYEARL; j++)
  elapsed+=31536000;
	/* one extra day for each leap year before this one */
j=(RTCYEARL+3)/4;
elapsed+=(j*86400);
	/* whole months before this one; clamp so days[] is never overrun */
months=(RTCMON > 13) ? 13 : RTCMON;
for (j=1; j<months; j++)
  elapsed+=(days[j-1]*86400);
	/* Feb 29 of the current year, once we are past February */
if (RTCYEARL%4 == 0  &&  RTCMON > 2)
  elapsed+=86400;
elapsed+=((RTCDAY-1)*86400);
elapsed+=(RTCHOUR*3600);
elapsed+=(RTCMIN*60);
elapsed+=RTCSEC;
return(elapsed);
}
















