{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Adam Patyk\n",
    "# Clemson University\n",
    "# MS Thesis: Daily Pattern Classifier\n",
    "# Summer 2021\n",
    "\n",
    "# GenerateSamples.ipynb\n",
    "# Purpose: Generates daily samples for data augmentation\n",
    "\n",
    "import sys\n",
    "import os\n",
    "import tensorflow as tf # updated for TensorFlow 2.2.0\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import random\n",
    "import math\n",
    "from datetime import datetime\n",
    "from tqdm import tqdm\n",
    "\n",
    "sys.path.append('../') # for .py files in ../common/\n",
    "import common.loadfile as loadfile\n",
    "import common.training as training\n",
    "import common.testing as testing\n",
    "\n",
    "# GPU setup: turn on memory growth for every physical GPU before the logical devices are listed\n",
    "gpus = tf.config.experimental.list_physical_devices('GPU')\n",
    "for device in gpus:\n",
    "    tf.config.experimental.set_memory_growth(device, enable=True)\n",
    "logical_gpus = tf.config.experimental.list_logical_devices('GPU')\n",
    "print(len(gpus), \"Physical GPU,\", len(logical_gpus), \"Logical GPUs\")\n",
    "\n",
    "# --- run configuration ---\n",
    "epochs = 30             # passed to training.trainModel\n",
    "win_min = 6             # window length, in minutes\n",
    "train_stride_sec = 15   # slide between training windows, in seconds\n",
    "test_stride_sec = 100   # slide between testing windows, in seconds\n",
    "samples_per_sec = 15    # data points per second of recording (was a bare 15 repeated in each conversion)\n",
    "\n",
    "# convert the time-based settings above into counts of data points\n",
    "win_len = int(win_min * 60 * samples_per_sec)\n",
    "train_step = int(train_stride_sec * samples_per_sec)\n",
    "test_step = int(test_stride_sec * samples_per_sec)\n",
    "start_time = datetime.now()  # run-level start, reported in the start-up banner\n",
    "\n",
    "# every output of this notebook (temporary model file and generated samples) is written under save_dir\n",
    "save_dir = 'samples/'\n",
    "os.makedirs(save_dir, exist_ok=True)\n",
    "\n",
    "# NOTE(review): only Python's and NumPy's generators are seeded here; TensorFlow's is not.\n",
    "# Presumably intentional so that each retrained model differs - confirm before adding tf.random.set_seed.\n",
    "random.seed(0)\n",
    "np.random.seed(0)\n",
    "\n",
    "# shell commands that echo the PBS node file, job identifier and job name from the environment\n",
    "arr = [\"echo -n 'PBS: node is '; cat $PBS_NODEFILE\",\n",
    "       \"echo PBS: job identifier is $PBS_JOBID\",\n",
    "       \"echo PBS: job name is $PBS_JOBNAME\"]\n",
    "\n",
    "# plain loop: the commands are run only for what they print, so no list of exit codes is built\n",
    "for cmd in arr:\n",
    "    os.system(cmd)\n",
    "\n",
    "# start-up banner: separator line, wall-clock start time, then the windowing/training configuration\n",
    "print(\"*****************************************************************\", flush=True)\n",
    "started_at = start_time.strftime(\"%m/%d/%Y, %H:%M:%S\")\n",
    "print(\"Execution Started at \" + started_at, flush=True)\n",
    "config_summary = \"Window Length: {:.2f} min ({:d} data)\\tTraining Slide: {:d} sec ({:d} data)\\tTesting Slide: {:d} sec ({:d} data)\\tEpochs: {:d}\".format(\n",
    "    win_min, win_len, train_stride_sec, train_step, test_stride_sec, test_step, epochs)\n",
    "print(config_summary, flush=True)\n",
    "\n",
    "# load the dataset for training with majority vote GT labeling for windows\n",
    "num_files, all_training_data, training_samples_array, training_labels_array = loadfile.loadAllData3(\n",
    "    win_len,\n",
    "    train_step,\n",
    "    removerest=0,\n",
    "    removewalk=0,\n",
    "    removebias=1,\n",
    ")\n",
    "\n",
    "# load the dataset for testing with a different stride and GT labeling (center point)\n",
    "all_testing_data, testing_samples_array, testing_labels_array = loadfile.loadAllDataTesting(\n",
    "    '../common/batch-unix.txt',\n",
    "    win_len,\n",
    "    test_step,\n",
    "    removebias=1,\n",
    ")\n",
    "\n",
    "print(\"Data loaded.\", flush=True)\n",
    "\n",
    "# normalize the dataset\n",
    "# candidate statistics (six values each); only the pair selected below is actually applied\n",
    "shimmer_global_mean = [\n",
    "    -0.012359981, -0.0051663737, 0.011612018,\n",
    "    0.05796114, 0.1477952, -0.034395125,\n",
    "]\n",
    "\n",
    "shimmer_global_stddev = [\n",
    "    0.05756385, 0.040893298, 0.043825723,\n",
    "    17.199743, 15.311142, 21.229317,\n",
    "]\n",
    "\n",
    "shimmer_trended_mean = [\n",
    "    -0.000002, -0.000002, -0.000000,\n",
    "    0.058144, 0.147621, -0.033260,\n",
    "]\n",
    "\n",
    "shimmer_trended_stddev = [\n",
    "    0.037592, 0.034135, 0.032263,\n",
    "    17.209038, 15.321441, 21.242532,\n",
    "]\n",
    "\n",
    "all_zero_means = [0] * 6\n",
    "\n",
    "# active choice: zero means together with the trended standard deviations\n",
    "mean_vals = all_zero_means\n",
    "std_vals = shimmer_trended_stddev\n",
    "\n",
    "all_training_normalized = loadfile.globalZscoreNormalize(all_training_data, mean_vals, std_vals)\n",
    "all_testing_normalized = loadfile.globalZscoreNormalize(all_testing_data, mean_vals, std_vals)\n",
    "\n",
    "# the un-normalized arrays are not referenced again in this notebook, so drop the names\n",
    "del all_training_data, all_testing_data\n",
    "\n",
    "print(\"Data normalized.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# generate training samples from trained model\n",
    "num_samples = 200000\n",
    "subjects = list(range(num_files))\n",
    "num_subjects = len(subjects)\n",
    "# at most one file is written per subject per iteration, so this many train/predict passes reach num_samples\n",
    "num_iterations = math.ceil(num_samples / num_subjects)\n",
    "model_path = save_dir + f'tmp_{win_min}min.h5'  # same path handed to training.trainModel on every pass\n",
    "num_saved = 0  # files actually written; subjects skipped below are not counted\n",
    "\n",
    "print(f'Generating training samples ({num_subjects} subjects)', flush=True)\n",
    "\n",
    "for i in tqdm(range(num_iterations)):\n",
    "    # per-iteration timer; a separate name keeps the run-level start_time from the first cell intact\n",
    "    iter_start = datetime.now()\n",
    "\n",
    "    # train model on all training data\n",
    "    H, model = training.trainModel(training_samples_array, training_labels_array, all_training_normalized, win_len, epochs, model_path)\n",
    "\n",
    "    # output P(E) and GT to text file for each recording using the trained model\n",
    "    for s in subjects:\n",
    "        # rows of the testing index whose first column matches this subject number\n",
    "        subject_bool = np.isin(testing_samples_array[:,0], s)\n",
    "        s_samples = testing_samples_array[subject_bool]\n",
    "        s_labels = testing_labels_array[subject_bool]\n",
    "        raw_samples, gt_labels = testing.get_raw_data(s_samples, s_labels, all_testing_normalized)\n",
    "        if raw_samples.size == 0:\n",
    "            continue  # nothing to predict for this subject, so no file is written\n",
    "        probs = model.predict(raw_samples, batch_size=1024)\n",
    "        # GT labels flattened into a single column, followed by the model output\n",
    "        result = np.hstack((np.reshape(gt_labels, (-1, 1)), probs))\n",
    "        np.savetxt(save_dir + f'W{win_min}_P{s:03.0f}_I{i:03.0f}.txt', result)\n",
    "        num_saved += 1\n",
    "\n",
    "    # reset Keras' global state and drop the model before the next one is trained\n",
    "    tf.keras.backend.clear_session()\n",
    "    del model\n",
    "\n",
    "    end_time = datetime.now()\n",
    "    print(f'Iteration Duration: {end_time - iter_start}', flush=True)\n",
    "\n",
    "# report the number of files actually written rather than the theoretical maximum\n",
    "print(f'{num_saved} testing samples saved.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "TensorFlow GPU 2.2.0",
   "language": "python",
   "name": "tf_gpu_env"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
