#include "filelist.h"

#include<stdio.h>
#include<stdlib.h>
#include<unistd.h>
#include<errno.h>
#include<string.h>
#include<sys/types.h>
#include<dirent.h>
#include<fcntl.h>
#include<sys/stat.h>
#include<sys/param.h>

/*******************************************************************************
 * getFileSize takes the path to a regular file and measures its size by reading
 * the whole content of the file and counting the bytes that were read. This
 * should only be used when STAT(2) returns a file size of 0.
 *
 * Failures to open, read or close the file are reported on file descriptor 2.
 * errno is reset to 0 after each report, so code that inspects errno later on
 * does not see a stale value.
 *
 * @param path Path to the file that should be measured
 * @return Size of the measured file in bytes. 0 is returned if the file could
 *         not be opened or if reading it failed.
 ******************************************************************************/
size_t getFileSize(const char *path) {
  int fd = open(path, O_RDONLY);
  if(fd == -1) {
    dprintf(2, "Unable to open %s. Error: %s\n", path, strerror(errno));
    errno = 0;
    return 0;
  }

  /* The content itself is discarded, only the number of bytes matters. A fixed
   * stack buffer is therefore sufficient and needs neither an allocation check
   * nor a matching free on every exit path. */
  char buf[4096];
  size_t fileSize = 0;
  ssize_t readBytes = 0;

  while((readBytes = read(fd, buf, sizeof buf)) > 0) {
    fileSize += (size_t)readBytes;
  }

  /* read returns 0 at end of file, so any other value here means failure. A
   * partially measured size would be misleading and is dropped. */
  if(readBytes != 0) {
    dprintf(2, "Unable to read %s. Error: %s\n", path, strerror(errno));
    errno = 0;
    fileSize = 0;
  }

  /* A failing close is reported but does not invalidate the measured size. */
  if(close(fd) != 0) {
    dprintf(2, "Unable to close %s. Error: %s\n", path, strerror(errno));
    errno = 0;
  }

  return fileSize;
}

/*******************************************************************************
 * getFileListHandleFile takes the path to a file and appends it to the list of
 * file paths. The submitted path may live on the stack of the caller and may
 * therefore become invalid before the list is released, so a copy of the path
 * is placed on the heap and the list stores that copy.
 *
 * The list and the counter are only modified if both allocations succeed. On
 * failure the existing list stays valid and unchanged, and errno is left as it
 * was after the failed allocation so that the caller can report the cause.
 *
 * @param path Path of the file that should be added to the list
 * @param currentFileList Pointer to the array that contains the file paths.
 *        The array may be NULL if it does not contain any element yet.
 * @param nFiles Pointer to the variable storing the number of elements in
 *        the currentFileList array
 * @return 0 is returned on success, -1 on error.
 ******************************************************************************/
int getFileListHandleFile(const char *path, char ***currentFileList, size_t *nFiles) {
  size_t pathSize = strlen(path) + 1;
  char *pathStr = malloc(pathSize);
  if(pathStr == NULL) {
    return -1;
  }
  memcpy(pathStr, path, pathSize);

  /* The result of realloc goes into a temporary first. Assigning it directly
   * to *currentFileList would lose the only pointer to the old array if the
   * call fails. */
  char **grownList = realloc(*currentFileList, (*nFiles + 1) * sizeof **currentFileList);
  if(grownList == NULL) {
    /* free is not guaranteed to preserve errno on every platform. */
    int allocErrno = errno;
    free(pathStr);
    errno = allocErrno;
    return -1;
  }

  grownList[*nFiles] = pathStr;
  *currentFileList = grownList;
  *nFiles = *nFiles + 1;

  return 0;
}

/*******************************************************************************
 * getFileListRecursive is a helper function that iterates over a directory and
 * calls itself for all directories found within that directory. If it finds a
 * regular file, its size is added to the total and its path is appended to the
 * submitted list of file paths. Entries that are neither directories nor
 * regular files are ignored.
 *
 * The type of an entry is determined with STAT(2), which follows symbolic
 * links. Entries that cannot be stat'ed or whose path does not fit into
 * MAXPATHLEN bytes are reported and skipped. A sub directory that cannot be
 * processed or a path that cannot be added to the list aborts the walk.
 *
 * All errors are reported on file descriptor 2 and errno is reset to 0
 * afterwards.
 *
 * @param path Path of the directory which content should be handled
 * @param currentFileList Pointer to an array with file paths to which the newly
 *        found paths should be appended
 * @param nFiles Pointer to a variable that stores the number of files in the
 *        currentFileList array
 * @param nBytes Pointer to a variable that stores the number of bytes which
 *        is used by all files in sum.
 * @param measureManually If set to 0, the size reported by STAT(2) is used
 *        even if it is 0. Otherwise, files with a reported size of 0 are
 *        measured manually by reading them.
 * @return 0 is returned on success, -1 on error.
 ******************************************************************************/
int getFileListRecursive(const char *path, char ***currentFileList, size_t *nFiles, size_t *nBytes, int measureManually) {
  char currentPath[MAXPATHLEN];

  DIR *dir = opendir(path);
  if(dir == NULL) {
    dprintf(2, "Unable to open directory %s. Error: %s\n", path, strerror(errno));
    errno = 0;
    return -1;
  }

  struct dirent *dirEntry = NULL;
  struct stat dirEntryStat;
  int retVal = 0;

  for(;;) {
    /* readdir returns NULL at the end of the directory as well as on error.
     * Only errno tells both cases apart, so it has to be cleared before every
     * call. Otherwise a stale value would be reported as a read error. */
    errno = 0;
    dirEntry = readdir(dir);
    if(dirEntry == NULL) {
      if(errno != 0) {
        dprintf(2, "Unable to read content of dir %s. Error: %s\n", path, strerror(errno));
        errno = 0;
        retVal = -1;
      }
      break;
    }

    if (strcmp(dirEntry->d_name, ".") == 0 || strcmp(dirEntry->d_name, "..") == 0) {
      continue;
    }

    /* A truncated path would refer to a different (or non existing) file, so
     * such entries are skipped instead of being processed under a wrong name. */
    int pathLen = snprintf(currentPath, sizeof currentPath, "%s/%s", path, dirEntry->d_name);
    if(pathLen < 0 || (size_t)pathLen >= sizeof currentPath) {
      dprintf(2, "Unable to handle %s/%s. Error: %s\n", path, dirEntry->d_name, strerror(ENAMETOOLONG));
      errno = 0;
      continue;
    }

    if(stat(currentPath, &dirEntryStat) != 0) {
      dprintf(2, "Unable to stat %s. Error: %s\n", currentPath, strerror(errno));
      errno = 0;
      continue;
    }

    if((dirEntryStat.st_mode & S_IFMT) == S_IFDIR) {
      /* Leave the loop instead of returning directly so that the directory
       * stream opened above is closed on the error path as well. */
      if(getFileListRecursive(currentPath, currentFileList, nFiles, nBytes, measureManually) == -1) {
        retVal = -1;
        break;
      }
    } else if((dirEntryStat.st_mode & S_IFMT) == S_IFREG) {
      if(dirEntryStat.st_size != 0) {
        *nBytes = *nBytes + (size_t)dirEntryStat.st_size;
      } else if(measureManually) {
        *nBytes = *nBytes + getFileSize(currentPath);
      }

      /* Without the entry the resulting list would be incomplete, so the walk
       * is aborted if the path cannot be stored. */
      if(getFileListHandleFile(currentPath, currentFileList, nFiles) != 0) {
        dprintf(2, "Unable to add %s to the file list. Error: %s\n", currentPath, strerror(errno));
        errno = 0;
        retVal = -1;
        break;
      }
    }
  }

  if(closedir(dir) != 0) {
    dprintf(2, "Unable to close dir %s. Error: %s\n", path, strerror(errno));
    errno = 0;
    retVal = -1;
  }

  return retVal;
}

/*******************************************************************************
 * getFileList walks the directory tree below baseDir and collects the paths of
 * all regular files it finds in an array. While doing so, the sizes of those
 * files are summed up. The size is taken from STAT(2) where possible. For
 * memory based file systems (e.g. /proc) STAT(2) always reports 0 for regular
 * files; in that case the size is determined by reading the file, unless
 * measureManually is set to 0.
 *
 * If the directory walk fails, a message is written to file descriptor 2 and
 * the process is terminated with EXIT_FAILURE, so this function only returns
 * on success.
 *
 * @param baseDir Path to the directory to start the recursive directory walk
 *        from
 * @param nFiles Pointer to store the number of file paths in the array. It is
 *        reset to 0 before the walk starts.
 * @param nBytes Pointer to store the sum of file sizes of all files in bytes.
 *        It is reset to 0 before the walk starts.
 * @param measureManually If set to 0, the size of files in memory based file
 *        systems is not measured manually but taken as 0. Otherwise, the size
 *        is measured manually by reading the file.
 * @return Array of paths to regular files under the specified baseDir. It is
 *         NULL if no regular file was found. The array is meant to be
 *         released with freeFileList.
 ******************************************************************************/
char **getFileList(const char *baseDir, size_t *nFiles, size_t *nBytes, int measureManually) {
  char **collectedPaths = NULL;

  /* Both counters are accumulated by the walk, so they have to start at 0. */
  *nBytes = 0;
  *nFiles = 0;

  int walkStatus = getFileListRecursive(baseDir, &collectedPaths, nFiles, nBytes, measureManually);
  if(walkStatus != -1) {
    return collectedPaths;
  }

  /* The details were already reported by the walk itself. */
  dprintf(2, "Unable to fetch file list. See error messages above for more information.\n");
  errno = 0;
  exit(EXIT_FAILURE);
}

/*******************************************************************************
 * freeFileList takes a pointer to a file list that was generated by getFileList
 * and frees every stored path as well as the array itself. Afterwards the list
 * pointer is set to NULL and the number of elements is set to 0, so calling
 * this function again on the same variables is harmless.
 *
 * @param currentFileList Pointer to the fileList returned by getFileList. The
 *        list itself may be NULL (e.g. if no file was found).
 * @param nFiles Pointer to the number of elements in the file list
 * @return 0 is returned on success, -1 if currentFileList or nFiles is NULL.
 ******************************************************************************/
int freeFileList(char ***currentFileList, size_t *nFiles) {
  if(currentFileList == NULL || nFiles == NULL) {
    return -1;
  }

  char **fileList = *currentFileList;

  /* A NULL list has no elements that could be released, regardless of the
   * value stored in nFiles. */
  if(fileList != NULL) {
    for(size_t i = 0; i < *nFiles; i++) {
      free(fileList[i]);
    }
    free(fileList);
  }

  *currentFileList = NULL;
  *nFiles = 0;

  return 0;
}
