// Copyright 2019 Bloomberg Finance L.P
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef INCLUDED_BUILDBOXCOMMON_MERKLIZE
#define INCLUDED_BUILDBOXCOMMON_MERKLIZE

#include <buildboxcommon_digestgenerator.h>
#include <buildboxcommon_futuregroup.h>
#include <buildboxcommon_protos.h>
#include <buildboxcommon_timeutils.h>

#include <cstddef>
#include <filesystem>
#include <functional>
#include <future>
#include <google/protobuf/message.h>
#include <google/protobuf/util/message_differencer.h>
#include <iosfwd>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <sys/types.h>
#include <ThreadPool.h>
#include <unordered_map>
#include <utility>
#include <vector>

namespace buildboxcommon {

// Forward declaration: this header only refers to `CASClient` through
// pointers, so the full definition is not needed here.
class CASClient;

// Hash map from a `Digest` to a string. Within this header it is used both
// for digest -> serialized blob and digest -> file path associations.
using digest_string_map =
    std::unordered_map<buildboxcommon::Digest, std::string>;

// Iterator over a `digest_string_map`.
using digest_string_map_it =
    std::unordered_map<buildboxcommon::Digest, std::string>::iterator;

// Callable that receives a file descriptor and returns a `Digest`.
using FileDigestFunction = std::function<Digest(int fd)>;

// Callable that receives a `mode_t` and returns a (possibly different)
// `mode_t`.
using UnixModeUpdater = std::function<mode_t(mode_t mode)>;

// Create a UnixModeUpdater that applies a given mask
UnixModeUpdater unixModeMaskUpdater(mode_t mask);

/**
 * Represents a single file.
 *
 * Holds the file's digest, its executable flag and its node properties.
 */
struct File {
    Digest d_digest;
    bool d_executable = false;
    NodeProperties d_nodeProperties;

    /**
     * Constructs an empty File: default-constructed digest and node
     * properties, and `d_executable == false`.
     */
    File() = default;

    /**
     * Constructs a File directly from already-known values.
     *
     * `digest` is taken by value and moved into the member so that callers
     * passing a temporary do not pay for a second copy.
     */
    File(Digest digest, bool executable, const NodeProperties &nodeProperties)
        : d_digest(std::move(digest)), d_executable(executable),
          d_nodeProperties(nodeProperties)
    {
    }

    /**
     * Constructs a File given the path to a file on disk.
     *
     * The overloads below differ in how the file is identified (`path`,
     * `dirfd` + `path`, or an already open `fd`) and in whether the caller
     * supplies the `fileDigestFunc` that produces the digest. All of them are
     * defined outside this header.
     *
     * NOTE(review): the exact effect of `capture_properties`,
     * `unixModeUpdater`, `nodeProperties`, `allowChmodToRead` and
     * `modeOverride` is not visible from this header -- confirm against the
     * implementation. `dirfd` is presumably the directory that `path` is
     * resolved relative to; verify before relying on it.
     */
    File(const char *path,
         const std::vector<std::string> &capture_properties = {},
         const UnixModeUpdater &unixModeUpdater = {},
         const std::map<std::string, std::string> &nodeProperties = {},
         bool allowChmodToRead = false);
    File(const char *path, const FileDigestFunction &fileDigestFunc,
         const std::vector<std::string> &capture_properties = {},
         const UnixModeUpdater &unixModeUpdater = {},
         const std::map<std::string, std::string> &nodeProperties = {},
         bool allowChmodToRead = false);
    File(int dirfd, const char *path, const FileDigestFunction &fileDigestFunc,
         const std::vector<std::string> &capture_properties = {},
         const UnixModeUpdater &unixModeUpdater = {},
         const std::map<std::string, std::string> &nodeProperties = {},
         bool allowChmodToRead = false);
    File(int fd, const std::vector<std::string> &capture_properties = {},
         const UnixModeUpdater &unixModeUpdater = {},
         const std::map<std::string, std::string> &nodeProperties = {},
         bool allowChmodToRead = false,
         const std::optional<mode_t> &modeOverride = std::nullopt);
    File(int fd, const FileDigestFunction &fileDigestFunc,
         const std::vector<std::string> &capture_properties = {},
         const UnixModeUpdater &unixModeUpdater = {},
         const std::map<std::string, std::string> &nodeProperties = {},
         bool allowChmodToRead = false,
         const std::optional<mode_t> &modeOverride = std::nullopt);

    /**
     * Two Files are equal when their digests and executable flags are equal
     * and their node properties compare equal as protobuf messages.
     */
    bool operator==(const File &other) const
    {
        return d_digest == other.d_digest &&
               d_executable == other.d_executable &&
               google::protobuf::util::MessageDifferencer::Equals(
                   d_nodeProperties, other.d_nodeProperties);
    }

    /**
     * Converts a File to a FileNode with the given name.
     */
    FileNode to_filenode(const std::string &name) const;

  private:
    // Initialisation helper that works from an open file descriptor.
    // NOTE(review): presumably shared by the public constructors -- confirm
    // in the implementation file.
    void init(int fd, const FileDigestFunction &fileDigestFunc,
              const std::vector<std::string> &capture_properties = {},
              const UnixModeUpdater &unixModeUpdater = {},
              const std::map<std::string, std::string> &nodeProperties = {},
              const std::optional<mode_t> &modeOverride = std::nullopt);
};

/**
 * An ignore pattern in the format understood by fnmatch, together with two
 * flags that restrict how the pattern is applied.
 */
struct IgnorePattern {
    // The fnmatch-style pattern text.
    std::string d_pattern;

    // When true, matching looks only at the basename and disregards the
    // directory part of the path.
    // For example, `*.o` matches `dir1/dir2/a.o`.
    bool d_matchBasenameOnly;

    // When true, the pattern only matches directories.
    bool d_matchDirectoryOnly;

    /**
     * Store the pattern text and both flags exactly as given.
     */
    IgnorePattern(const std::string &pattern, const bool matchBasenameOnly,
                  const bool matchDirectoryOnly)
        : d_pattern{pattern}, d_matchBasenameOnly{matchBasenameOnly},
          d_matchDirectoryOnly{matchDirectoryOnly}
    {
    }

    bool operator==(const IgnorePattern &other) const;

    /**
     * Parse an IgnorePattern from a string.
     *
     * Leading slashes are trimmed so that patterns are relative to
     * a path prefix.
     */
    static IgnorePattern fromString(const std::string &s);
};

// Stream-insertion operator for `IgnorePattern`. The output format is
// determined by the implementation, which is not part of this header.
std::ostream &operator<<(std::ostream &o, const IgnorePattern &ignorePattern);

/**
 * Represent a collection of IgnorePatterns and the
 * path prefix the patterns should work within
 */
class IgnoreMatcher {
  public:
    /**
     * Parse a vector of ignore patterns, either read from an input stream
     * or taken from a vector of pattern strings.
     *
     * The result is a shared pointer because patterns might be shared
     * between multiple matchers.
     */
    static std::shared_ptr<std::vector<IgnorePattern>>
    parseIgnorePatterns(std::istream &is);
    static std::shared_ptr<std::vector<IgnorePattern>>
    parseIgnorePatterns(const std::vector<std::string> &patterns);

    /**
     * Construct `IgnoreMatcher` from a path prefix which can be empty,
     * and a vector of fnmatch patterns relative to it
     */
    IgnoreMatcher(
        const std::string &pathPrefix,
        const std::shared_ptr<std::vector<IgnorePattern>> &ignorePatterns);

    bool operator==(const IgnoreMatcher &other) const;

    /**
     * Given a path name,
     * return true if the path matches any of the patterns.
     * See more details in `man 3 fnmatch`.
     *
     * `fnmatchFlags` defaults to 0 and `isDirectory` defaults to false.
     * NOTE(review): `fnmatchFlags` is presumably forwarded to fnmatch and
     * `isDirectory` presumably interacts with directory-only patterns --
     * confirm in the implementation.
     */
    bool match(const std::string &path, const int fnmatchFlags = 0,
               const bool isDirectory = false) const;

  private:
    /**
     * Remove `d_pathPrefix` and slashes from the beginning of `path`.
     * If `d_pathPrefix` is empty or it is not the prefix of `path`,
     * the original `path` is returned.
     */
    std::string trimPrefix(const std::string &path) const;

    // Prefix that the patterns are relative to; may be empty.
    std::string d_pathPrefix;
    // Patterns to match against, possibly shared with other matchers.
    std::shared_ptr<std::vector<IgnorePattern>> d_ignorePatterns;
};

/**
 * Represents a directory that, optionally, has other directories inside.
 */
struct NestedDirectory {
  public:
    // Important to use a sorted map to keep subdirectories ordered by name
    using subdir_map = std::map<std::string, NestedDirectory>;
    using subdir_map_it = std::map<std::string, NestedDirectory>::iterator;
    // Subdirectories by name. The constructor allocates an empty map.
    std::unique_ptr<subdir_map> d_subdirs;
    // Important to use a sorted map to keep files ordered by name
    std::map<std::string, File> d_files;
    // Symlinks, keyed by name and kept sorted.
    // NOTE(review): the mapped value is presumably the link target --
    // confirm in the implementation.
    std::map<std::string, std::string> d_symlinks;
    // Path mappings of files that haven't been hashed yet
    std::unordered_map<std::string, std::filesystem::path> d_filePaths;
    // Files that are currently being hashed
    std::unordered_map<std::string, std::shared_future<File>> d_fileFutures;
    NodeProperties d_nodeProperties;

    /**
     * Construct an empty directory with an allocated, empty subdirectory
     * map.
     */
    NestedDirectory() : d_subdirs(std::make_unique<subdir_map>()) {}

    /**
     * Add the given File to this NestedDirectory at the given relative path,
     * which may include subdirectories.
     *
     * `tryAddFile()` returns `false` if the path already exists. `add()`
     * throws an exception if the path already exists.
     */
    bool tryAddFile(const File &file, const char *relativePath);
    void add(const File &file, const char *relativePath);

    /**
     * Add the given file to this NestedDirectory at the given relative path,
     * which may include subdirectories. This does not yet hash the file at
     * the specified path. `hashFiles()` must be called to hash the contents
     * of all files added with these methods.
     *
     * `tryAddFilePath()` returns `false` if the path already exists.
     * `addFilePath()` throws an exception if the path already exists.
     */
    bool tryAddFilePath(const std::filesystem::path &path,
                        const char *relativePath);
    void addFilePath(const std::filesystem::path &path,
                     const char *relativePath);

    /**
     * Add the given symlink to this NestedDirectory at the given relative
     * path, which may include subdirectories.
     *
     * `tryAddSymlink()` returns `false` if the path already exists.
     * `addSymlink()` throws an exception if the path already exists.
     */
    bool tryAddSymlink(const std::string &target, const char *relativePath);
    void addSymlink(const std::string &target, const char *relativePath);

    /**
     * Add the given Directory to this NestedDirectory at a given relative
     * path. If the directory has contents, the add method should be used
     * instead.
     *
     * `tryAddDirectory()` returns a pointer to the new or already existing
     * `NestedDirectory` or `nullptr` if the path already exists but is not a
     * directory. `addDirectory()` throws an exception if the path already
     * exists but is not a directory.
     */
    NestedDirectory *tryAddDirectory(const char *directory);
    void addDirectory(const char *directory);

    /**
     * Convert this NestedDirectory to a Directory message and return its
     * Digest.
     *
     * If a digestMap is passed, serialized Directory messages corresponding to
     * this directory and its subdirectories will be stored in it using their
     * Digest messages as the keys. (This is recursive -- nested subdirectories
     * will also be stored.)
     */
    Digest to_digest(digest_string_map *digestMap = nullptr) const;

    /**
     * Add a key-value node property
     */
    void addNodeProperty(const std::string &name, const std::string &value);
    /**
     * Add multiple key-value node property pairs
     */
    void addNodeProperties(
        const std::map<std::string, std::string> &nodeProperties);

    /**
     * Hash the contents of all files added with `addFilePath()` using the
     * specified thread pool.
     */
    void hashFiles(ThreadPool *threadPool,
                   digest_string_map *digestToFilepaths);

    // Overload of `hashFiles()` that takes a `CASClient` instead of a thread
    // pool.
    // NOTE(review): how the client is used for hashing is not visible from
    // this header -- confirm in the implementation.
    void hashFiles(CASClient *casClient, digest_string_map *digestToFilepaths);

    // NOTE(review): presumably asks `casClient` to capture the files added
    // with `addFilePath()` and records the results in `digestToFilepaths` --
    // confirm in the implementation.
    void captureFiles(CASClient *casClient,
                      digest_string_map *digestToFilepaths);

    // Declared here only; the definition (and any explicit instantiations)
    // live outside this header.
    template <typename Response>
    void processResponse(
        std::unordered_map<std::string, const Response *> *pathResponseMap);

    /**
     * Convert this NestedDirectory to a Tree message.
     */
    Tree to_tree() const;

    // Write a representation of this directory to `out`. `dirName` defaults
    // to the empty string.
    void print(std::ostream &out, const std::string &dirName = "") const;

  private:
    // Helper for the add-style methods: takes a relative path and reports a
    // subdirectory and an entry name through the output parameters.
    // NOTE(review): the meaning of the boolean result is not visible here.
    bool getSubdirAndNameForAdd(const char *relativePath,
                                NestedDirectory **subdir, std::string *name);

    void createFileFutures(FutureGroup<File> *futureGroup);
    void waitForFileFutures(digest_string_map *digestToFilepaths);

    void collectFilePaths(std::vector<std::string> *paths);
    void processCapturedFiles(
        std::unordered_map<std::string, const CaptureFilesResponse_Response *>
            *pathResponseMap);
};

// Stream-insertion operator for `NestedDirectory`. The output format is
// determined by the implementation, which is not part of this header.
std::ostream &operator<<(std::ostream &out, const NestedDirectory &obj);

// Result of merklizing a directory tree: the root digest plus lookup tables
// keyed by digest.
struct MerklizeResult {
    // Digest of the root directory.
    Digest d_rootDigest;
    // Using `std::map` to keep the order of the directories deterministic when
    // building tree
    std::map<Digest, Directory> d_digestToDirectory;
    // Digest -> directory blob.
    // NOTE(review): presumably the serialized form of the corresponding
    // `Directory` message -- confirm in the implementation.
    digest_string_map d_digestToDirectoryBlob;
    // Digest -> path.
    // NOTE(review): presumably the path of the file with that digest --
    // confirm in the implementation.
    digest_string_map d_digestToPath;

    // Build a `Tree` message from this result.
    Tree tree() const;
};

/*
 * Class to merklize a directory tree.
 *
 * Instances can be neither copied nor moved, and there is no default
 * constructor.
 */
class Merklizer {
  public:
    /*
     * `followSymlinks`, `captureProperties`, `ignoreMatcher` and
     * `threadPool` correspond to the private members of the same names.
     * `threadPool` is a raw pointer and the defaulted destructor does not
     * delete it, so the pool is not owned by this object.
     */
    Merklizer(bool followSymlinks,
              const std::vector<std::string> &captureProperties = {},
              const std::shared_ptr<IgnoreMatcher> &ignoreMatcher = nullptr,
              ThreadPool *threadPool = nullptr);
    Merklizer() = delete;
    Merklizer(const Merklizer &) = delete;
    Merklizer &operator=(const Merklizer &) = delete;
    Merklizer(Merklizer &&) = delete;
    Merklizer &operator=(Merklizer &&) = delete;
    ~Merklizer() = default;

    /*
     * Create a merkle tree from `rootDirFd`.
     *
     * `fileDigestFunc`: function that computes the digest of files.
     * Defaults to `hashFile`.
     *
     * `pathPrefix`: a prefix to prepend to all filepaths in the output.
     *
     * `unixModeUpdater`: a function that can be used to mask the permission
     * bits of files and directories
     *
     * `rootNodeProperties`: key-value pairs that will be added to
     * the root node properties.
     *
     * `allowChmodToRead`: defaults to false.
     * NOTE(review): its exact effect is not visible from this header --
     * confirm in the implementation.
     */
    MerklizeResult
    merklize(int rootDirFd, const std::string &pathPrefix = "",
             const FileDigestFunction &fileDigestFunc = hashFile,
             const UnixModeUpdater &unixModeUpdater = {},
             const std::map<std::string, std::string> &rootNodeProperties = {},
             bool allowChmodToRead = false) const;
    /*
     * Create a `NestedDirectory` from `rootDirFd`, which supports dynamic
     * editing. The way nodes are traversed by the implementation should be
     * kept in sync with `merklize`.
     *
     * Returns the `NestedDirectory` together with a digest-to-filepath map.
     *
     * `fileDigestFunc`: function that computes the digest of files.
     * Defaults to `hashFile`.
     *
     * `pathPrefix`: a prefix to prepend to all filepaths in the output.
     *
     * `unixModeUpdater`: a function that can be used to mask the permission
     * bits of files and directories
     *
     * `rootNodeProperties`: key-value pairs that will be added to
     * the root node properties.
     *
     * `allowChmodToRead`: defaults to false.
     * NOTE(review): its exact effect is not visible from this header --
     * confirm in the implementation.
     */
    std::pair<NestedDirectory, digest_string_map /*digest to filepath*/>
    makeNestedDirectory(
        int rootDirFd, const std::string &pathPrefix = "",
        const FileDigestFunction &fileDigestFunc = hashFile,
        const UnixModeUpdater &unixModeUpdater = {},
        const std::map<std::string, std::string> &rootNodeProperties = {},
        bool allowChmodToRead = false) const;

    // Default file digest function: forwards `fd` to
    // `DigestGenerator::hash`.
    static inline Digest hashFile(int fd) { return DigestGenerator::hash(fd); }

    // Remap a file to a target path
    static MerklizeResult remapFile(const File &file,
                                    const std::filesystem::path &targetPath);
    // Remap a symlink to a target path
    static MerklizeResult
    remapSymlink(const std::string &linkTarget,
                 const std::filesystem::path &targetPath);
    // Remap a directory represented as `MerklizeResult` to a target path
    static MerklizeResult
    remapDirectory(MerklizeResult &&srcResult,
                   const std::filesystem::path &targetPath);

  private:
    // Paths found under a root directory, grouped by entry kind.
    // NOTE(review): the name suggests each list is in topological order --
    // confirm the exact ordering guarantee in `topologicalSort`.
    struct TopoSortedTreePaths {
        std::vector<std::filesystem::path> d_files;
        std::vector<std::filesystem::path> d_symlinks;
        std::vector<std::filesystem::path> d_directories;
    };

    bool d_followSymlinks;
    std::vector<std::string> d_captureProperties;
    std::shared_ptr<IgnoreMatcher> d_ignoreMatcher;
    // Not owned: the defaulted destructor does not delete it. The
    // constructor's default argument for it is `nullptr`.
    ThreadPool *d_threadPool;

    // Collect the paths under `rootDirFd` into a `TopoSortedTreePaths`.
    TopoSortedTreePaths topologicalSort(int rootDirFd) const;
    // Returns two maps keyed by path: one of `File` futures and one of
    // strings.
    // NOTE(review): the string map presumably holds symlink targets --
    // confirm in the implementation.
    std::pair<
        std::unordered_map<std::filesystem::path, std::shared_future<File>>,
        std::unordered_map<std::filesystem::path, std::string>>
    visitFilesAndSymlinks(int rootDirFd,
                          const TopoSortedTreePaths &sortedPaths,
                          const FileDigestFunction &fileDigestFunc,
                          const UnixModeUpdater &unixModeUpdater,
                          FutureGroup<File> *futureGroup,
                          bool allowChmodToRead = false) const;
    // NOTE(review): presumably the flags passed to stat/open-family calls,
    // derived from `d_followSymlinks` -- confirm in the implementation.
    int getStatFlags() const;
    int getOpenFlags() const;
    // Build the path of `entry` from its `parent` path.
    static std::filesystem::path
    createEntryPath(const std::filesystem::path &parent,
                    const std::filesystem::path &entry);
};

} // namespace buildboxcommon

#endif
