/// \file ROOT/RNTupleJoinTable.hxx
/// \ingroup NTuple
/// \author Florine de Geus <florine.de.geus@cern.ch>
/// \date 2024-04-02
/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
/// is welcome!

/*************************************************************************
 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers.               *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

#ifndef ROOT_RNTupleJoinTable
#define ROOT_RNTupleJoinTable

#include <ROOT/RField.hxx>

#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

namespace ROOT {
namespace Experimental {
namespace Internal {
// clang-format off
/**
\class ROOT::Experimental::Internal::RNTupleJoinTable
\ingroup NTuple
\brief Builds a join table on one or several fields of an RNTuple so it can be joined onto other RNTuples.
*/
// clang-format on
class RNTupleJoinTable {
public:
   /// Integral type in which individual join field values are stored in the join table.
   using JoinValue_t = std::uint64_t;
   /// Type of the key that identifies a partition of the join table.
   using PartitionKey_t = std::uint64_t;
   /// Partition key used when none is provided by the caller; it is the largest value `PartitionKey_t` can hold.
   static constexpr PartitionKey_t kDefaultPartitionKey = static_cast<PartitionKey_t>(-1);

private:
   // clang-format off
   /**
   \class ROOT::Experimental::Internal::RNTupleJoinTable::REntryMapping
   \ingroup NTuple
   \brief Provides a mapping from one or several join field values to an entry index.
   */
   // clang-format on
   class REntryMapping {
   private:
      //////////////////////////////////////////////////////////////////////////
      /// Key type of the mapping: holds the join field value, or the combination of values when the join key is
      /// composed of multiple fields.
      struct RCombinedJoinFieldValue {
         std::vector<JoinValue_t> fJoinFieldValues;

         /// Store a copy of the provided join field values.
         RCombinedJoinFieldValue(const std::vector<JoinValue_t> &joinFieldValues) : fJoinFieldValues(joinFieldValues) {}

         /// Two combined values compare equal when they hold the same values in the same order.
         bool operator==(const RCombinedJoinFieldValue &rhs) const
         {
            return fJoinFieldValues == rhs.fJoinFieldValues;
         }
      };

      /////////////////////////////////////////////////////////////////////////////
      /// Hash combining the individual join field value hashes from RCombinedJoinFieldValue. Uses the implementation
      /// from `boost::hash_combine`. See
      /// https://www.boost.org/doc/libs/1_87_0/libs/container_hash/doc/html/hash.html#notes_hash_combine for more
      /// background. In particular, it mentions: "Several improvements of the 64 bit function have been subsequently
      /// proposed, by [David Stafford](https://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html), [Pelle
      /// Evensen](https://mostlymangling.blogspot.com/2019/12/stronger-better-morer-moremur-better.html), and [Jon
      /// Maiga](http://jonkagstrom.com/mx3/mx3_rev2.html). We currently use Jon Maiga’s function."
      ///
      /// \note
      /// \parblock
      /// Copyright 2005-2014 Daniel James.
      /// Copyright 2021, 2022 Peter Dimov.
      /// Distributed under the Boost Software License, Version 1.0.
      /// https://www.boost.org/LICENSE_1_0.txt
      ///
      /// Based on Peter Dimov's proposal
      /// http://www.open-std.org/JTC1/SC22/WG21/docs/papers/2005/n1756.pdf
      /// issue 6.18.
      ///
      /// Boost Software License - Version 1.0 - August 17th, 2003
      ///
      /// Permission is hereby granted, free of charge, to any person or organization
      /// obtaining a copy of the software and accompanying documentation covered by
      /// this license (the "Software") to use, reproduce, display, distribute,
      /// execute, and transmit the Software, and to prepare derivative works of the
      /// Software, and to permit third-parties to whom the Software is furnished to
      /// do so, all subject to the following:
      ///
      /// The copyright notices in the Software and this entire statement, including
      /// the above license grant, this restriction and the following disclaimer,
      /// must be included in all copies of the Software, in whole or in part, and
      /// all derivative works of the Software, unless such copies or derivative
      /// works are solely in the form of machine-executable object code generated by
      /// a source language processor.
      ///
      /// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      /// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      /// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
      /// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
      /// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
      /// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
      /// DEALINGS IN THE SOFTWARE.
      /// \endparblock
      struct RCombinedJoinFieldValueHash {
         /// Compute the hash of a combined join field value by folding each individual value into a running seed.
         ///
         /// \param[in] joinFieldVal The combined join field value to hash.
         ///
         /// \return The combined hash. A value without any join field values hashes to 0.
         inline std::size_t operator()(const RCombinedJoinFieldValue &joinFieldVal) const
         {
            std::size_t seed = 0;
            for (const auto &fieldVal : joinFieldVal.fJoinFieldValues) {
               // The seed takes part in every step, so the order of the values influences the result.
               // NOTE(review): the referenced `boost::hash_combine` assigns the mixed value to the seed, whereas the
               // mixed value is XOR-ed into the seed here -- confirm that this difference is intended.
               seed ^= mix(seed + 0x9e3779b9 + fieldVal);
            }
            return seed;
         }

         /// Bit mixer applied to each intermediate value: alternating xor-shift and multiply steps.
         ///
         /// \param[in] init The value to mix.
         ///
         /// \return The mixed value, narrowed to `std::size_t`.
         inline std::size_t mix(std::size_t init) const
         {
            // The mixing is done on a fixed-width 64-bit integer: shifting a 32-bit `std::size_t` by 32 bits is
            // undefined behaviour. Where `std::size_t` is 64 bits wide, the result is the same as mixing in place.
            constexpr std::uint64_t kMultiplier = 0xe9846af9b1a615d;
            std::uint64_t mixed = init;
            mixed ^= mixed >> 32;
            mixed *= kMultiplier;
            mixed ^= mixed >> 32;
            mixed *= kMultiplier;
            mixed ^= mixed >> 28;
            return static_cast<std::size_t>(mixed);
         }
      };

      /// The mapping itself. Maps field values (or combinations thereof in case the join key is composed of multiple
      /// fields) to their respective entry numbers. Keys are wrapped in `RCombinedJoinFieldValue` and hashed with
      /// `RCombinedJoinFieldValueHash`; several entry numbers can be stored for the same key.
      std::unordered_map<RCombinedJoinFieldValue, std::vector<ROOT::NTupleSize_t>, RCombinedJoinFieldValueHash>
         fMapping;

      /// Names of the join fields whose values are mapped to their respective entry indexes.
      std::vector<std::string> fJoinFieldNames;

      /// The size (in bytes) for each join field, corresponding to `fJoinFieldNames`. This information is stored to be
      /// able to properly cast incoming void pointers to the join field values in `GetEntryIndexes`.
      std::vector<std::size_t> fJoinFieldValueSizes;

   public:
      //////////////////////////////////////////////////////////////////////////
      /// \brief Get the entry indexes for this entry mapping.
      ///
      /// \param[in] valuePtrs Pointers to the join field values to look up. The pointed-to values are interpreted
      /// using the sizes stored in `fJoinFieldValueSizes`.
      ///
      /// \return A pointer to the entry indexes stored for the given values. NOTE(review): presumably `nullptr` when
      /// there is no matching entry -- confirm against the implementation.
      ///
      /// NOTE(review): `valuePtrs` is taken by value here, while `RNTupleJoinTable::GetEntryIndexes` takes it by
      /// const reference; passing an lvalue therefore copies the vector on each call.
      const std::vector<ROOT::NTupleSize_t> *GetEntryIndexes(std::vector<void *> valuePtrs) const;

      //////////////////////////////////////////////////////////////////////////
      /// \brief Create a new entry mapping.
      ///
      /// \param[in] pageSource The page source of the RNTuple with the entries to map. It is taken by non-const
      /// reference.
      /// \param[in] joinFieldNames Names of the join fields to use in the mapping.
      REntryMapping(ROOT::Internal::RPageSource &pageSource, const std::vector<std::string> &joinFieldNames);
   };
   /// Names of the join fields used for the mapping to their respective entry indexes. Initialized from the
   /// constructor argument.
   std::vector<std::string> fJoinFieldNames;

   /// The entry mappings of the join table, grouped by partition key. Each partition holds one or multiple entry
   /// mappings.
   std::unordered_map<PartitionKey_t, std::vector<std::unique_ptr<REntryMapping>>> fPartitions;

   /////////////////////////////////////////////////////////////////////////////
   /// \brief Create a new RNTupleJoinTable for the provided join fields.
   ///
   /// The constructor only stores the join field names; the table starts out without any partitions. It is private,
   /// the public `Create` function returns a new join table.
   ///
   /// \param[in] joinFieldNames The names of the join fields to use for the join table. Only integral-type fields are
   /// allowed.
   RNTupleJoinTable(const std::vector<std::string> &joinFieldNames) : fJoinFieldNames(joinFieldNames) {}

public:
   // A join table can neither be copied nor moved; the destructor is the compiler-generated one.
   RNTupleJoinTable(const RNTupleJoinTable &other) = delete;
   RNTupleJoinTable &operator=(const RNTupleJoinTable &other) = delete;
   RNTupleJoinTable(RNTupleJoinTable &&other) = delete;
   RNTupleJoinTable &operator=(RNTupleJoinTable &&other) = delete;
   ~RNTupleJoinTable() = default;

   /////////////////////////////////////////////////////////////////////////////
   /// \brief Create an RNTupleJoinTable for the provided join fields.
   ///
   /// This function does not take an RNTuple itself; the entries of an RNTuple are added to the join table through
   /// `Add`.
   ///
   /// \param[in] joinFieldNames The names of the join fields to use for the join table. Only integral-type fields are
   /// allowed.
   ///
   /// \return A pointer to the newly-created join table.
   static std::unique_ptr<RNTupleJoinTable> Create(const std::vector<std::string> &joinFieldNames);

   /////////////////////////////////////////////////////////////////////////////
   /// \brief Add an entry mapping to the join table.
   ///
   /// \param[in] pageSource The page source of the RNTuple with the entries to map.
   /// \param[in] partitionKey Which partition to add the mapping to. If not provided, it will be added to the default
   /// partition (`kDefaultPartitionKey`).
   ///
   /// \return A reference to the updated join table, which allows for chaining calls to `Add`.
   RNTupleJoinTable &Add(ROOT::Internal::RPageSource &pageSource, PartitionKey_t partitionKey = kDefaultPartitionKey);

   /////////////////////////////////////////////////////////////////////////////
   /// \brief Get all entry indexes for the given join field value(s) within a partition.
   ///
   /// \param[in] valuePtrs A vector of pointers to the join field values to look up. NOTE(review): presumably one
   /// pointer per join field, in the order of the join field names -- confirm against the implementation.
   /// \param[in] partitionKey The partition key to use for the lookup. If not provided, it will use the default
   /// partition key (`kDefaultPartitionKey`).
   ///
   /// \return The entry numbers that correspond to `valuePtrs`, returned by value. When there are no corresponding
   /// entries, an empty vector is returned.
   std::vector<ROOT::NTupleSize_t>
   GetEntryIndexes(const std::vector<void *> &valuePtrs, PartitionKey_t partitionKey = kDefaultPartitionKey) const;

   /////////////////////////////////////////////////////////////////////////////
   /// \brief Get all entry indexes for the given join field value(s) for a specific set of partitions.
   ///
   /// \param[in] valuePtrs A vector of pointers to the join field values to look up. NOTE(review): presumably one
   /// pointer per join field, in the order of the join field names -- confirm against the implementation.
   /// \param[in] partitionKeys The partition keys to use for the lookup.
   ///
   /// \return The entry numbers that correspond to `valuePtrs`, grouped by partition: the keys of the returned map
   /// are partition keys. When there are no corresponding entries, an empty map is returned.
   std::unordered_map<PartitionKey_t, std::vector<ROOT::NTupleSize_t>>
   GetPartitionedEntryIndexes(const std::vector<void *> &valuePtrs,
                              const std::vector<PartitionKey_t> &partitionKeys) const;

   /////////////////////////////////////////////////////////////////////////////
   /// \brief Get all entry indexes for the given join field value(s) for all partitions.
   ///
   /// This overload takes no partition keys, as opposed to the overload that restricts the lookup to a specific set
   /// of partitions.
   ///
   /// \param[in] valuePtrs A vector of pointers to the join field values to look up. NOTE(review): presumably one
   /// pointer per join field, in the order of the join field names -- confirm against the implementation.
   ///
   /// \return The entry numbers that correspond to `valuePtrs`, grouped by partition: the keys of the returned map
   /// are partition keys. When there are no corresponding entries, an empty map is returned.
   std::unordered_map<PartitionKey_t, std::vector<ROOT::NTupleSize_t>>
   GetPartitionedEntryIndexes(const std::vector<void *> &valuePtrs) const;
};
} // namespace Internal
} // namespace Experimental
} // namespace ROOT

#endif // ROOT_RNTupleJoinTable
