/* BEGIN software license
 *
 * MsXpertSuite - mass spectrometry software suite
 * -----------------------------------------------
 * Copyright(C) 2009,...,2018 Filippo Rusconi
 *
 * http://www.msxpertsuite.org
 *
 * This file is part of the MsXpertSuite project.
 *
 * The MsXpertSuite project is the successor of the massXpert project. This
 * project now includes various independent modules:
 *
 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * END software license
 */


/////////////////////// Local includes
#include "MsXpS/libXpertMassCore/CleavageMotif.hpp"
#include "MsXpS/libXpertMassCore/Polymer.hpp"
#include "MsXpS/libXpertMassCore/PolChemDef.hpp"

namespace MsXpS
{

namespace libXpertMassCore
{


/*!
\class MsXpS::libXpertMassCore::CleavageMotif
\inmodule libXpertMassCore
\ingroup PolChemDefAqueousChemicalReactions
\inheaderfile CleavageMotif.hpp

\brief The CleavageMotif class provides a model for specifying aqueous cleavage
motifs of \l{Polymer} \l{Sequence}s.

When a polymer sequence cleavage occurs, using, for example, the Trypsin
cleavage agent, that cleavage agent specifies that cleavage should occur at the
following sites "Lys/;Arg/;-Lys/Pro".

The "Lys/;Arg/;-Lys/Pro" string of sites gets parsed and \e{cleavage motifs} are
generated from it. In this specific case, we'll have three \e{CleavageMotif}
instances with the following data:

- First motif (or cleavage site):
- "Lys"
- monomer container: [0] = "Lys"
- offset = 1 ('/' indicates that the cut is right of monomer)
- is for cleavage ? = Enums::CleavageAction::CLEAVE

- Second motif (or cleavage site):
- "Arg"
- monomer container: [0] = "Arg"
- offset = 1 ('/' indicates that the cut is right of monomer)
- is for cleavage ? = Enums::CleavageAction::CLEAVE

- Third motif (or non-cleavage site):
- "-LysPro"
- monomer container: [0] = "Lys", [1] = "Pro"
- offset = 1 ('/' indicates that the cut is right of monomer)
- is for cleavage ? = Enums::CleavageAction::NO_CLEAVE

Thanks to this deconstruction (from "Lys/;Arg/;-Lys/Pro" to the 3 cleavage
motifs above), the polymer sequence is cleaved according to the
cleavage agent specification.

\sa CleavageAgent, CleavageRule
*/


/*!
\variable MsXpS::libXpertMassCore::CleavageMotif::mcsp_polChemDef

\brief The \l PolChemDef (polymer chemistry definition) that is needed.
*/

/*!
\variable MsXpS::libXpertMassCore::CleavageMotif::m_monomers

\brief The container of Monomer instances. The Monomer instances
are in the form of const shared pointers to Monomer in the
polymer chemistry definition.

A "Lys/Pro" motif will translate into two Monomer instances (actually pointers)
having codes Lys and Pro, in the right order.
*/

/*!
\variable MsXpS::libXpertMassCore::CleavageMotif::m_offset

\brief The offset between the actual cleavage site and the first monomer of the
motif.

In the "Lys/", "Arg/" and "-Lys/Pro" examples, the offset would be 1 for
each motif, because each time the cleavage occurs after the first monomer
code: Lys/, or Arg/ or Lys/Pro. In the /Asp cleavage site the offset would be 0,
because the cleavage occurs before the first monomer in the motif (Asp). In the
ATGC/GCAT cleavage motif, the offset would be 4.
*/

/*!
\variable MsXpS::libXpertMassCore::CleavageMotif::m_cleavageAction

\brief Tells if the motif is for cleavage or not for cleavage.

In the "Lys/" and "Arg/" motifs, that would be Enums::CleavageAction::CLEAVE;
for "-Lys/Pro", that would be Enums::CleavageAction::NO_CLEAVE, because Trypsin
does not cleave after a Lysyl residue if it is followed by a Prolyl residue.
*/

/*!
\variable MsXpS::libXpertMassCore::CleavageMotif::m_isValid
\brief Tells the validity status of the CleavageMotif instance.
*/

/*!
\brief Constructs a cleavage motif with a number of parameters.

\list
\li \a pol_chem_def_csp Polymer chemistry definition. Cannot be nullptr.

\li \a motif Motif in the form of "Lys/" or "Lys/Pro" or "/Asp".

\li \a offset Offset position of the cleavage to the first monomer code in
the motif.

\li \a cleavage_action Tells if motif is for cleavage (for example, "Lys/") or
not for cleavage (for example, "-Lys/Pro").
\endlist

After setting the member data, the CleavageMotif instance is validated and
m_isValid is set to the result of this validation.
*/
CleavageMotif::CleavageMotif(PolChemDefCstSPtr pol_chem_def_csp,
                             const QString &motif,
                             int offset,
                             Enums::CleavageAction cleavage_action)
  : mcsp_polChemDef(pol_chem_def_csp),
    m_offset(offset),
    m_cleavageAction(cleavage_action)
{
  // parseMotif() returns the number of Monomer pointers it stored; a count of
  // zero means that nothing could be extracted from the motif string.
  const std::size_t parsed_monomer_count = parseMotif(motif);

  if(parsed_monomer_count == 0)
    qCritical()
      << "Upon construction of CleavageMotif, the motif could not be parsed.";

  // Whatever the parsing outcome, validate the instance as a whole so that
  // m_isValid reflects the state of all the members.
  ErrorList errors;
  m_isValid = validate(&errors);

  if(m_isValid)
    return;

  qCritical() << "Upon construction of CleavageMotif, the instance failed to "
                 "validate with errors:"
              << Utils::joinErrorList(errors, ", ");
}

/*!
\brief Constructs a CleavageMotif instance as a copy of \a other.

After setting the member data, the CleavageMotif instance is validated and
m_isValid is set to the result of this validation.
*/
CleavageMotif::CleavageMotif(const CleavageMotif &other)
  : mcsp_polChemDef(other.mcsp_polChemDef),
    m_offset(other.m_offset),
    m_cleavageAction(other.m_cleavageAction)
{
  // The container is copied element-wise: the pointers are duplicated, not the
  // Monomer instances they point to.
  m_monomers = other.m_monomers;

  // The validity flag is not copied from other: it is recomputed from the
  // members that were just set.
  ErrorList errors;
  m_isValid = validate(&errors);

  if(m_isValid)
    return;

  qCritical() << "Upon copy-construction of CleavageMotif, the instance "
                 "failed to validate with errors:"
              << Utils::joinErrorList(errors, ", ");
}

/*!
\brief Destructs this CleavageMotif instance.
*/
CleavageMotif::~CleavageMotif()
{
  // Explicitly drop the Monomer pointers held by this motif. The container
  // would release them on destruction anyway; the call only makes that
  // explicit.
  this->m_monomers.clear();
}

/*!
\brief Sets the PolChemDef member to \a pol_chem_def_csp.

This instance then undergoes validation and m_isValid is set to the result of
it.
*/
void
CleavageMotif::setPolChemDefCstSPtr(PolChemDefCstSPtr pol_chem_def_csp)
{
  mcsp_polChemDef = pol_chem_def_csp;

  // Changing the polymer chemistry definition may change the validity of the
  // instance, so recompute it right away.
  ErrorList errors;
  m_isValid = validate(&errors);

  if(m_isValid)
    return;

  qCritical() << "Upon setting PolChemDef of CleavageMotif, the instance "
                 "failed to validate with errors:"
              << Utils::joinErrorList(errors, ", ");
}

/*!
\brief Returns the \l PolChemDef.
*/
PolChemDefCstSPtr
CleavageMotif::getPolChemDefCstSPtr() const
{
  // Returned by value: the caller receives its own copy of the pointer object.
  return this->mcsp_polChemDef;
}

/*!
\brief Parses the \a motif and fills-in the member container of Monomers as a
result of the parsing.

\sa parseMotif()
*/
void
CleavageMotif::setMotif(const QString &motif)
{
  if(!parseMotif(motif))
    {
      qCritical()
        << "Upon setting motif string of CleavageMotif, the motif could "
           "not be parsed.";
      m_isValid = false;
    }

  // Let's see the other members.
  ErrorList error_list;

  m_isValid = validate(&error_list);

  if(!m_isValid)
    qCritical() << "Upon setting motif of CleavageMotif, the instance "
                   "failed to validate with errors:"
                << Utils::joinErrorList(error_list, ", ");
}

/*!
\brief Returns the motif as a concatenation of the codes of the Monomer
instances found in the member container of Monomers.
*/
QString
CleavageMotif::getMotif() const
{
  // Concatenate the codes in container order, without any separator.
  QString concatenated_codes;

  for(const auto &monomer_sp : m_monomers)
    {
      concatenated_codes += monomer_sp->getCode();
    }

  return concatenated_codes;
}

/*!
\brief Returns a const reference to the container of Monomers.
*/
const std::vector<MonomerSPtr> &
CleavageMotif::getMonomersCstRef() const
{
  return m_monomers;
}

/*!
\brief Returns a reference to the container of Monomers.
*/
std::vector<MonomerSPtr> &
CleavageMotif::getMonomersRef()
{
  return m_monomers;
}

/*!
\brief Sets the \a offset.

The offset is the position of the cleavage inside the motif, with respect to the
first monomer code in that motif. For example, for a cleavage site "Lys/Pro",
the cleavage motif becomes {"Lys", "Pro"} and the offset is 1, while for a site
"/Asp", the cleavage motif becomes {"Asp"} and the offset is 0.

An offset value can thus not be greater than the number of Monomer codes in the
motif.

This instance then undergoes validation and m_isValid is set to the result of
it.
*/
void
CleavageMotif::setOffset(int offset)
{
  m_offset = offset;

  ErrorList error_list;

  m_isValid = validate(&error_list);

  if(!m_isValid)
    qCritical() << "Upon setting offset of CleavageMotif, the instance "
                   "failed to validate with errors:"
                << Utils::joinErrorList(error_list, ", ");
}

/*!
\brief Returns the offset.
*/
int
CleavageMotif::getOffset() const
{
  return m_offset;
}

/*!
\brief Sets the member Enums::CleavageAction to \a cleavage_action.

This instance then undergoes validation and m_isValid is set to the result of
it.
*/
void
CleavageMotif::setCleavageAction(Enums::CleavageAction cleavage_action)
{
  m_cleavageAction = cleavage_action;

  // A cleavage action of Enums::CleavageAction::NOT_SET fails validation, so
  // recompute the validity status now.
  ErrorList errors;
  m_isValid = validate(&errors);

  if(m_isValid)
    return;

  qCritical()
    << "Upon setting cleave operation of CleavageMotif, the instance "
       "failed to validate with errors:"
    << Utils::joinErrorList(errors, ", ");
}

/*!
\brief Returns if motif is for cleavage or not.
*/
Enums::CleavageAction
CleavageMotif::getCleavageAction() const
{
  // Tells whether the motif is for cleavage or for preventing cleavage.
  return this->m_cleavageAction;
}

/*!
\brief Assigns \a other to this CleavageMotif instance.

This instance then undergoes validation and m_isValid is set to the result of
it.

Returns a reference to this CleavageMotif instance.
*/
CleavageMotif &
CleavageMotif::operator=(const CleavageMotif &other)
{
  // Self-assignment leaves the instance untouched (and does not re-validate).
  if(this != &other)
    {
      // The container copy duplicates the pointers, not the Monomer
      // instances they point to.
      m_monomers       = other.m_monomers;
      mcsp_polChemDef  = other.mcsp_polChemDef;
      m_offset         = other.m_offset;
      m_cleavageAction = other.m_cleavageAction;

      // The validity flag is recomputed rather than copied from other.
      ErrorList errors;
      m_isValid = validate(&errors);

      if(!m_isValid)
        qCritical() << "Upon assignment of CleavageMotif, the instance "
                       "failed to validate with errors:"
                    << Utils::joinErrorList(errors, ", ");
    }

  return *this;
}

/*!
\brief Returns true if \c this and \a other are identical.

The comparison of the Monomer instances in the member container is deep (that is
the Monomer instances are compared and not the shared pointers).
*/
bool
CleavageMotif::operator==(const CleavageMotif &other) const
{
  // An instance is always identical to itself.
  if(this == &other)
    return true;

  // The motif is a vector of MonomerSPtr: copying a CleavageMotif (copy
  // constructor or operator=()) copies the pointers, so both instances then
  // refer to the same Monomer instances.

  // The PolChemDef is deliberately left out of the comparison: comparing it
  // would cause an infinite loop, since each instance of this class in the
  // PolChemDef would in turn try to compare the PolChemDef.

  // Cheap scalar comparisons first.
  if(m_offset != other.m_offset)
    return false;

  if(m_cleavageAction != other.m_cleavageAction)
    return false;

  const std::size_t monomer_count = m_monomers.size();

  if(monomer_count != other.m_monomers.size())
    return false;

  // Deep comparison: the pointed-to Monomer instances are compared, not the
  // pointers themselves.
  for(std::size_t index = 0; index < monomer_count; ++index)
    {
      if(*m_monomers[index] != *other.m_monomers[index])
        return false;
    }

  return true;
}

/*!
\brief Returns true if \c this and \a other are different.

Returns the negated result of operator==().
*/
bool
CleavageMotif::operator!=(const CleavageMotif &other) const
{
  // An instance never differs from itself; otherwise this is the exact
  // negation of operator==().
  return &other != this && !operator==(other);
}

/*!
\brief Parses the cleavage \a site and returns the count of Monomer
instances stored in the member container as a result of parsing the \a site.

A cleavage site is a string in the form "Lys/Pro" or "/Asp". The member
container of Monomer pointers is filled-in with pointers to the PolChemDef's
Monomer instances having codes {Lys, Pro} or {Asp}. The offset is the position
of the '/' cleavage symbol (that is, the actual cleavage position in the
motif). For "Lys/Pro", the offset is 1 while for "/Asp", the offset is 0.

After setting the member data, the CleavageMotif instance is validated and
m_isValid is set to the result of this validation.
*/
std::size_t
CleavageMotif::parseSite(const QString &site)
{
  // Parses a cleavage site string such as "Lys/Pro" or "/Asp".
  //
  // The member container of Monomers is cleared, then refilled with the
  // PolChemDef Monomers matching the codes found in site, and m_offset is set
  // to the position of the '/' cleavage symbol counted in Monomers.
  //
  // Returns the number of Monomers stored. Returns 0, with m_isValid set to
  // false, when no PolChemDef is available, when site does not contain
  // exactly one '/', or when the PolChemDef reports a code length of 0.
  m_monomers.clear();

  if(mcsp_polChemDef == nullptr || mcsp_polChemDef.get() == nullptr)
    {
      qCritical()
        << "Cannot parse cleavage site without any available PolChemDef.";
      m_isValid = false;
      return 0;
    }

  // A site must carry exactly one '/' cleavage indicator.
  if(site.count('/') != 1)
    {
      qCritical() << "Cannot parse cleavage site: either missing '/' cleavage "
                     "indicator or more than one.";
      m_isValid = false;
      return 0;
    }

  const std::size_t code_length = mcsp_polChemDef->getCodeLength();

  // The regular expression below uses (code_length - 1) as a quantifier
  // bound. With an unsigned code length of 0, that subtraction would wrap
  // around to a huge value and yield an unusable pattern.
  if(code_length == 0)
    {
      qCritical() << "Cannot parse cleavage site: the PolChemDef monomer code "
                     "length is 0.";
      m_isValid = false;
      return 0;
    }

  std::size_t monomer_count = 0;

  // One match is one monomer code (an uppercase letter followed by at most
  // code_length - 1 lowercase letters), optionally preceded and/or followed
  // by the '/' cleavage indicator.
  QRegularExpression single_monomer_code_regexp(
    QString("([/]?)([A-Z][a-z]{0,%1})([/]?)").arg(code_length - 1));

  for(const QRegularExpressionMatch &match :
      single_monomer_code_regexp.globalMatch(site))
    {
      const QString ant_cleave  = match.captured(1);
      const QString code        = match.captured(2);
      const QString post_cleave = match.captured(3);

      // A '/' before the code: the cleavage occurs before this Monomer, that
      // is, after the monomer_count Monomers already stored.
      if(!ant_cleave.isEmpty())
        m_offset = monomer_count;

      if(code.isEmpty())
        qFatalStream() << "Programming error: code cannot be empty.";

      const MonomerSPtr monomer_csp =
        mcsp_polChemDef->getMonomerCstSPtrByCode(code);
      if(monomer_csp == nullptr)
        qFatalStream()
          << "Programming error. The monomer code must be known to the "
             "PolChemDef.";

      m_monomers.push_back(monomer_csp);
      ++monomer_count;

      // A '/' after the code: the cleavage occurs right after this Monomer,
      // which has just been counted.
      if(!post_cleave.isEmpty())
        m_offset = monomer_count;
    }

  // Validate the instance with the freshly parsed Monomers and offset.
  ErrorList error_list;

  m_isValid = validate(&error_list);

  if(!m_isValid)
    qCritical() << "Upon parsing cleavage site in CleavageMotif, the instance "
                   "failed to validate with errors:"
                << Utils::joinErrorList(error_list, ", ");

  return m_monomers.size();
}

/*!
\brief Parses the cleavage \a motif and returns the count of Monomer
instances stored in m_monomers as a result of parsing the \a motif.

A cleavage motif is a string in the form "LysPro" or "Asp" (in fact that is
the string representation of a \l{Sequence}).
*/
std::size_t
CleavageMotif::parseMotif(const QString &motif)
{
  m_monomers.clear();

  if(mcsp_polChemDef == nullptr || mcsp_polChemDef.get() == nullptr)
    {
      qCritical() << "Cannot parse motif without any available PolChemDef;";
      m_isValid = false;
      return 0;
    }

  Sequence motif_sequence(mcsp_polChemDef, motif);

  ErrorList error_list;
  if(!motif_sequence.validate(&error_list))
    {
      qCritical()
        << "The motif" << motif
        << "failed to convert to a valid Sequence object, with errors:"
        << Utils::joinErrorList(error_list, ", ");
      m_isValid = false;
      return 0;
    }

  motif_sequence.cleanupMonomers();
  for(const MonomerSPtr &monomer_sp : motif_sequence.getMonomersCstRef())
    {
      m_monomers.push_back(monomer_sp);
      // qDebug() << "Pushed back monomer:" << monomer_csp->toString();
    }

  return m_monomers.size();
}

/*!
\brief Returns true if validation of this CleavageMotif instance was successful,
false otherwise.

If errors are encountered and \a error_list_p is not nullptr, then these errors
are stored in that ErrorList.
*/
bool
CleavageMotif::validate(ErrorList *error_list_p) const
{
  // Checks that a PolChemDef is available, that the motif holds at least one
  // Monomer, that the offset does not exceed the number of Monomers and that
  // the cleavage action has been set.
  //
  // error_list_p is optional: when it is not nullptr, one message per failed
  // check is appended to it. m_isValid is updated with the outcome, which is
  // also returned (true when all the checks passed).

  // Failures are counted locally rather than deduced from the growth of the
  // caller's list, so that a nullptr error_list_p is never dereferenced.
  std::size_t failure_count = 0;

  const auto record_failure =
    [&failure_count, error_list_p](const char *message) {
      ++failure_count;

      if(error_list_p != nullptr)
        error_list_p->push_back(message);
    };

  if(mcsp_polChemDef == nullptr || mcsp_polChemDef.get() == nullptr)
    {
      qCritical() << "A CleavageMotif with no PolChemDef available cannot "
                     "validate successfully.";

      record_failure(
        "A CleavageMotif with no PolChemDef available cannot validate "
        "successfully");
    }

  if(m_monomers.size() == 0)
    {
      qCritical() << "A CleavageMotif with no Monomer motif cannot validate "
                     "successfully.";

      record_failure(
        "A CleavageMotif with no Monomer motif cannot validate successfully");
    }

  // The conversion to an unsigned type is made explicit: it is what the
  // comparison with size() performs anyway. A negative offset, if m_offset is
  // of a signed type, converts to a very large value and is rejected here too.
  if(static_cast<std::size_t>(m_offset) > m_monomers.size())
    {
      qCritical()
        << "A CleavageMotif with an offset greater than the number of "
           "Monomer codes in the motif cannot validate successfully.";

      record_failure(
        "A CleavageMotif with an offset greater than the number of "
        "Monomer codes in the motif cannot validate successfully");
    }

  if(m_cleavageAction == Enums::CleavageAction::NOT_SET)
    {
      qCritical() << "A CleavageMotif with no set Enums::CleavageAction cannot "
                     "validate successfully.";

      record_failure(
        "A CleavageMotif with no set Enums::CleavageAction cannot validate "
        "successfully");
    }

  m_isValid = (failure_count == 0);
  return m_isValid;
}

/*!
\brief Returns the validity status of this CleavageMotif instance.
*/
bool
CleavageMotif::isValid() const
{
  return m_isValid;
}


} // namespace libXpertMassCore
} // namespace MsXpS
