/* BEGIN software license
 *
 * MsXpertSuite - mass spectrometry software suite
 * -----------------------------------------------
 * Copyright(C) 2009,...,2018 Filippo Rusconi
 *
 * http://www.msxpertsuite.org
 *
 * This file is part of the MsXpertSuite project.
 *
 * The MsXpertSuite project is the successor of the massXpert project. This
 * project now includes various independent modules:
 *
 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * END software license
 */


/////////////////////// Local includes
#include "CleaveMotif.hpp"
#include "Polymer.hpp"
#include "PolChemDef.hpp"


namespace MsXpS
{

namespace libXpertMass
{


/*!
\class MsXpS::libXpertMass::CleaveMotif
\inmodule libXpertMass
\ingroup PolChemDefAqueousChemicalReactions
\inheaderfile CleaveMotif.hpp

\brief The CleaveMotif class provides a model for specifying aqueous cleavage
sites of \l{Polymer} \l{Sequence}s.

When a polymer sequence cleavage occurs, using, for example, the
specification "Lys/;Arg/;-Lys/Pro", a number of actions need be
performed prior to listing the oligomers obtained as a result of the
cleavage.

The "Lys/;Arg/;-Lys/Pro" cleavage specification (\l CleaveSpec) gets crunched
in a number of steps and cleavage motifs are generated for it. In this specific
case we'll have three motifs with the following data:

- First motif (or cleavage site):
- "Lys/"
- code list [0] = "Lys"
- offset = 1 ('/' indicates that cut is right of monomer)
- is for cleavage ? = true

- Second motif (or cleavage site):
- "Arg/"
- code list [0] = "Arg"
- offset = 1 ('/' indicates that cut is right of monomer)
- is for cleavage ? = true

- Third motif (or non-cleavage site):
- "-Lys/Pro"
- code list [0] = "Lys", [1] = "Pro"
- offset = 1 ('/' indicates that cut is right of monomer)
- is for cleavage ? = false

Thanks to this deconstruction (from "Lys/;Arg/;-Lys/Pro" to the three
motifs above), the polymer sequence can be cleaved according to the
specification.

\sa CleaveSpec, CleaveRule
*/


/*!
\variable MsXpS::libXpertMass::CleaveMotif::m_motif

\brief The string motif that describes a specific cleavage site.

The "Lys/" cleavage motif is part of the cleavage specification that describes
the model for the Trypsin cleavage in the proteinaceous world.
*/

/*!
\variable MsXpS::libXpertMass::CleaveMotif::m_codeList

\brief The list of \l Monomer codes that are in the string motif.

For the "-Lys/Pro" motif, also from the Trypsin cleavage specification, there
are two codes: "Lys" and "Pro".
*/

/*!
\variable MsXpS::libXpertMass::CleaveMotif::m_offset

\brief The cleavage site offset with respect to the first monomer code of the
motif.

In the "Lys/", "Arg/" and "-Lys/Pro" examples, the offset would be 1 for
each motif, because each time the cleavage occurs after the first monomer
code: Lys/, or Arg/ or Lys/Pro.
*/

/*!
\variable MsXpS::libXpertMass::CleaveMotif::m_isForCleave

\brief Tells if the motif is for cleavage.

In the "Lys/" and "Arg/" motifs, that would be true; for "-Lys/Pro", that would
be false, because Trypsin does not cleave after a lysyl residue if it is
followed by a prolyl residue.
*/


/*!
\brief Builds a cleavage motif out of its individual components.

\a pol_chem_def_csp Polymer chemistry definition this motif refers to. Cannot
be nullptr.

\a name Name of the motif. Cannot be empty.

\a motif Motif string, like "Lys/" or "-Lys/Pro".

\a offset Position of the cleavage relative to the first monomer code of the
motif.

\a is_for_cleavage True if the motif describes a site where the cleavage
occurs (for example, "Lys/"), false if it describes a site where the cleavage
does not occur (for example, "-Lys/Pro").

The list of monomer codes is not set by this constructor.
*/
CleaveMotif::CleaveMotif(PolChemDefCstSPtr pol_chem_def_csp,
                         QString name,
                         const QString &motif,
                         int offset,
                         bool is_for_cleavage)
  : PolChemDefEntity(pol_chem_def_csp, name),
    m_motif(motif),
    m_offset(offset),
    m_isForCleave(is_for_cleavage)
{
  // All the members are set in the initializer list above.
}

/*!
\brief Builds a CleaveMotif instance by copying \a other.

The base class part, the motif string, the list of monomer codes, the offset
and the cleavage flag are all copied from \a other.
*/
CleaveMotif::CleaveMotif(const CleaveMotif &other)
  : PolChemDefEntity(other),
    m_motif(other.m_motif),
    m_codeList(other.m_codeList),
    m_offset(other.m_offset),
    m_isForCleave(other.m_isForCleave)
{
  // Member-wise copy is entirely performed in the initializer list above.
}

/*!
\brief Destroys this CleaveMotif instance.

There is nothing to release explicitly.
*/
CleaveMotif::~CleaveMotif() = default;

/*!
\brief Copies the contents of \a other into this CleaveMotif instance.

Assigning an instance to itself changes nothing.

Returns a reference to this CleaveMotif instance.
*/
CleaveMotif &
CleaveMotif::operator=(const CleaveMotif &other)
{
  if(this != &other)
    {
      // The base class copies its own part of the object.
      PolChemDefEntity::operator=(other);

      m_motif       = other.m_motif;
      m_codeList    = other.m_codeList;
      m_offset      = other.m_offset;
      m_isForCleave = other.m_isForCleave;
    }

  return *this;
}

/*!
\brief Sets the \a motif.
*/
void
CleaveMotif::setMotif(const QString &motif)
{
  m_motif = motif;
}

/*!
\brief Returns a reference to the motif string of this instance.
*/
const QString &
CleaveMotif::motif()
{
  return this->m_motif;
}

/*!
\brief Returns a reference to the list of monomer codes found in the motif.
*/
const QStringList &
CleaveMotif::codeList() const
{
  return this->m_codeList;
}

/*!
\brief Sets the \a offset.
*/
void
CleaveMotif::setOffset(int offset)
{
  m_offset = offset;
}

/*!
\brief Returns the offset.
*/
int
CleaveMotif::offset()
{
  return m_offset;
}

/*!
\brief Sets if motif is for cleavage to \a for_cleave.
*/
void
CleaveMotif::setForCleave(bool for_cleave)
{
  m_isForCleave = for_cleave;
}

/*!
\brief Returns if motif is for cleavage.
*/
bool
CleaveMotif::isForCleave()
{
  return m_isForCleave;
}

/*!
\brief Parses the cleavage \a site.

Upon parsing of the cleavage \a site, this CleaveMotif instance is filled with
data.

Returns the number of monomer codes in the motif, or -1 upon error.
*/
int
CleaveMotif::parse(const QString &site)
{
  const QList<Monomer *> &refList = mcsp_polChemDef->monomerList();

  QString code;
  QString error;
  QString local = site;

  int index = 0;
  int count = 0;

  // We get something like "Lys/Pro" or something like "KKGK/RRGK" and
  // we have to make three things:
  //
  // 1. change the site "KKGK/RRGK" to a motif string(KKGKRRGK).
  //
  // 2. set the offset member to the index of '/' in the initial site
  // string.
  //
  // 3. make an array of codes with the motif.

  Sequence sequence(local);

  while(1)
    {
      code.clear();

      if(sequence.nextCode(
           &code, &index, &error, mcsp_polChemDef->codeLength()) == -1)
        {
          if(error == "/")
            {
              m_offset = count;

              // qDebug() << __FILE__ << __LINE__
              // << "Found / at code position" << m_offset;

              // Increment index so that we iterate in the next code
              // at next round.
              ++index;

              continue;
            }

          // There was an error parsing the site sequence. If the err
          // string contains a space, that is not serious, we could
          // skip that.

          qDebug() << __FILE__ << __LINE__ << "Failed to parse cleavage site"
                   << site;

          return -1;
        }

      // There might have been no error, but that does not means that
      // we necessarily got a code.
      if(code.isEmpty())
        {
          // We arrived at the end of a code parsing step(either
          // because the current 'local' cleavage site(that is
          // 'sequence') was finished parsing or because we finished
          // parsing one of its monomer codes.
          break;
        }

      // At this point we actually had a code.

      //       qDebug() << __FILE__ << __LINE__
      // 		<< "Got next code:" << code.toAscii();

      // At this point, 'code' contains something that looks like a
      // valid code, but we still have to make sure that this code
      // actually is in our list of monomer codes...

      if(Monomer::isCodeInList(code, refList) == -1)
        {
          qDebug() << __FILE__ << __LINE__ << "Monomer code" << code
                   << " is not in the monomer list.";

          return -1;
        }

      // At the end m_motif will contain KKGKRRGK, while site was
      // "KKGK/RRGK" KKGKRRGK
      m_motif += code;

      // Add the newly parsed code to the code list.
      m_codeList << code;

      ++count;

      // Increment index so that we iterate in the next code at next
      // round.
      ++index;
    }

  // Return the number of successfully parsed monomer codes for the
  // 'site' string.
  Q_ASSERT(count == m_codeList.size());

  return count;
}


/*!
\brief Validates this CleaveMotif instance.

This function is not implemented: its body only contains a Q_ASSERT(0)
statement followed by an unconditional return.

Returns true, which is only reached if the Q_ASSERT statement does not stop
the program.
*/
bool
CleaveMotif::validate()
{
  // Not implemented: calling this function is considered a programming error.
  Q_ASSERT(0);
  return true;
}

} // namespace libXpertMass

} // namespace MsXpS
