/* SimpleComparisonData.java
 *
 * created: Wed May 17 2000
 *
 * This file is part of Artemis
 *
 * Copyright (C) 2000  Genome Research Limited
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * $Header: /nfs/disk222/yeastpub/Repository/powmap/diana/SimpleComparisonData.java,v 1.3 2000/05/31 10:55:37 kmr Exp $
 */

package diana;

import diana.sequence.*;

import uk.ac.sanger.pathogens.embl.Range;
import uk.ac.sanger.pathogens.OutOfRangeException;

import java.util.Vector;
import java.util.Hashtable;

/**
 *  This class contains methods that are common to all ComparisonData
 *  objects.  In particular it has methods for managing AlignMatch objects.
 *
 *  @author Kim Rutherford <kmr@sanger.ac.uk>
 *  @version $Id: SimpleComparisonData.java,v 1.3 2000/05/31 10:55:37 kmr Exp $
 **/

class SimpleComparisonData implements ComparisonData {
  /**
   *  Return an array containing all the AlignMatch objects for this
   *  comparison.  This is the array that was passed to setMatches () (not a
   *  copy), or null if setMatches () has not been called.
   **/
  public AlignMatch [] getMatches () {
    return matches;
  }

  /**
   *  Return all the AlignMatch objects in this comparison which overlap
   *  subject_seq_range on the subject sequence or query_seq_range on the query
   *  sequence.  Each AlignMatch is returned at most once.
   *  @param subject_seq_range The range to check on the subject sequence.
   *  @param query_seq_range The range to check on the query sequence.
   *  @return A newly allocated array of the overlapping matches.  The array
   *    is empty if nothing overlaps or if setMatches () has not been called.
   **/
  public AlignMatch [] getMatchesInRange (final Range subject_seq_range,
                                          final Range query_seq_range) {
    if (matches == null) {
      // setMatches () hasn't been called so there is nothing to search
      return new AlignMatch [0];
    }

    if (subject_sequence_buckets == null || query_sequence_buckets == null) {
      // the buckets are built lazily, the first time they are needed after
      // a call to setMatches ()
      makeBuckets ();
    }

    // a count of how many objects we have put into match_buffer so far.
    int match_buffer_count = 0;

    // the matches in spare_buckets aren't in any bucket so each one has to
    // be checked
    for (int i = 0 ; i < spare_buckets.size () ; ++i) {
      final AlignMatch this_match = (AlignMatch) spare_buckets.elementAt (i);

      if (matchInRange (this_match, subject_seq_range, query_seq_range)) {
        match_buffer[match_buffer_count] = this_match;
        ++match_buffer_count;
      }
    }

    // used to make sure we don't return any duplicates
    final Hashtable table = new Hashtable (100);

    final int last_subject_bucket =
      lastBucketIndex (subject_seq_range, subject_sequence_buckets.length);

    for (int bucket_index = firstBucketIndex (subject_seq_range) ;
         bucket_index <= last_subject_bucket ;
         ++bucket_index) {
      final Vector bucket = subject_sequence_buckets[bucket_index];

      for (int i = 0 ; i < bucket.size () ; ++i) {
        final AlignMatch this_match = (AlignMatch) bucket.elementAt (i);

        if (this_match.getSubjectSequenceRange ().overlaps (subject_seq_range)) {
          match_buffer[match_buffer_count] = this_match;
          ++match_buffer_count;
          table.put (this_match, this_match);
        }
      }
    }

    final int last_query_bucket =
      lastBucketIndex (query_seq_range, query_sequence_buckets.length);

    for (int bucket_index = firstBucketIndex (query_seq_range) ;
         bucket_index <= last_query_bucket ;
         ++bucket_index) {
      final Vector bucket = query_sequence_buckets[bucket_index];

      for (int i = 0 ; i < bucket.size () ; ++i) {
        final AlignMatch this_match = (AlignMatch) bucket.elementAt (i);

        if (table.containsKey (this_match)) {
          // already found through the subject sequence buckets
          continue;
        }

        if (this_match.getQuerySequenceRange ().overlaps (query_seq_range)) {
          match_buffer[match_buffer_count] = this_match;
          ++match_buffer_count;
        }
      }
    }

    final AlignMatch [] return_matches = new AlignMatch [match_buffer_count];

    System.arraycopy (match_buffer, 0,
                      return_matches, 0,
                      return_matches.length);

    return return_matches;
  }

  /**
   *  Return the index of the first bucket that has to be searched to find
   *  the matches that overlap the given Range.  Matches are bucketed by their
   *  start position and makeBuckets () only puts a match in a bucket if its
   *  Range.getCount () is no more than BUCKET_SIZE (assumed here to be the
   *  number of bases in the Range), so a match that overlaps the Range can
   *  start no earlier than the bucket before the one containing the start of
   *  the Range.
   **/
  private static int firstBucketIndex (final Range range) {
    return Math.max (0, range.getStart () / BUCKET_SIZE - 1);
  }

  /**
   *  Return the index of the last bucket that has to be searched to find the
   *  matches that overlap the given Range.  This is the bucket containing the
   *  end of the Range, clamped to the last bucket that exists so that a Range
   *  that extends past the highest match is handled.
   *  @param bucket_count The number of buckets in the array that will be
   *    searched.
   **/
  private static int lastBucketIndex (final Range range,
                                      final int bucket_count) {
    return Math.min (range.getEnd () / BUCKET_SIZE, bucket_count - 1);
  }

  /**
   *  If this object contains only valid matches for a comparison between
   *  subject_sequence and query_sequence return null (subject_sequence is the
   *  subject of the comparison query_sequence is the query).  If the
   *  comparison would be valid if the data for the ends of the matches were
   *  swapped, then return a copy of this object with all the matches flipped.
   *  (For now, valid means that none of the matches goes over the end of the
   *  sequence.)
   *  @return null if the matches are valid as they are, otherwise a new
   *    SimpleComparisonData in which the subject and query range of each
   *    match have been exchanged.
   *  @exception OutOfRangeException Thrown if the data in this object is not
   *    valid for either orientation.
   **/
  public ComparisonData flipMatchesIfNeeded (final Bases subject_sequence,
                                             final Bases query_sequence)
      throws OutOfRangeException {
    if (checkMatches (subject_sequence, query_sequence)) {
      return null;
    }

    if (!checkMatches (query_sequence, subject_sequence)) {
      throw new OutOfRangeException ("match goes off end of sequence");
    }

    final AlignMatch [] new_matches = new AlignMatch [matches.length];

    for (int i = 0 ; i < matches.length ; ++i) {
      final AlignMatch this_match = matches[i];

      // the query range becomes the subject range and vice versa
      new_matches[i] =
        new AlignMatch (this_match.getQuerySequenceRange (),
                        this_match.getSubjectSequenceRange (),
                        this_match.isRevMatch (),
                        this_match.getScore ());
    }

    final SimpleComparisonData new_comparison_data =
      new SimpleComparisonData ();

    new_comparison_data.setMatches (new_matches);

    return new_comparison_data;
  }

  /**
   *  Return true if and only if this object contains only valid matches for a
   *  comparison between subject_sequence and query_sequence, ie. no match
   *  ends beyond the length of the corresponding sequence.
   **/
  private boolean checkMatches (final Bases subject_sequence,
                                final Bases query_sequence) {
    for (int i = 0 ; i < matches.length ; ++i) {
      final AlignMatch match = matches[i];

      if (match.getSubjectSequenceEnd () > subject_sequence.getLength ()) {
        return false;
      }

      if (match.getQuerySequenceEnd () > query_sequence.getLength ()) {
        return false;
      }
    }

    return true;
  }

  /**
   *  Return true if and only if the given AlignMatch object overlaps
   *  subject_seq_range on the subject sequence or query_seq_range on the
   *  query sequence.
   **/
  private boolean matchInRange (final AlignMatch match,
                                final Range subject_seq_range,
                                final Range query_seq_range) {
    return
      match.getSubjectSequenceRange ().overlaps (subject_seq_range) ||
      match.getQuerySequenceRange ().overlaps (query_seq_range);
  }


  /**
   *  Set the array of AlignMatch objects.  The score and maximum base
   *  statistics are recalculated from the new array and any buckets made
   *  from a previous array are discarded.
   *  @param matches The new matches.  The array is stored, not copied.
   **/
  protected void setMatches (final AlignMatch [] matches) {
    this.matches = matches;

    match_buffer = new AlignMatch [matches.length];

    // forget everything that was derived from the previous array (if any)
    max_score = -1;
    min_score = 999999999;
    subject_sequence_max_base = -1;
    query_sequence_max_base = -1;
    subject_sequence_buckets = null;
    query_sequence_buckets = null;
    spare_buckets = new Vector ();

    for (int i = 0 ; i < matches.length ; ++i) {
      final AlignMatch this_match = matches[i];

      final int score = this_match.getScore ();

      // negative scores are ignored when finding the maximum and minimum
      if (score > -1) {
        if (score > max_score) {
          max_score = score;
        }

        if (score < min_score) {
          min_score = score;
        }
      }

      final int this_match_subject_sequence_end =
        this_match.getSubjectSequenceEnd ();

      final int this_match_query_sequence_end =
        this_match.getQuerySequenceEnd ();

      if (this_match_subject_sequence_end > subject_sequence_max_base) {
        subject_sequence_max_base = this_match_subject_sequence_end;
      }

      if (this_match_query_sequence_end > query_sequence_max_base) {
        query_sequence_max_base = this_match_query_sequence_end;
      }
    }
  }

  /**
   *  The number of base per bucket.
   **/
  private static final int BUCKET_SIZE = 1000;

  /**
   *  Create subject_sequence_buckets, query_sequence_buckets and
   *  spare_buckets from the current matches array.  A match whose subject or
   *  query range has a getCount () greater than BUCKET_SIZE goes in
   *  spare_buckets, all other matches go in the subject bucket and the query
   *  bucket with index (start position / BUCKET_SIZE).
   **/
  private void makeBuckets () {
    spare_buckets = new Vector ();

    subject_sequence_buckets =
      makeEmptyBuckets (subject_sequence_max_base / BUCKET_SIZE + 1);
    query_sequence_buckets =
      makeEmptyBuckets (query_sequence_max_base / BUCKET_SIZE + 1);

    for (int i = 0 ; i < matches.length ; ++i) {
      final AlignMatch match = matches[i];

      if (match.getSubjectSequenceRange ().getCount () > BUCKET_SIZE ||
          match.getQuerySequenceRange ().getCount () > BUCKET_SIZE) {
        spare_buckets.addElement (match);
      } else {
        final int subject_buckets_index =
          match.getSubjectSequenceStart () / BUCKET_SIZE;
        subject_sequence_buckets[subject_buckets_index].addElement (match);

        final int query_buckets_index =
          match.getQuerySequenceStart () / BUCKET_SIZE;
        query_sequence_buckets[query_buckets_index].addElement (match);
      }
    }
  }

  /**
   *  Return a new array of the given size in which every element is a new,
   *  empty Vector.  (Creating the array alone isn't enough because the
   *  elements of a new array of objects are all null.)
   **/
  private static Vector [] makeEmptyBuckets (final int bucket_count) {
    final Vector [] buckets = new Vector [bucket_count];

    for (int i = 0 ; i < buckets.length ; ++i) {
      buckets[i] = new Vector ();
    }

    return buckets;
  }

  /**
   *  Make and return a new AlignMatch.  If the end of either range is less
   *  than its start then the start and end are swapped.  The match is marked
   *  as a reverse match if exactly one of the two ranges had to be swapped.
   *  @param subject_sequence_start One end of the match in the subject.
   *  @param subject_sequence_end The other end of the match in the subject.
   *  @param query_sequence_start One end of the match in the query.
   *  @param query_sequence_end The other end of the match in the query.
   *  @param score The score to give to the new AlignMatch.
   **/
  protected AlignMatch makeAlignMatch (int subject_sequence_start,
                                       int subject_sequence_end,
                                       int query_sequence_start,
                                       int query_sequence_end,
                                       final int score) {
    try {
      // true if and only if the query hits the reverse complement of the
      // subject
      boolean rev_match = false;

      if (subject_sequence_end < subject_sequence_start) {
        final int tmp = subject_sequence_start;
        subject_sequence_start = subject_sequence_end;
        subject_sequence_end = tmp;
        rev_match = !rev_match;
      }

      if (query_sequence_end < query_sequence_start) {
        final int tmp = query_sequence_start;
        query_sequence_start = query_sequence_end;
        query_sequence_end = tmp;
        rev_match = !rev_match;
      }

      return new AlignMatch (new Range (subject_sequence_start,
                                        subject_sequence_end),
                             new Range (query_sequence_start,
                                        query_sequence_end),
                             rev_match,
                             score);
    } catch (OutOfRangeException e) {
      // the starts and ends have been put in order above so this isn't
      // expected to happen
      throw new Error ("internal error - unexpected exception: " + e);
    }
  }

  /**
   *  Return the maximum score of all the AlignMatch objects in this object.
   *  Negative scores are ignored; -1 is returned if there are no matches
   *  with a non-negative score.
   **/
  public int getMaximumScore () {
    return max_score;
  }

  /**
   *  Return the minimum score of all the AlignMatch objects in this object.
   *  Negative scores are ignored; 999999999 is returned if there are no
   *  matches with a non-negative score.
   **/
  public int getMinimumScore () {
    return min_score;
  }

  /**
   *  This is the array of matches that was passed to setMatches ().
   **/
  private AlignMatch [] matches;

  /**
   *  This array is used as a buffer by getMatchesInRange ().  It is created
   *  by setMatches () and is the same length as the matches array.
   **/
  private AlignMatch [] match_buffer;

  /**
   *  Set by setMatches () and returned by getMaximumScore ().
   **/
  private int max_score = -1;

  /**
   *  Set by setMatches () and returned by getMinimumScore ().
   **/
  private int min_score = 999999999;

  /**
   *  Set by setMatches () to be the highest base we see in the subject
   *  sequence.
   **/
  private int subject_sequence_max_base = -1;

  /**
   *  Set by setMatches () to be the highest base we see in the query
   *  sequence.
   **/
  private int query_sequence_max_base = -1;

  /**
   *  This array contains a Vector for each BUCKET_SIZE bases in the subject
   *  sequence.  An AlignMatch object that isn't in spare_buckets is put in
   *  the bucket with index
   *  (AlignMatch.getSubjectSequenceStart () / BUCKET_SIZE), so matches that
   *  start before base BUCKET_SIZE are in the first bucket, matches that
   *  start at base BUCKET_SIZE or later but before BUCKET_SIZE * 2 are in the
   *  second bucket, etc.  It is null until makeBuckets () is called.
   **/
  private Vector [] subject_sequence_buckets = null;

  /**
   *  This array contains a Vector for each BUCKET_SIZE bases in the query
   *  sequence.  It is indexed in the same way as subject_sequence_buckets,
   *  but using AlignMatch.getQuerySequenceStart ().
   **/
  private Vector [] query_sequence_buckets = null;

  /**
   *  This Vector contains the AlignMatch objects where the match is bigger
   *  than BUCKET_SIZE in either of the sequences.
   **/
  private Vector spare_buckets = new Vector ();
}
