/* LineGroup.java
 *
 * created: Mon Oct 12 1998
 *
 * This file is part of Artemis
 *
 * Copyright (C) 1998,1999,2000  Genome Research Limited
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * $Header: /nfs/disk222/yeastpub/Repository/powmap/uk/ac/sanger/pathogens/embl/LineGroup.java,v 1.12 2000/08/02 12:04:38 kmr Exp $
 */

package uk.ac.sanger.pathogens.embl;

import java.io.Reader;
import java.io.Writer;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.Hashtable;

/**
 *  This class corresponds to a group of associated lines in an EMBL entry.
 *  An example of a group of associated lines is all the lines in an entry
 *  that start with FT.
 *
 *  @author Kim Rutherford
 *  @version $Id: LineGroup.java,v 1.12 2000/08/02 12:04:38 kmr Exp $
 *
 */

abstract class LineGroup
    extends EMBLObject {
  /**
   *  Try to read and return a new LineGroup object from a stream.  Leading
   *  blank lines (empty, or containing only spaces and tabs) are skipped.
   *  The first non-blank line is classified with getLineType (), pushed back
   *  onto the reader and then the reader is handed to the class that handles
   *  that type of line.
   *  @param reader The stream to read from.
   *  @return A new LineGroup object, or null if the stream is at the end of
   *    file or the next non-blank line is the end of entry marker ("//").
   *  @exception IOException Thrown if exception occurs while reading.
   *  @exception ReadFormatException Thrown if the format of the input is in
   *    error (here: if the type of the line is not recognised).
   *  @exception InvalidRelationException Thrown if this Feature cannot contain
   *    the given Qualifier.
   **/
  public static LineGroup readNextLineGroup (LinePushBackReader reader)
      throws IOException, InvalidRelationException {

    String line;

    // read until we get to a non-blank line
    do {
      line = reader.readLine ();

      if (line == null) {
        // end of file
        return null;
      }
    } while (isBlankLine (line));

    final int line_type = LineGroup.getLineType (line);

    // the handlers below expect to see the line we used for classification
    reader.pushBack (line);

    switch (line_type) {
    case SEQUENCE:
      return StreamSequenceFactory.makeStreamSequence (reader);

    case EMBL_FEATURE:
      return EmblStreamFeature.readFromStream (reader);

    case EMBL_FEATURE_HEADER:
      return new FeatureHeader (reader);

    case GENBANK_FEATURE:
      return GenbankStreamFeature.readFromStream (reader);

    case GFF_FEATURE:
      return GFFStreamFeature.readFromStream (reader);

    case MSPCRUNCH_FEATURE:
      return MSPcrunchStreamFeature.readFromStream (reader);

    case END_OF_ENTRY:
      // in this case we do want to consume the line (which will be //) so
      // that the next read from this reader starts on the next entry
      reader.readLine ();
      return null;

    case EMBL_MISC:
      return new EmblMisc (reader);

    case GENBANK_MISC:
      return new GenbankMisc (reader);

    case GFF_MISC:
      return new GFFMisc (reader);

    default:
      throw new ReadFormatException ("reader got confused - " +
                                     "unknown line type",
                                     reader.getLineNumber ());
    }
  }

  /**
   *  Return true if and only if the given String is empty or contains only
   *  space and tab characters.
   **/
  private static boolean isBlankLine (final String line) {
    for (int i = 0 ; i < line.length () ; ++i) {
      final char letter = line.charAt (i);

      if (letter != ' ' && letter != '\t') {
        return false;
      }
    }

    return true;
  }

  /**
   *  Return the line type of the line contained in the argument String.  The
   *  checks are made in this order and the first match wins:
   *  GFF comment (starts with "#"), EMBL line (two character line code),
   *  GENBANK feature table line, GFF feature (contains a tab), GENBANK
   *  keyword line, MSPcrunch line.  Anything else is treated as sequence.
   *  @param line The line to classify (must not be null).
   *  @return One of the line type tags defined in this class (never UNKNOWN).
   */
  public static int getLineType (String line) {
    if (line.startsWith ("#")) {
      return GFF_MISC;
    }

    if (hasEMBLLineCode (line)) {
      if (line.startsWith (EMBL_FEATURE_STRING)) {
        return EMBL_FEATURE;
      }

      if (line.startsWith (END_OF_ENTRY_STRING)) {
        return END_OF_ENTRY;
      }

      if (line.startsWith (EMBL_SEQUENCE_STRING)) {
        return SEQUENCE;
      }

      if (line.startsWith (EMBL_FEATURE_HEADER_STRING)) {
        return EMBL_FEATURE_HEADER;
      }

      // this covers all the lines in the header
      return EMBL_MISC;
    }

    if (isGenbankFeatureLine (line)) {
      return GENBANK_FEATURE;
    }

    if (line.indexOf ('\t') >= 0) {
      return GFF_FEATURE;
    }

    if (getGenbankType (line) != UNKNOWN) {
      return GENBANK_MISC;
    }

    if (isMSPcrunchLine (line)) {
      return MSPCRUNCH_FEATURE;
    }

    // default is sequence
    return SEQUENCE;
  }

  /**
   *  Return true if and only if the given String starts like an EMBL line:
   *  two characters that are each a letter or '/', followed by nothing but
   *  spaces up to (and not including) the sixth character.  Lines shorter
   *  than five characters are accepted if everything after the two
   *  character code is a space.
   **/
  private static boolean hasEMBLLineCode (final String line) {
    final int length = line.length ();

    if (length < 2 ||
        !isEMBLCodeCharacter (line.charAt (0)) ||
        !isEMBLCodeCharacter (line.charAt (1))) {
      return false;
    }

    // characters 2, 3 and 4 (those that exist) must all be spaces
    final int end_of_spaces = Math.min (length, 5);

    for (int i = 2 ; i < end_of_spaces ; ++i) {
      if (line.charAt (i) != ' ') {
        return false;
      }
    }

    return true;
  }

  /**
   *  Return true if and only if the argument can be part of the two
   *  character code at the start of an EMBL line (a letter or '/').
   **/
  private static boolean isEMBLCodeCharacter (final char letter) {
    return letter == '/' || Character.isLetter (letter);
  }

  /**
   *  Return true if and only if the given String looks like a line from a
   *  GENBANK feature table.  The line must be longer than 21 characters and
   *  either be a key line (five spaces, then a letter, digit or '-', with a
   *  space at index 20) or a non-blank line that starts with twenty spaces.
   **/
  private static boolean isGenbankFeatureLine (final String line) {
    if (line.length () <= 21) {
      return false;
    }

    final boolean is_key_line =
      line.startsWith ("     ") &&
      (Character.isLetter (line.charAt (5)) ||
       Character.isDigit (line.charAt (5)) ||
       line.charAt (5) == '-') &&
      line.charAt (20) == ' ';

    if (is_key_line) {
      return true;
    }

    return line.startsWith ("                    ") &&
      line.trim ().length () > 0;
  }

  /**
   *  Return true if and only if the given String appears to be a feature
   *  generated by MSPcrunch -d.  The test is that the line, once trimmed,
   *  starts with a digit and contains at least one space.
   **/
  private static boolean isMSPcrunchLine (final String line) {
    final String trim_line = line.trim ();

    return trim_line.length () > 0 &&
      Character.isDigit (trim_line.charAt (0)) &&
      trim_line.indexOf (' ') != -1;
  }

  /**
   *  Return GENBANK_MISC if the given String starts with a letter and its
   *  first word (everything up to the first space, or the whole String if
   *  there is no space) is one of the GENBANK start of line keywords, or
   *  UNKNOWN if the String isn't the start of a GENBANK LineGroup.
   **/
  private static int getGenbankType (final String line) {
    if (line.length () == 0 || !Character.isLetter (line.charAt (0))) {
      return UNKNOWN;
    }

    final int first_space = line.indexOf (' ');

    final String first_word;

    if (first_space == -1) {
      first_word = line;
    } else {
      first_word = line.substring (0, first_space);
    }

    if (genbank_hash.containsKey (first_word)) {
      return GENBANK_MISC;
    } else {
      return UNKNOWN;
    }
  }

  /**
   *  Returns a String containing the contents of the line with the initial
   *  type string (two letters) and white space (three spaces) removed, that
   *  is, everything after the first five characters.
   *  @param line The line to process (must not be null).
   *  @return The remainder of the line, or "" if the line is five characters
   *    long or shorter.
   */
  public static String getRestOfLine (String line) {
    final int END_OF_SPACES = 5;

    if (line.length () > END_OF_SPACES) {
      return line.substring (END_OF_SPACES);
    } else {
      return "";
    }
  }

  /**
   *  Write the end of entry marker - "//" followed by a newline.
   *  @param writer The stream to write to.
   *  @exception IOException Thrown if the write fails.
   **/
  public static void writeEndOfEMBLEntry (Writer writer) throws IOException {
    writer.write (END_OF_ENTRY_STRING + "\n");
  }

  /**
   *  Write this object to the given stream.
   *  @param out_stream The stream to write to.
   *  @exception IOException Thrown if the write fails.
   **/
  public abstract void writeToStream (final Writer out_stream)
      throws IOException;


  /**
   *  The tag used for unidentified input.
   **/
  final static private int UNKNOWN = 0;

  /**
   *  The tag for the end of entry line: "//"
   **/
  final static int END_OF_ENTRY = 1;
  final static String END_OF_ENTRY_STRING = "//";

  /**
   *  The tag for the start of sequence line
   **/
  final static int SEQUENCE = 2;
  final static String EMBL_SEQUENCE_STRING = "SQ";

  /**
   *  The tag for an EMBL feature table line
   **/
  final static int EMBL_FEATURE = 3;
  final static String EMBL_FEATURE_STRING = "FT";

  /**
   *  The tag for the EMBL feature header lines (FH ...)
   **/
  final static int EMBL_FEATURE_HEADER = 4;
  final static String EMBL_FEATURE_HEADER_STRING = "FH";

  /**
   *  The tag for a GENBANK feature table line
   **/
  final static int GENBANK_FEATURE = 5;

  /**
   *  This is the tag for an EMBL LineGroup that we don't have a handler for.
   *  It will be stored in an object of type EmblMisc.
   **/
  final static int EMBL_MISC = 6;

  /**
   *  This is the tag for a Genbank LineGroup that we don't have a handler
   *  for.  It will be stored in an object of type GenbankMisc.
   **/
  final static int GENBANK_MISC = 7;

  /**
   *  This is the tag for a GFF LineGroup (generally a comment line) that we
   *  don't have a handler for.  It will be stored in an object of type
   *  GFFMisc.
   **/
  final static int GFF_MISC = 8;

  /**
   *  This is the tag for a GFF format line.
   **/
  final static int GFF_FEATURE = 9;

  /**
   *  This is the tag for lines generated by MSPcrunch -d
   **/
  final static int MSPCRUNCH_FEATURE = 10;

  /**
   *  This hash table contains the GENBANK start of line keywords (LOCUS,
   *  DEFINITION, FEATURES etc.).  Each keyword is stored as both key and
   *  value; only the presence of the key is ever tested.
   **/
  private static final Hashtable genbank_hash = new Hashtable ();

  static {
    final String [] genbank_keywords = {
      "LOCUS",
      "DEFINITION",
      "ACCESSION",
      "NID",
      "VERSION",
      "KEYWORDS",
      "SOURCE",
      "REFERENCE",
      "COMMENT",
      "FEATURES"
    };

    for (int i = 0 ; i < genbank_keywords.length ; ++i) {
      genbank_hash.put (genbank_keywords[i], genbank_keywords[i]);
    }
  }
}
