/*
 * Decompiled with CFR 0.152.
 */
package symbol;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import org.biojava.bio.dist.Distribution;
import org.biojava.bio.dist.DistributionFactory;
import org.biojava.bio.dist.SimpleDistributionTrainerContext;
import org.biojava.bio.seq.DNATools;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.SequenceIterator;
import org.biojava.bio.seq.db.HashSequenceDB;
import org.biojava.bio.seq.db.IDMaker;
import org.biojava.bio.seq.db.SequenceDB;
import org.biojava.bio.seq.io.FastaDescriptionLineParser;
import org.biojava.bio.seq.io.FastaFormat;
import org.biojava.bio.seq.io.SequenceBuilderFactory;
import org.biojava.bio.seq.io.SequenceFormat;
import org.biojava.bio.seq.io.SimpleSequenceBuilder;
import org.biojava.bio.seq.io.StreamReader;
import org.biojava.bio.symbol.Alphabet;
import org.biojava.bio.symbol.AlphabetManager;
import org.biojava.bio.symbol.AtomicSymbol;
import org.biojava.bio.symbol.FiniteAlphabet;
import org.biojava.bio.symbol.SymbolList;
import org.biojava.bio.symbol.SymbolListViews;

/**
 * Command-line tool that counts the n-mers occurring in the DNA sequences of
 * a FASTA file and prints those whose trained weight is at or above
 * {@code 1 / 4^n}.
 *
 * <p>Usage: {@code java WindowCount <file> <size>}
 */
public class WindowCount {
    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the path of the FASTA file,
     *             {@code args[1]} is the n-mer size (a positive integer)
     */
    public static void main(String[] args) {
        // Both arguments are required; show the usage text instead of
        // failing with an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            WindowCount.usage();
            return;
        }
        int order;
        try {
            order = Integer.parseInt(args[1]);
        }
        catch (NumberFormatException nfe) {
            System.err.println("size must be an integer, got: " + args[1]);
            WindowCount.usage();
            return;
        }
        if (order < 1) {
            System.err.println("size must be at least 1, got: " + order);
            WindowCount.usage();
            return;
        }

        try {
            File infile = new File(args[0]);
            double threshold = 1.0 / Math.pow(4.0, order);
            FiniteAlphabet dna = DNATools.getDNA();
            SequenceDB seqs = WindowCount.readSequenceDB(infile, dna);

            // Alphabet whose symbols are the n-mers: DNA crossed with itself 'order' times.
            FiniteAlphabet nOrderAlpha = (FiniteAlphabet)AlphabetManager.getCrossProductAlphabet(Collections.nCopies(order, dna));
            Distribution d = DistributionFactory.DEFAULT.createDistribution(nOrderAlpha);
            SimpleDistributionTrainerContext context = new SimpleDistributionTrainerContext();
            context.registerDistribution(d);

            // Add one count for every n-mer in the order-N view of every sequence.
            SequenceIterator iter = seqs.sequenceIterator();
            while (iter.hasNext()) {
                Sequence s = iter.nextSequence();
                SymbolList nseq = SymbolListViews.orderNSymbolList(s, order);
                Iterator<?> nmers = nseq.iterator();
                while (nmers.hasNext()) {
                    context.addCount(d, (AtomicSymbol)nmers.next(), 1.0);
                }
            }
            context.train();

            // Group symbols by weight. Several n-mers can share one weight, so
            // each key maps to a list; a single-valued map would silently keep
            // only the last symbol stored under each weight.
            TreeMap<Double, List<AtomicSymbol>> byWeight = new TreeMap<Double, List<AtomicSymbol>>();
            Iterator<?> symbols = nOrderAlpha.iterator();
            while (symbols.hasNext()) {
                AtomicSymbol s = (AtomicSymbol)symbols.next();
                Double weight = Double.valueOf(d.getWeight(s));
                List<AtomicSymbol> sameWeight = byWeight.get(weight);
                if (sameWeight == null) {
                    sameWeight = new ArrayList<AtomicSymbol>();
                    byWeight.put(weight, sameWeight);
                }
                sameWeight.add(s);
            }

            // tailMap is inclusive of the threshold; entries come out in ascending weight order.
            SortedMap<Double, List<AtomicSymbol>> sig = byWeight.tailMap(Double.valueOf(threshold));
            System.out.println("threshold = " + threshold);
            System.out.println("\nNMER\tWEIGHT");
            for (Map.Entry<Double, List<AtomicSymbol>> entry : sig.entrySet()) {
                for (AtomicSymbol symbol : entry.getValue()) {
                    WindowCount.output(entry.getKey(), symbol);
                }
            }
        }
        catch (Exception e) {
            // Covers I/O failures as well as BioJava parsing/alphabet errors.
            e.printStackTrace(System.err);
        }
    }

    /**
     * Reads every sequence from a FASTA file into an in-memory sequence database.
     *
     * @param seqFile the FASTA file to read
     * @param alpha   the alphabet whose "token" tokenization is used to parse the sequences
     * @return a database containing all sequences read, identified by name
     * @throws Exception if the file cannot be opened or a sequence cannot be parsed or added
     */
    public static SequenceDB readSequenceDB(File seqFile, Alphabet alpha) throws Exception {
        HashSequenceDB seqDB = new HashSequenceDB(IDMaker.byName);
        SequenceBuilderFactory sbFact = new FastaDescriptionLineParser.Factory(SimpleSequenceBuilder.FACTORY);
        SequenceFormat fFormat = new FastaFormat();
        FileInputStream in = new FileInputStream(seqFile);
        try {
            StreamReader seqI = new StreamReader(in, fFormat, alpha.getTokenization("token"), sbFact);
            while (seqI.hasNext()) {
                seqDB.addSequence(seqI.nextSequence());
            }
        }
        finally {
            // Release the file handle even when parsing fails part-way through.
            in.close();
        }
        return seqDB;
    }

    /**
     * Prints one report row: the symbol name, a tab, then the weight.
     *
     * @param d the weight to print
     * @param s the symbol whose name is printed
     */
    public static void output(Double d, AtomicSymbol s) {
        System.out.print(s.getName());
        System.out.println("\t" + d);
    }

    /**
     * Prints the command-line usage text to standard output and terminates
     * the JVM with exit status 0.
     */
    public static void usage() {
        System.out.println("\n\n\t***USAGE***\n\n");
        // Both arguments are mandatory, so neither is shown in [optional] brackets.
        System.out.println("java WindowCount <file> <size>");
        System.out.println("\n\tfile\tFile in Fasta Format");
        System.out.println("\tsize\tSize of nmers to count");
        System.exit(0);
    }
}

