#!/bin/sh
exec ruby -w -x $0 ${1+"$@"} # -*- ruby -*-
#!ruby -w

# An extended grep, with extended functionality including full regular
# expressions, contextual output, highlighting, detection and exclusion of
# nontext files, and complex matching criteria.

# $Id: glark,v 1.25 2003/01/15 13:03:35 jeugenepace Exp $

require "English"

# most of these correspond to user options.

$after             = 0          # lines of context after the match
$before            = 0          # lines of context before the match
$count             = false      # just count the lines
$explain           = false      # display a legible version of the expression
$file_names_only   = false      # display only the file names
$grep_output       = false      # emulate grep output
$highlight         = true       # highlight matches (using ANSI codes)
$invert_match      = false      # display non-matching lines
$nocase            = false      # true means matching ignores case
$quiet             = false      # minimize warnings
$show_line_numbers = true       # display numbers of matching lines
$verbose           = false      # display debugging output
$whole_lines       = false      # true means patterns must match the entire line
$whole_words       = false      # true means all patterns are '\b'ed front and back
$exit_status       = 1          # 0 == matches, 1 == no matches, 2 == error
$directory         = "read"     # read, skip, or recurse, a la grep
$write_null        = false      # in $file_names_only mode, write '\0' instead of '\n'
$exclude_matching_files = false # exclude files whose names match the expression
$range_start       = nil        # where to begin searching (line number or "N%"); nil => from the start
$range_end         = nil        # where to stop searching (line number or "N%"); nil => through the end
$num_matches       = nil        # the maximum number of matches to display per file

$stdout.sync = true             # unbuffer
$stderr.sync = true             # unbuffer

$PACKAGE = "glark"
$VERSION = "1.6.1"

#-------------------------------------------------------
# ANSIColor module
#-------------------------------------------------------

module ANSIColor

  # A hash of raw ANSI codes. Lookup wraps a stored code in the escape
  # sequence ("\e[" + code + "m"); a missing key still yields nil.
  class ANSIEscapedColors < Hash
    def [](str)
      val = super
      if val
        val = "\e[" + val + "m"
      end
      val
    end
  end

  @@ATTRIBUTE_COLORS = ANSIEscapedColors[
    'none'       => '00', 
    'bold'       => '01',
    'underscore' => '04',
    'underline'  => '04',
    'blink'      => '05',
    'reverse'    => '07',
    'concealed'  => '08'
  ]

  @@TEXT_COLORS = ANSIEscapedColors[
    'black'   => '30',
    'red'     => '31',
    'green'   => '32',
    'yellow'  => '33',
    'blue'    => '34',
    'magenta' => '35',
    'cyan'    => '36',
    'white'   => '37'
  ]
  
  @@BACKGROUND_COLORS = ANSIEscapedColors[
    'black'   => '40',
    'red'     => '41',
    'green'   => '42',
    'yellow'  => '43',
    'blue'    => '44',
    'magenta' => '45',
    'cyan'    => '46',
    'white'   => '47'
  ]

  @@RESET = "\e[0m"

  # returns the code for the given color string, which is in the format:
  # [foreground] on [background]. Note that the foreground and background sections
  # can have modifiers (attributes). Examples:
  #     black
  #     blue on white
  #     bold green on yellow
  #     underscore bold magenta on cyan
  #     underscore red on bold cyan

  def ANSIColor.code(str)
    fg, bg = str.split(/\s*\bon_?\s*/)
    foreground(fg) + background(bg)
  end

  # returns the code for the given background color(s). Attribute names
  # (bold, underscore, ...) are accepted here as well, so that the documented
  # form "underscore red on bold cyan" works instead of producing a warning
  # and dropping the background.
  def ANSIColor.background(bgcolor)
    make_code(bgcolor, [ @@ATTRIBUTE_COLORS, @@BACKGROUND_COLORS ])
  end

  # returns the code for the given foreground color(s)
  def ANSIColor.foreground(fgcolor)
    make_code(fgcolor, [ @@ATTRIBUTE_COLORS, @@TEXT_COLORS ])
  end

  # returns the sequence that switches all attributes and colors off
  def ANSIColor.reset
    @@RESET
  end

  # Internal helper: translates each whitespace-separated word of str into its
  # escape sequence, using the first table in colors that knows the word.
  # Returns "" for a nil str. On an unknown word, a warning is written to
  # stderr and translation stops; the codes gathered so far are returned.
  # (A "protected" marker has no effect on module-level methods such as this
  # one, so none is used.)
  def ANSIColor.make_code(str, colors)
    code = ""
    return code unless str
    str.split.each do |s|
      table = colors.find { |ca| ca[s] }
      unless table
        $stderr.puts "WARNING: ANSIColor::make_code(" + str + "): unknown color: " + s
        break
      end
      code += table[s]
    end
    code
  end

end

#-------------------------------------------------------

# default highlighting: escape sequences emitted before matched text and
# before file names, and the sequence that switches highlighting back off.
$TEXT_HIGHLIGHT = ANSIColor::code("black on yellow")
$FILE_HIGHLIGHT = ANSIColor::code("reverse bold")
$RESET          = ANSIColor::reset

# signifies no limit to the distance between matches, i.e., anywhere within the
# entire file is valid. This is the value the user passes as the distance of an
# 'and' expression to request that.
$INFINITE_DISTANCE = -1

# Debugging aid: when $verbose is set, writes "name = var" to standard output,
# with the name right-aligned in a 45-character column. Silent otherwise.
def spew(name, var)
  return unless $verbose

  printf "%45s = ", name
  print var, "\n"
end

# True when any context lines (before or after a match) were requested.
def has_context
  !($after == 0 && $before == 0)
end


# Converts a range limit, as given by the user, into a number.
#   var   - nil, a number as a string ("12"), or a percentage ("25%")
#   count - the number of lines a percentage is taken of
# Returns nil for a nil var, otherwise a Float: the given percentage of count,
# or the numeric value of var itself.
def range(var, count)
  return nil unless var

  percent = /([\.\d]+)%/.match(var)
  return var.to_f unless percent

  count * percent[1].to_f / 100
end


# Decides whether a file looks like text, by extension where that is
# conclusive and otherwise by sampling the start of the file.
class FileTester 

  # the percentage of characters that we allow to be odd in a text file
  @@ODD_FACTOR = 0.3

  # how many bytes (characters) of a file we test
  @@TEST_LENGTH = 1024

  # extensions associated with files that are always text:
  @@KNOWN_TEXT = %w{ txt c cpp mk h hpp html java }

  # extensions associated with files that are never text:
  @@KNOWN_NONTEXT = %w{ a o obj class elc gif gz jar jpg jpeg png pdf tar Z }

  # Whether the byte value c lies in the 7-bit ASCII range.
  def FileTester.ascii?(c)
    # from ctype.h
    return (c.to_i & ~0x7f) == 0
  end

  # Returns true if the named file appears to be text.
  #
  # A missing file is never text. A known extension decides immediately.
  # Otherwise the first @@TEST_LENGTH bytes are examined: a NUL byte means
  # "not text", and the file is text only if the share of non-ASCII bytes
  # stays under @@ODD_FACTOR. An empty file is reported as not text.
  def FileTester.text?(file)
    # Don't waste our time if it doesn't even exist:
    return false unless File.exist?(file)
    
    if file.index(/\.(\w+)\s*$/)
      suffix = $1
      return true  if @@KNOWN_TEXT.include?(suffix)
      return false if @@KNOWN_NONTEXT.include?(suffix)
    end

    # Read raw bytes, so that the test does not depend on the string encoding,
    # and use the block form so that the file is always closed.
    sample = File.open(file, "rb") { |f| f.read(@@TEST_LENGTH) } || ""

    nodd = 0
    sample.each_byte do |byte|
      # never allow null in a text file
      return false if byte == 0

      nodd += 1 unless FileTester.ascii?(byte)
    end

    return FileTester.summary(nodd, sample.bytesize)
  end

  # Whether nodd odd bytes out of ntested is few enough for a text file.
  def FileTester.summary(nodd, ntested)
    return nodd < ntested * @@ODD_FACTOR
  end

end


# A thing that can be grepped: a named list of lines, plus a per-line status
# recording which lines matched and which are context. Subclasses supply
# write_count and write_non_count.
class InputFile

  attr_reader :lines, :fname
  
  def initialize(fname, lines)
    @fname     = fname
    @lines     = lines
    @displayed = false
    # index = line number, value = context character
    @stati     = []
    @count     = 0 if $count
  end

  # prints the line, and adjusts for the fact that in our world, lines are
  # 0-indexed, whereas they are displayed as if 1-indexed. Line numbers past
  # the end of the file are ignored. The context character is shown only when
  # context output was requested.
  def print_line(lnum, ch = nil)
    return unless lnum < lines.size

    printf "%5d ", lnum + 1 if $show_line_numbers
    printf "%s ", ch if ch && has_context
    print lines[lnum]
  end

  # Assigns ch as the status of lines from..to (inclusive). A line that
  # already has a status keeps it unless force is given.
  def set_status(from, to, ch, force = false)
    (from .. to).each do |ln|
      @stati[ln] = ch if force || !@stati[ln]
    end
  end

  # Records a match covering start_line..end_line (only start_line in grep
  # mode) and updates $exit_status. In count mode only the counter is bumped;
  # otherwise the matched lines are marked ":", preceding context "-" and
  # following context "+".
  def mark_as_match(start_line, end_line = start_line)
    end_line = start_line if $grep_output

    $exit_status = $invert_match ? 1 : 0

    if $count
      @count += 1
    else
      first_context = [0, start_line - $before].max
      set_status(first_context, start_line - 1,    "-")
      set_status(start_line,    end_line,          ":",  true)
      set_status(end_line + 1,  end_line + $after, "+")
    end
  end

  # Writes every line that has a status (match or context), or the count.
  def write_matches
    if $count
      write_count
    else
      @stati.each_with_index do |status, ln|
        print_line(ln, status) if status
      end
    end
  end

  # Writes every line that is not marked as a match, or the inverse count.
  def write_non_matches
    if $count
      write_non_count
    else
      lines.each_index do |ln|
        print_line(ln) unless @stati[ln] == ":"
      end
    end
  end

end


# Supports context and highlighting. A header naming the file precedes the
# output whenever more than one argument remains in ARGV.
class GlarkFormat < InputFile
  
  # Writes the file name followed by a colon on a line of its own, wrapped in
  # the file highlight when highlighting is on.
  def show_file_header
    label = "#{@fname}:"
    label = "#{$FILE_HIGHLIGHT}#{label}#{$RESET}" if $highlight
    print label, "\n"
  end

  # Writes the number of matches, indented.
  def write_count
    puts "    #{@count}"
  end

  # Writes the number of lines remaining after subtracting the matches.
  def write_non_count
    puts "    #{@lines.size - @count}"
  end

  def write_matches
    show_file_header if several_files?
    super
  end

  def write_non_matches
    show_file_header if several_files?
    super
  end

  # Whether more than one argument is left in ARGV.
  def several_files?
    ARGV.size > 1
  end

end


# This matches grep, mostly. It is for running within emacs, thus,
# it does not support context or highlighting.
class GrepFormat < InputFile

  def initialize(fname, lines)
    # decided once, at construction: name the file on every line only when
    # more than one argument remains in ARGV
    @show_file_name = ARGV.size > 1
    super
  end

  # Writes the number of matches, prefixed with the file name if shown.
  def write_count
    print @fname, ":" if @show_file_name
    puts @count
  end

  # Writes the number of lines remaining after subtracting the matches.
  def write_non_count
    print @fname, ":" if @show_file_name
    puts @lines.length - @count
  end

  # prints the line, and adjusts for the fact that in our world, lines are
  # 0-indexed, whereas they are displayed as if 1-indexed. The context
  # character ch is accepted for compatibility with InputFile and ignored.
  def print_line(lnum, ch = nil)
    # As in InputFile#print_line, ignore line numbers past the end of the file
    # (lines marked as after-context beyond the last line); otherwise a bare
    # "file:N: " prefix with no text would be written.
    return unless lnum < @lines.size

    print @fname, ":" if @show_file_name
    if $show_line_numbers
      printf "%d: ", lnum + 1
    end
    print @lines[lnum]
  end

end


# A function object, which can be applied (processed) against a InputFile.
# Subclasses provide evaluate(line, lnum, file), which reports whether the
# given line satisfies the expression.
class FuncObj
  
  attr_accessor :match_line_number, :file, :matches

  def initialize
    @match_line_number = nil
    @matches = []
  end

  # Remembers lnum as a line on which this expression matched.
  def add_match(lnum)
    @matches << lnum
  end

  # The line on which the most recent match starts.
  def start_position
    match_line_number
  end

  # The line on which the most recent match ends.
  def end_position
    start_position
  end

  # Forgets all match state and notes the file about to be processed.
  def reset_file(file)
    @file              = file
    @match_line_number = nil
    @matches           = []
  end

  # Evaluates every line of infile that lies within the configured range,
  # marking matches on infile and stopping once $num_matches (if set) is
  # reached. Afterwards writes the file name, the non-matching lines or the
  # matches, depending on $file_names_only and $invert_match.
  def process(infile)
    reset_file(infile.fname)

    total       = infile.lines.size
    first_line  = range($range_start, total)
    last_line   = range($range_end, total)
    match_count = 0

    infile.lines.each_with_index do |line, lnum|
      next if first_line && lnum < first_line
      next if last_line && lnum > last_line
      next unless evaluate(line, lnum, infile.fname)

      mark_as_match(infile)
      match_count += 1
      break if $num_matches && match_count >= $num_matches
    end

    got_match = match_count > 0

    if $file_names_only
      if got_match != $invert_match
        print infile.fname
        print($write_null ? "\0" : "\n")
      end
    elsif $invert_match
      infile.write_non_matches
    elsif got_match
      infile.write_matches
    end
  end

  # Marks the most recent match on infile.
  def mark_as_match(infile)
    infile.mark_as_match(start_position)
  end
  
  def to_s
    inspect
  end
  
end


# Applies a regular expression against a InputFile.
class RegexpFuncObj < FuncObj

  attr_reader :re

  def initialize(re)
    super()
    @re   = re
    @file = nil
  end

  def inspect
    @re.inspect
  end

  # Tests line against the regular expression. On a match, records lnum as
  # the match position and, when highlighting is on, wraps every occurrence in
  # the line with the text highlight -- modifying the line in place.
  # Returns true on a match, false otherwise.
  def evaluate(line, lnum, file)
    return false unless line.index(@re)

    @match_line_number = lnum

    # highlight what the regular expression matched; the block form must be
    # used so that the matched text itself is what gets wrapped
    line.gsub!(@re) { |hit| $TEXT_HIGHLIGHT + hit + $RESET } if $highlight

    add_match(lnum)
    true
  end
  
end


# Associates a pair of expressions. Subclasses record in @last_start the line
# on which their most recent match begins.
class CompoundExpression < FuncObj

  def initialize(op1, op2)
    @op1  = op1
    @op2  = op2
    @file = nil
    super()
  end

  # Resets both operands, then this expression itself.
  def reset_file(file)
    [ @op1, @op2 ].each { |op| op.reset_file(file) }
    super
  end

  def start_position
    @last_start
  end
  
end


# Evaluates both expressions, and is satisfied when either return true.
class OrExpression < CompoundExpression

  # Both operands are always evaluated, never short-circuited, so that each
  # records its own match. When both match, the positions of the second
  # operand are the ones kept.
  def evaluate(line, lnum, file)
    first_hit  = @op1.evaluate(line, lnum, file)
    second_hit = @op2.evaluate(line, lnum, file)

    return false unless first_hit || second_hit

    matched     = second_hit ? @op2 : @op1
    @last_start = matched.start_position
    @last_end   = matched.end_position

    @match_line_number = lnum
    add_match(lnum)
    true
  end
  
  def inspect
    "(" + @op1.to_s + " or " + @op2.to_s + ")"
  end

  def end_position
    @last_end
  end

end


# Evaluates both expressions, and is satisfied when both return true, with the
# two matches no more than @dist lines apart.
class AndExpression < CompoundExpression
  
  # dist is the maximum number of lines between the two matches: 0 means the
  # same line, and an infinite value means anywhere in the file.
  def initialize(dist, op1, op2)
    @dist = dist
    super(op1, op2)
  end

  # Marks the whole span, from the earlier match through the later one.
  def mark_as_match(infile)
    infile.mark_as_match(start_position, end_position)
  end

  # Whether op has matched, and its most recent match is no more than @dist
  # lines before lnum. (Recorded matches never lie after the current line, so
  # the distance is lnum minus the match; subtracting the other way round
  # gives a value that is never positive and so always passes.)
  def match_within_distance(op, lnum)
    op.matches.size > 0 and (lnum - op.matches[-1] <= @dist)
  end

  def inspect
    str = "("+ @op1.to_s
    if @dist == 0
      str += " same line as "
    else 
      str += " within " + @dist.to_s + " lines of "
    end
    str += @op2.to_s + ")"
    str
  end

  # Looks at the most recent match of op only: if it is within @dist lines of
  # lnum, it becomes the start of this match and true is returned, otherwise
  # false. Returns true when op has no matches at all.
  def reset_match(op, lnum)
    op.matches.reverse.each do |m|
      if lnum - m <= @dist
        @last_start = m
        return true
      else
        return false
      end
    end
    return true
  end

  # Evaluates both operands against the line (always both, so that each
  # records its own matches) and reports whether one of them matched here
  # while the other matched recently enough.
  def match?(line, lnum, file)
    m1 = @op1.evaluate(line, lnum, file)
    m2 = @op2.evaluate(line, lnum, file)

    if m1 and match_within_distance(@op2, lnum)
      return reset_match(@op2, lnum)
    elsif m2 and match_within_distance(@op1, lnum)
      return reset_match(@op1, lnum)
    else
      return false
    end
  end

  def end_position
    [@op1.end_position, @op2.end_position].max
  end

  def evaluate(line, lnum, file)
    # spew self, "evaluating line #{lnum}: #{line[0 .. -2]}"

    if match?(line, lnum, file)
      @match_line_number = lnum
      # Record the match, as the other expression types do. An enclosing
      # 'and' consults the matches of its operands, so without this a nested
      # 'and' could never satisfy it.
      add_match(lnum)
      return true
    else
      return false
    end
  end
end


# The main processor: applies one expression to files, directories and
# standard input.
class Glark 
  # func is the expression (function object) to apply to each input.
  def initialize(func)
    @func = func
  end

  # Runs the expression over the given lines, using the output format
  # selected by $grep_output.
  def search_file(fname, lines)
    puts "searching #{fname} for #{@func}" if $verbose
    format = $grep_output ? GrepFormat : GlarkFormat
    @func.process(format.new(fname, lines))
  end

  # Searches the named input. "-" means standard input. Missing or unreadable
  # files and nontext files are reported (subject to $quiet and $verbose) and
  # skipped; directories are handled according to $directory; anything else
  # that is not a regular file is silently ignored.
  def search(name)
    puts "searching #{name} for #{@func}" if $verbose

    if name == "-" 
      $stderr.print "reading standard input...\n" unless $quiet
      search_file(name, $stdin.readlines)
    elsif not File.exist?(name)
      # File.exist? rather than File.exists?, which current Ruby no longer has
      $stderr.print "WARNING: no such file: #{name}\n" unless $quiet
    elsif not File.file?(name)
      search_directory(name) if File.directory?(name)
    elsif !File.readable?(name)
      $stderr.print "file not readable: #{name}\n" unless $quiet
    elsif FileTester.text?(name)
      search_file(name, IO.readlines(name))
    else
      $stderr.print "not a text file: #{name}\n" if $verbose
    end
  end

  # Handles a directory as $directory directs: "read" reports it, "recurse"
  # searches every entry below it, and "skip" ignores it.
  def search_directory(name)
    case $directory
    when "read"
      $stderr.print "glark: #{name}: Is a directory\n" unless $quiet
    when "recurse"
      entries = Dir.entries(name).reject { |x| x == "." || x == ".." }
      entries.each do |e|
        search(name + "/" + e)
      end
    when "skip"
      puts "skipping directory #{name}" if $verbose
    end
  end
end


# Builds the expression to be matched from the command-line arguments: either
# a single regular expression, or a tree of 'and' and 'or' expressions.
# Arguments are consumed (shifted) from the given array as they are used.
class ExpressionCreator

  # the resulting expression, or nil if there was no argument to build from
  attr_reader :expr

  # arg is the argument at which the expression starts; args holds the
  # arguments that follow it.
  def initialize(arg, args)
    @current = arg
    @args    = args
    @expr    = create_expression
  end

  # Turns the current argument into a RegexpFuncObj, honoring $nocase,
  # $whole_words and $whole_lines.
  def create_regular_expression
    pat = @current

    # this check is because they may have omitted the pattern, e.g.:
    #   % glark *.cpp
    # (File.exist? rather than File.exists?, which current Ruby no longer has)
    if File.exist?(pat) and !$quiet
      $stderr.print "WARNING: pattern '#{pat}' exists as a file.\n"
      $stderr.print "    Pattern may have been omitted.\n"
    end

    ignorecase = $nocase
    
    # we handle a ridiculous number of possibilities here:
    #     /foobar/     -- "foobar"
    #     /foo/bar/    -- "foo", then slash, then "bar"
    #     /foo\/bar/   -- same as above
    #     /foo/bar/i   -- same as above, case insensitive
    #     /foo/bari    -- "/foo/bari" exactly
    #     /foo/bar\/i  -- "/foo/bar/i" exactly
    #     foo/bar/     -- "foo/bar/" exactly

    if pat.index(/^\/(.*[^\\])\/i$/)
      pattern    = $1
      ignorecase = true
    elsif pat.index(/^\/(.*[^\\])\/$/)
      pattern    = $1
    elsif pat.index(/^(\/.*)$/)
      pattern    = $1
    elsif pat.index(/^(.*\/)$/)
      pattern    = $1
    else
      pattern    = pat
    end
    
    if $whole_words
      unless pattern =~ /^(\w|\\w).*(\w|\\w)$/

        # Yes, this is right: we're looking for either "\w", literally, or \w,
        # as a regular expression. In the former case, "\\w" is what is sought.
        # And we want this at the beginning and end of the expression.

        $stderr.print "WARNING: pattern '#{pattern}' does not begin and end on a word boundary.\n"
      end
      pattern = '\b' + pattern + '\b'
    elsif $whole_lines
      pattern = '^'  + pattern + '$'
    end
    
    # spew "pattern", pattern
    # spew "ignorecase", ignorecase
    
    if ignorecase
      regex = Regexp.new(pattern, Regexp::IGNORECASE)
    else
      regex = Regexp.new(pattern)
    end
    
    return RegexpFuncObj.new(regex)
  end 

  # creates two expressions and returns them. Either is nil if the arguments
  # ran out.
  def create_expressions
    @current = @args.shift
    a1 = create_expression

    @current = @args.shift
    a2 = create_expression
    
    [ a1, a2 ]
  end

  # Builds an 'or' expression from the next two expressions. Exits with
  # status 2 if either is missing.
  def consume_or_expression
    a1, a2 = create_expressions
    unless a1 && a2
      $stderr.print "ERROR: 'or' expression takes two arguments\n"
      exit 2
    end

    # explicit end tag is optional:
    @args.shift if @args[0] == "--end-of-or"
    OrExpression.new(a1, a2)
  end

  # Builds an 'and' expression from the distance and the two expressions that
  # follow. Exits with status 2 if the distance is not a nonnegative integer
  # or $INFINITE_DISTANCE, or if either expression is missing.
  def consume_and_expression
    dist = @args.shift
    # check to ensure that this is numeric
    if !dist || (dist.to_i != $INFINITE_DISTANCE && !dist.index(/^\d+$/))
      $stderr.print "ERROR: invalid distance for 'and' expression: '#{dist}'\n" 
      $stderr.print "    expecting an integer, or #{$INFINITE_DISTANCE} for 'infinite'\n" 
      exit 2
    end

    if dist.to_i == $INFINITE_DISTANCE
      dist = 1.0 / 0.0            # infinity
    else
      dist = dist.to_i
    end

    a1, a2 = create_expressions
    unless a1 && a2
      $stderr.print "ERROR: 'and' expression takes two arguments\n"
      exit 2
    end
    # explicit end tag is optional:
    @args.shift if @args[0] == "--end-of-and"
    AndExpression.new(dist, a1, a2)
  end

  # Builds the expression starting at the current argument. Returns nil when
  # there is no current argument. Exits with status 2 on an unknown "--"
  # option.
  def create_expression
    if @current
      spew "processing arg", @current
      case @current
      when "--or", "-o"
        return consume_or_expression
      when "--and", "-a"
        return consume_and_expression
      when /^--/
        # terminate the message with a newline, like every other diagnostic
        $stderr.print "option not understood: #{@current}\n"
        exit 2
      else
        $stderr.print "assuming the last argument #{@current} is a pattern\n" if $verbose
        return create_regular_expression
      end
    else
      return nil
    end
  end

end

# sets output a la grep: line numbers shown, no highlighting and no context.
def set_grep_output
  $before = $after   = 0
  $show_line_numbers = true
  $highlight         = false
  $grep_output       = true
end

# Writes the usage summary to standard output.
def show_help
  # A single-quoted here-document, so that backslashes in the text (\b, \w,
  # \0, \n) are printed literally instead of being read as string escapes.
  puts <<-'END_OF_HELP'
USAGE
    glark [options] expression file...

OPTIONS
    -?, --help
        Display this help message.

    -A NUM, --after-context=NUM
        Print NUM lines after a matched expression.

    -b NUM[%], --before NUM[%]
        Restrict the search to before the given location, which represents
        either the number of the last line within the valid range, or the
        percentage of lines to be searched.

    -B NUM, --before-context=NUM
        Print NUM lines before a matched expression.

    -C [NUM], -NUM, --context[=NUM]
        Output <NUM> lines of context around a matched expression. The
        default is no context. If no NUM is given for this option, the
        number of lines of context is 2.

    -c, --count
        Instead of normal output, display only the number of matches in
        each file. Because of multi-line matches, the -v (--invert-match)
        option would produce misleading output with this option.

    -d ACTION, --directories=ACTION
        Directories are processed according to the given ACTION, which by
        default is 'read'. If ACTION is 'recurse', each file in the
        directory is read and each subdirectory is recursed into
        (equivalent to the -r option). If ACTION is 'skip', directories are
        not read, and no message is produced.

    -f NUM[%], --after NUM[%]
        Restrict the search to after the given location, which represents
        either the number of the first line within the valid range, or the
        percentage of lines to be skipped.

    -F, --file-color COLOR
        Specify the highlight color for file names.

    -g, --grep
        Produce output like the grep default: file names and line numbers,
        and a single line of the match, which will be the first line for
        matches that span multiple lines.

    -h, --no-filename
        Do not display the names of the files that matched.

    -H, --with-filename
        Display the names of the files that matched. This is the default
        behavior.

    -i, --ignore-case
        Match regular expressions without regard to case. The default is
        case sensitive.

    -l, --files-with-matches
        Print only the names of the file that matched the expression.

    -L, --files-without-match
        Print only the names of the file that did not match the expression.

    -m NUM, --match-limit NUM
        Find only the first NUM matches in each file.

    -M, --exclude-matching-filenames
        Do not search files whose names match the given expression.

    -n, --line-number
        Display the line numbers. This is the default behavior.

    -N, --no-line-number
        Do not display the line numbers.

    -q, -s, --quiet, --no-messages
        Suppress warnings.

    -Q, --no-quiet
        Enable warnings. This is the default.

    -r, --recurse
        Recurse through directories. Equivalent to --directories=recurse.

    -R, --range NUM[%] NUM[%]
        Restrict the search to the given range of lines.

    -T, --text-color COLOR
        Specify the highlight color for text.

    -u, --highlight, -hl
        Turn highlighting on, which uses ANSI escape sequences. This is the
        default behavior. The '-hl' option is deprecated.

    -U, --no-highlight, -nohl
        Turn highlighting off. The '-nohl' option is deprecated.

    -v, --invert-match
        Show lines that do not match the expression.

    -V, --version
        Display version information.

    -w, --word, --word-regexp
        Put word boundaries around each pattern, thus matching only where
        the full word(s) occur in the text. Thus, glark -w Foo is the same
        as glark '/\bFoo\b/'.

    -W, --no-word
        Do not put word boundaries around each pattern. This is the default.

    -x, --line-regexp
        Select only where the entire line matches the pattern(s).

    -X, --no-line-regexp
        Do not select only where the entire line matches the pattern(s). This
        is the default behavior.

    -Z, --null
        When in -l mode, write file names followed by the ASCII NUL character ('\0')
        instead of '\n'.

    -_, --verbose
        Display normally suppressed output, for debugging purposes.

EXPRESSION
    A regular expression, in Ruby format. Run `perldoc perlre' for more
    general information. The expression may be of either form:

        something
        /something/

    All regular expression characters and options are available, such as
    ``\w'', ``.*?'' and ``[^9]''.

    If the and and or options are not used, the last non-option is
    considered to be the expression to be matched.

    -o expr1 expr2
    --or expr1 expr2 --end-of-or
        Matches either of the two expressions. The results of the two forms
        are equivalent. In the latter syntax, the --end-of-or is optional.

    -a number expr1 expr2
    --and number expr1 expr2 --end-of-and
        Matches both of the two expressions, within <number> lines of each
        other. As with the or option, the results of the two forms are
        equivalent, and the --end-of-and is optional.

        If the number provided is -1 (negative one), the distance is
        considered to be ``infinite'', and thus, the condition is satisfied
        if both expressions match within the same file.

        If the number provided is 0 (zero), the condition is satisfied if
        both expressions match on the same line.

        A warning will be issued if the value given in the number position
        does not appear to be numeric.

    --explain
        Writes the regular expression in a more legible format. Useful for
        debugging.

See the man page for more information.

  END_OF_HELP
end

# creates a color for the given option, based on its value. A missing value
# is an error: a message naming the option is written to stderr and the
# program exits with status 2.
def make_highlight(opt, value)
  unless value
    $stderr.print "ERROR: " + opt + " requires a color\n"
    exit 2
  end

  ANSIColor::code(value)
end

# returns whether the value matches a true value, such as "yes", "true", or
# "on". The comparison ignores case.
def to_boolean(value)
  case value.downcase
  when "yes", "true", "on"
    true
  else
    false
  end
end

# the expression to be evaluated. It is assigned from the return value of
# process_option in the ARGV loop below; the variable of the same name inside
# process_option is a separate, method-local one.
expr = nil

# Processes a single command-line (or GLARKOPTS) argument.
#
#   opt  - the argument to process
#   args - the arguments that follow it; values that opt needs are shifted
#          off the front, and the remainder of a cluster of single-letter
#          options ("-iN") is pushed back onto the front
#
# Returns nil if opt was an option, meaning that more arguments are to be
# processed. Otherwise opt starts the expression, and the expression built by
# ExpressionCreator is returned (nil if none could be built). The version and
# help options exit the program.
def process_option(opt, args)
  # spew "processing option", opt
  case opt

    # after (context)
  when "-A"
    $after = args.shift.to_i
  when /^--after-context=(\d+)/
    $after = $1.to_i

    # before (context)
  when "-B"
    $before = args.shift.to_i
  when /^--before-context=(\d+)/
    $before = $1.to_i

    # after (range)
  when "-f", "--after"
    $range_start = args.shift

    # before (range)
  when "-b", "--before"
    $range_end = args.shift

    # range
  when "-R", "--range"
    $range_start, $range_end = args.shift, args.shift

    # context
  when "-C"
    nxt = args.shift
    # keep it if it is a number, else use the default
    if nxt =~ /^\d+/
      $before = $after = nxt.to_i
    else
      $before = $after = 2
      # put back what was taken -- but not a nil, when -C was the last
      # argument
      args.unshift(nxt) if nxt
    end
  when /^--context(=(\d+))?/
    $after = $before = if $2 then $2.to_i else 2 end
  when /^-(\d+)$/
    $after = $before = $1.to_i

    # highlighting
  when "-u", "--highlight", "-hl"
    $highlight = true
  when "-U", "--no-highlight", "-nohl"
    $highlight = false
    
    # version
  when "-V", "--version"
    print $PACKAGE, ", version ", $VERSION, "\n"
    print "Written by Jeff Pace (jpace@glark.org).\n"
    print "Released under the Lesser GNU Public License.\n"
    exit 1

    # verbose
  when "-_", "--verbose"
    $verbose = true
  when "-v", "--invert-match"
    $invert_match = true
    $exit_status  = 0
  when "-i", "--ignore-case"
    $nocase = true

    # grep
  when "-g", "--grep"
    set_grep_output

    # help
  when "-?", "--help"
    show_help
    exit 1

    # regexp explanation
  when "-e", "--explain"
    $explain = true

    # line numbers
  when "-N", "--no-line-number"
    $show_line_numbers = false
  when "-n", "--line-number"
    $show_line_numbers = true

    # quiet. As in grep and in the help text, --no-messages suppresses the
    # warnings, and --messages brings them back.
  when "-q", "-s", "--quiet", "--no-messages"
    $quiet = true
  when "-Q", "-S", "--no-quiet", "--messages"
    $quiet = false

  when "-m", "--match-limit"
    $num_matches = args.shift.to_i
    
    # whole words
  when "-w", "--word", "--word-regexp"
    $whole_words = true
  when "-W", "--no-word", "--no-word-regexp"
    $whole_words = false

    # whole lines
  when "-x", "--line-regexp"
    $whole_lines = true
  when "-X", "--no-line-regexp"
    $whole_lines = false

    # file names only
  when "-l", "--files-with-matches"
    $file_names_only = true
    $invert_match = false
  when  "-L", "--files-without-match"
    $file_names_only = true
    $invert_match = true

    # colors
  when "-T", "--text-color"
    $TEXT_HIGHLIGHT = make_highlight(opt, args.shift)
  when "-F", "--file-color"
    $FILE_HIGHLIGHT = make_highlight(opt, args.shift)

  when "-c", "--count"
    $count = true

  when "-Z", "--null"
    $write_null = true

  when "-M", "--exclude-matching-filenames"
    $exclude_matching_files = true
    
  when "-d"
    $directory = args.shift
  when /^--directories=(\w+)/
    $directory = $1

  when "-r", "--recurse"
    $directory = "recurse"

  when "--or", "-o", "--and", "-a"
    ec = ExpressionCreator.new(opt, args)
    expr = ec.expr
    return expr                 # we are done.
    
    # a cluster of single-letter options, such as "-iN": handle the first now
    # and push the rest back as "-N". The match is anchored at the start, so
    # that a pattern merely containing a dash ("foo-bar") is not mistaken for
    # an option, and something must follow the letter, so that a lone unknown
    # option ("-z") does not push back "-" and recurse without end.
  when /\A(-\w)(.+)/m
    opt, rest = $1, "-" + $2
    puts "opt, rest = #{opt}, #{rest}" if $verbose
    args.unshift(rest)
    puts "args = #{args}" if $verbose
    return process_option(opt, args)
    
    # the expression
  else
    if args
      ec = ExpressionCreator.new(opt, args)
      expr = ec.expr
      return expr               # we are done.
    end
  end
  return nil                    # we're not done.
end

# process the rc file, $HOME/.glarkrc, if there is one. Each line has the form
# "name = value" or "name: value"; anything from a '#' on is a comment. Lines
# without both a name and a value, and unknown names, are ignored.
if ENV["HOME"]
  grc = ENV["HOME"] + "/.glarkrc"
  # File.exist? rather than File.exists?, which current Ruby no longer has
  if File.exist?(grc)
    IO.readlines(grc).each do |line|
      line.sub!(/\s*#.*/, "")
      line.chomp!
      name, value = line.split(/\s*[=:]\s*/)
      next unless name && value

      case name
      when "after-context"
        $after = value.to_i
      when "before-context"
        $before = value.to_i
      when "context"
        $after = $before = value.to_i
      when "file-color"
        $FILE_HIGHLIGHT = make_highlight(name, value)
      when "grep"
        set_grep_output if to_boolean(value)
      when "highlight"
        $highlight = to_boolean(value)
      when "ignore-case"
        $nocase = to_boolean(value)
      when "quiet"
        $quiet = to_boolean(value)
      when "text-color"
        $TEXT_HIGHLIGHT = make_highlight(name, value)
      when "verbose"
        $verbose = to_boolean(value)
      end

    end
  end
end

# process the environment variable: GLARKOPTS holds whitespace-separated
# options, which are handled exactly like command-line options. Whatever
# process_option returns is ignored here.
if ENV["GLARKOPTS"]
  options = ENV["GLARKOPTS"].split(/\s+/)
  until options.empty?
    opt = options.shift
    process_option(opt, options)
  end
end

# honor the EMACS environment variable; go to grep mode
set_grep_output if ENV["EMACS"]

# remembered so that "no arguments at all" can be told apart from "arguments,
# but no expression among them"
nargs = ARGV.size

# consume options from the front of ARGV until the expression has been built;
# whatever then remains in ARGV is the list of files
while ARGV.length > 0
  arg = ARGV.shift
  break if expr = process_option(arg, ARGV)
end

# without an expression there is nothing to do. (A second, identical check
# used to follow this one; it could never be reached and has been removed.)
unless expr
  # were any options processed?
  if nargs > 0
    $stderr.print "No expression provided.\n"
  end
  $stderr.print "Usage: glark [options] expression file...\n"
  $stderr.print "Try `glark --help' for more information.\n"
  exit 1
end

# sanity check for options: when both ends of the range are given in the same
# form (both percentages, or both line numbers), the start must not follow
# the end. Mixed forms cannot be compared here, since the number of lines is
# not known yet. The complaint goes to stderr, like every other error.
if $range_start && $range_end
  pctre = Regexp.new(/([\.\d]+)%/)
  smd = pctre.match($range_start)
  emd = pctre.match($range_end)
  if !smd == !emd
    if smd
      if smd[1].to_f > emd[1].to_f
        $stderr.print "ERROR: range start (#{smd}) follows range end (#{emd})\n"
        exit 2
      end
    else
      if $range_start.to_i > $range_end.to_i
        $stderr.print "ERROR: range start (#{$range_start}) follows range end (#{$range_end})\n"
        exit 2
      end
    end
  end
end

# To get rid of the annoying stack trace on ctrl-C:
trap("INT") { puts "#{$0}: interrupted"; abort }

puts expr if $explain

glark = Glark.new(expr)

# with no file arguments, search standard input
files = if ARGV.size > 0 then ARGV else [ '-' ] end

files.each do |f|
  if $exclude_matching_files
    # the expression is applied to the file name itself
    if expr.evaluate(f, 0, 0)
      spew f, "skipping because name matched"
      next
    else
      spew f, "not skipping"
    end
  end
  glark.search(f) 
end

# Report the outcome to the caller. $exit_status is kept up to date while
# searching (0 == matches, 1 == no matches), but was never passed on, so the
# program used to exit with 0 whatever the result.
exit $exit_status
