#!/bin/sh
exec ruby -w -x $0 ${1+"$@"} # -*- ruby -*-
#!ruby -w

# An extended grep, with extended functionality including full regular
# expressions, contextual output, highlighting, detection and exclusion of
# nontext files, and complex matching criteria.

# $Id: glarkrun.rb,v 1.4 2003/05/05 13:43:54 jeugenepace Exp $

require "English"

# Write through immediately on both standard streams rather than buffering.
$stdout.sync = true             # unbuffer
$stderr.sync = true             # unbuffer

# Program name and version string, kept in globals.
$PACKAGE = "glark"
$VERSION = "1.6.4"



# Very minimal logging output. When verbose mode is on (see Log.verbose=), each
# message is prefixed with the file, line number, and method whence called.

class Log
  
  @@verbose = false               # nothing is emitted until this is set
  @@width = 0                     # widest method name seen so far (align mode)
  @@output = $stdout              # destination of formatted messages
  @@fmt = "[%s:%04d] {%s} %s\n"   # file, line, method, message
  @@align = false

  # Turns logging on (true) or off (false).
  def Log.verbose=(v)
    @@verbose = v
  end

  # Sends formatted messages to the file named +fname+, which is opened for
  # writing.
  def Log.output=(fname)
    @@output = File.new(fname, "w")
  end

  # Sets fixed printf widths for the file, line and method columns used when
  # alignment is off.
  def Log.set_widths(file_width, line_width, func_width)
    @@fmt = "[%#{file_width}s:%#{line_width}d] {%#{func_width}s} %s\n"
  end

  # Pads the method column to the widest method name logged so far.
  def Log.align
    @@align = true
  end
  
  # Writes +msg+ to the log output, prefixed with the location of the caller.
  # Does nothing unless verbose mode is on. +level+ is accepted but not used.
  def Log.log(msg, level = 0)
    return unless @@verbose

    # A frame looks like "dir/file.rb:12:in `meth'". Newer Rubies write
    # "in 'Class#meth'" (straight quote), and the "in ..." part may be absent
    # altogether, so accept all of those instead of crashing on nil.
    frame = caller(1)[0].to_s
    md = /(.*):(\d+)(?::in [`'](.*)')?/.match(frame)
    if md
      file = md[1].sub(/.*\//, "")   # strip the directory part
      line = md[2].to_i
      func = md[3] || ""
    else
      file, line, func = frame, 0, ""
    end

    if @@align
      @@width = [ @@width, func.length ].max
      @@output.printf "[%s:%04d] {%-*s} %s\n", file, line, @@width, func, msg
    else
      @@output.printf @@fmt, file, line, func, msg
    end
  end

  # Plain old unformatted output, going to standard error.
  def Log.print(msg, level = 0)
    if @@verbose
      $stderr.print msg
    end
  end

end

# A thing that can be grepped.
class InputFile

  attr_reader :lines, :fname, :stati, :count
  
  # fname: name used when reporting; lines: the contents, one string per line.
  def initialize(fname, lines)
    @fname = fname
    @lines = lines
    # index = line number, value = context character
    @stati = Array.new
    # the counter exists only in count mode
    @count = 0 if $options.count
    formatter = $options.grep_output ? GrepOutputFormat : GlarkOutputFormat
    @output = formatter.new(self)
  end

  # Prints line lnum (0-indexed internally, shown 1-indexed), optionally
  # preceded by its number and, when context is being shown, by ch.
  def print_line(lnum, ch = nil)
    if lnum >= lines.size
      Log.log "line number #{lnum} exceeds line count #{lines.size}"
    else
      printf "%5d ", lnum + 1 if $options.show_line_numbers
      printf "%s ", ch if ch && has_context
      print lines[lnum]
    end
  end

  # True when any before- or after-context was requested.
  def has_context
    !($options.after == 0 && $options.before == 0)
  end

  # Assigns ch to every line in from..to; lines that already carry a status
  # are left alone unless force is set.
  def set_status(from, to, ch, force = false)
    from.upto(to) do |ln|
      next if @stati[ln] && !force
      @stati[ln] = ch
    end
  end

  # Records a match spanning start_line..end_line: bumps the counter in count
  # mode, otherwise flags the matched lines and their surrounding context.
  def mark_as_match(start_line, end_line = start_line)
    # grep-style output shows only the first line of a match
    end_line = start_line if $options.grep_output

    $options.exit_status = $options.invert_match ? 1 : 0

    if $options.count
      @count += 1
    else
      first_before = [0, start_line - $options.before].max
      last_after   = end_line + $options.after
      set_status(first_before, start_line - 1, "-")
      set_status(start_line,   end_line,       ":", true)
      set_status(end_line + 1, last_after,     "+")
    end
  end

  # Both writers hand off to the output format chosen at construction.
  def write_matches
    @output.write_matches
  end

  def write_non_matches
    @output.write_non_matches
  end

end


# Common behavior for writing the results of searching one input file.
# Subclasses supply write_count and write_non_count.
class OutputFormat

  def initialize(infile)
    @infile = infile
  end

  # Prints line lnum (0-indexed internally, displayed 1-indexed). Line numbers
  # past the end of the file are ignored.
  def print_line(lnum, ch = nil)
    return unless lnum < @infile.lines.size

    printf "%5d ", lnum + 1 if $options.show_line_numbers
    printf "%s ", ch if ch && has_context
    print @infile.lines[lnum]
  end

  # True when any before- or after-context was requested.
  def has_context
    !($options.after == 0 && $options.before == 0)
  end

  # Writes every line that carries a status, or the count in count mode.
  def write_matches
    return write_count if $options.count

    marks = @infile.stati
    marks.each_index do |ln|
      print_line(ln, marks[ln]) if marks[ln]
    end
  end

  # Writes every line not flagged as a match (":"), or the non-match count in
  # count mode.
  def write_non_matches
    return write_non_count if $options.count

    @infile.lines.each_index do |ln|
      print_line(ln) unless @infile.stati[ln] == ":"
    end
  end

end


# Output format that puts an (optionally highlighted) file name header above
# the results when more than one command-line argument is present.
class GlarkOutputFormat < OutputFormat

  # Prints "<name>:" on a line of its own, wrapped in the file highlight
  # sequence when highlighting is on. Does nothing if file names are off.
  def show_file_header
    return unless $options.show_file_names

    print $options.file_highlight if $options.highlight
    print @infile.fname, ":"
    print ANSIColor.reset if $options.highlight
    print "\n"
  end

  def write_count
    puts "    " + @infile.count.to_s
  end

  # Non-matching lines = total lines minus the match count.
  def write_non_count
    remaining = @infile.lines.size - @infile.count
    puts "    " + remaining.to_s
  end

  def write_matches
    if ARGV.size > 1
      show_file_header
    end
    super
  end

  def write_non_matches
    if ARGV.size > 1
      show_file_header
    end
    super
  end

end


# This matches grep, mostly. It is for running within emacs, thus,
# it does not support context or highlighting.
class GrepOutputFormat < OutputFormat

  def initialize(infile)
    # decided once: name prefixes need several arguments and file names enabled
    @show_file_name = ARGV.size > 1 && $options.show_file_names
    super
  end

  def write_count
    show_file_prefix
    puts @infile.count
  end

  def write_non_count
    show_file_prefix
    puts @infile.lines.length - @infile.count
  end

  # Prints "name:", then "N: " when line numbers are on, then the line itself.
  # Lines are 0-indexed internally and displayed 1-indexed; ch is ignored.
  def print_line(lnum, ch = nil)
    show_file_prefix
    printf "%d: ", lnum + 1 if $options.show_line_numbers
    print @infile.lines[lnum]
  end

  private

  # Writes the "name:" prefix when file names are being shown.
  def show_file_prefix
    print @infile.fname, ":" if @show_file_name
  end

end


# An input file treated as binary: matches are reported by name only, never by
# printing the matching lines.
class BinaryFile < InputFile

  # In count mode the count is written by the output format created in
  # InputFile#initialize (write_count is defined there, not on this class);
  # otherwise a single "Binary file ... matches" line is printed.
  def write_matches
    if $options.count
      @output.write_count
    else
      puts "Binary file " + @fname + " matches"
    end
  end

  # Same as write_matches, but reports the non-matching count in count mode.
  def write_non_matches
    if $options.count
      @output.write_non_count
    else
      puts "Binary file " + @fname + " matches"
    end
  end

end



# Classifies a path as text, binary, directory, unreadable, nonexistent or
# unknown.
class FileTester 

  BINARY = "binary"
  DIRECTORY = "directory"
  NONE = "none"
  TEXT = "text"
  UNKNOWN = "unknown"
  UNREADABLE = "unreadable"

  # the percentage of characters that we allow to be odd in a text file
  @@ODD_FACTOR = 0.3

  # how many bytes (characters) of a file we test
  @@TEST_LENGTH = 1024

  # extensions associated with files that are always text:
  @@KNOWN_TEXT = %w{ txt c cpp mk h hpp html java }

  # extensions associated with files that are never text:
  @@KNOWN_NONTEXT = %w{ a o obj class elc gif gz jar jpg jpeg png pdf tar Z }

  # Whether the byte value c (an integer) lies in the 7-bit ASCII range.
  def FileTester.ascii?(c)
    # from ctype.h
    return (c.to_i & ~0x7f) == 0
  end

  # Returns one of the type constants above for the given path.
  def FileTester.type(file)
    # File.exist? rather than File.exists?, which newer Rubies no longer have.
    if File.exist?(file)
      if File.stat(file).file?
        if File.readable?(file)
          if FileTester.text?(file)
            TEXT
          else
            BINARY
          end
        else
          UNREADABLE
        end
      elsif File.stat(file).directory?
        DIRECTORY
      else
        UNKNOWN
      end
    else
      NONE
    end
  end

  # Whether the file looks like text. A known suffix decides immediately;
  # otherwise up to @@TEST_LENGTH leading bytes are examined: any NUL byte
  # means "not text", and the share of non-ASCII bytes must stay below
  # @@ODD_FACTOR. An empty file is not considered text.
  def FileTester.text?(file)
    # Don't waste our time if it doesn't even exist:
    return false unless File.exist?(file)
    
    if file.index(/\.(\w+)\s*$/)
      suffix = $1
      return true  if @@KNOWN_TEXT.include?(suffix)
      return false if @@KNOWN_NONTEXT.include?(suffix)
    end

    # Read raw bytes (binary mode, block form so the handle is closed). We
    # need integer byte values here: indexing a string yields a one-character
    # string on current Rubies, and its to_i is 0 for nearly everything.
    sample = File.open(file, "rb") { |f| f.read(@@TEST_LENGTH) }

    ntested = 0
    nodd = 0
    # read returns nil at end of file, i.e. for an empty file
    (sample || "").each_byte do |byte|
      ntested += 1

      # never allow null in a text file
      return false if byte == 0

      nodd += 1 unless FileTester.ascii?(byte)
    end
    
    return FileTester.summary(nodd, ntested)
  end

  # True when fewer than @@ODD_FACTOR of the tested bytes were odd.
  def FileTester.summary(nodd, ntested)
    return nodd < ntested * @@ODD_FACTOR
  end

end



# The main processor.
class Glark 
  # func: the expression object whose process method is run on each input.
  def initialize(func)
    @func = func
  end

  # Runs the expression over the given lines as ordinary input.
  def search_file(fname, lines)
    Log.log "searching #{fname} for #{@func}" if $options.verbose
    @func.process(InputFile.new(fname, lines))
  end

  # Runs the expression over the given lines as binary input.
  def search_binary_file(fname, lines)
    Log.log "searching binary file #{fname} for #{@func}" if $options.verbose
    @func.process(BinaryFile.new(fname, lines))
  end
  
  # Searches the named input: "-" means standard input; anything else is
  # dispatched on the type reported by FileTester.
  def search(name)
    Log.log "searching #{name} for #{@func}"

    if name == "-" 
      Log.log "reading standard input..."
      tell_user "reading standard input...\n"
      search_file(name, $stdin.readlines)
    else
      type = FileTester.type(name)
      case type
      when FileTester::TEXT
        Log.log "searching text", 2
        search_file(name, IO.readlines(name))
      when FileTester::BINARY
        Log.log "handling binary", 1
        search_binary(name)
      when FileTester::UNREADABLE
        Log.log "skipping unreadable", 2
        tell_user "file not readable: #{name}\n"
      when FileTester::NONE
        Log.log "skipping none", 2
        tell_user "WARNING: no such file: #{name}\n"
      when FileTester::UNKNOWN
        Log.log "skipping unknown", 2
        tell_user "WARNING: unknown file type: #{name}\n"
      when FileTester::DIRECTORY
        Log.log "processing directory"
        search_directory(name)
      else
        print "unknown type #{type}"
      end
    end
  end

  private

  # Writes msg to standard error unless quiet mode is on.
  def tell_user(msg)
    $stderr.print msg unless $options.quiet
  end

  # Applies the binary_files policy to a file detected as binary.
  def search_binary(name)
    case $options.binary_files
    when "without-match"
      Log.log "skipping binary file #{name}"
    when "binary"
      search_binary_file(name, IO.readlines(name))
    when "text"
      Log.log "processing binary file #{name} as text"
      search_file(name, IO.readlines(name))
    end
  end

  # Applies the directory policy: complain, descend, or skip.
  def search_directory(name)
    case $options.directory
    when "read"
      tell_user "glark: #{name}: Is a directory\n"
    when "recurse"
      children = Dir.entries(name) - [".", ".."]
      children.each { |child| search("#{name}/#{child}") }
    when "skip"
      Log.print "skipping directory #{name}"
    end
  end
end



# Returns the home directory, for both Unix and Windows.

module Env

  # HOME wins when set; otherwise the result is HOMEDRIVE followed by
  # HOMEPATH (a missing drive counts as "", a missing path as "\"). Returns
  # nil when none of the three variables is set.
  def Env.home_directory
    home = ENV["HOME"]
    return home if home

    drive = ENV["HOMEDRIVE"]
    path  = ENV["HOMEPATH"]
    return nil unless drive || path

    (drive || "") + (path || "\\")
  end

end

#-------------------------------------------------------
# ANSIColor module
#-------------------------------------------------------

module ANSIColor

  # Adds the escape sequence for value retrieval.
  class ANSIEscapedColors < Hash
    def [](str)
      val = super
      if val
        val = "\e[" + val + "m"
      end
      val
    end
  end

  @@ATTRIBUTE_COLORS = ANSIEscapedColors[
    'none'       => '00', 
    'bold'       => '01',
    'underscore' => '04',
    'underline'  => '04',
    'blink'      => '05',
    'reverse'    => '07',
    'concealed'  => '08'
  ]

  @@TEXT_COLORS = ANSIEscapedColors[
    'black'   => '30',
    'red'     => '31',
    'green'   => '32',
    'yellow'  => '33',
    'blue'    => '34',
    'magenta' => '35',
    'cyan'    => '36',
    'white'   => '37'
  ]
  
  @@BACKGROUND_COLORS = ANSIEscapedColors[
    'black'   => '40',
    'red'     => '41',
    'green'   => '42',
    'yellow'  => '43',
    'blue'    => '44',
    'magenta' => '45',
    'cyan'    => '46',
    'white'   => '47'
  ]

  @@RESET = "\e[0m"

  # returns the code for the given color string, which is in the format:
  # [foreground] on [background]. Note that the foreground and background sections
  # can have modifiers (attributes). Examples:
  #     black
  #     blue on white
  #     bold green on yellow
  #     underscore bold magenta on cyan
  #     underscore red on bold cyan

  def ANSIColor.code(str)
    fg, bg = str.split(/\s*\bon_?\s*/)
    foreground(fg) + background(bg)
  end

  # returns the code for the given background color(s). Attributes such as
  # "bold" are accepted here as well, so that the documented form
  # "underscore red on bold cyan" works instead of being rejected as an
  # unknown color.
  def ANSIColor.background(bgcolor)
    make_code(bgcolor, [ @@ATTRIBUTE_COLORS, @@BACKGROUND_COLORS ])
  end

  # returns the code for the given foreground color(s)
  def ANSIColor.foreground(fgcolor)
    make_code(fgcolor, [ @@ATTRIBUTE_COLORS, @@TEXT_COLORS ])
  end

  # returns the sequence that switches all attributes and colors off
  def ANSIColor.reset
    @@RESET
  end

  # Internal helper (a visibility keyword here would not apply to a method
  # defined on the module itself, so none is used). Looks up each
  # whitespace-separated word of str in the given tables, in order, and
  # concatenates the escape sequences found. A nil str yields "". At the first
  # unknown word a warning goes to standard error and the codes collected so
  # far are returned.
  def ANSIColor.make_code(str, colors)
    code = ""
    return code unless str
    str.split.each do |s|
      found = false
      colors.each do |ca|
        if c = ca[s]
          code += c 
          found = true
          break
        end
      end
      unless found
        $stderr.puts "WARNING: ANSIColor::make_code(" + str + "): unknown color: " + s
        break
      end
    end
    code
  end

end



# A function object, which can be applied (processed) against a InputFile.
class FuncObj
  
  attr_accessor :match_line_number, :file, :matches

  def initialize
    @match_line_number = nil
    @matches = Array.new
  end

  # Remembers that line lnum matched.
  def add_match(lnum)
    @matches << lnum
  end

  def start_position
    match_line_number
  end

  def end_position
    start_position
  end

  # Forgets all state from a previous file and notes the new file name.
  def reset_file(file)
    @match_line_number = nil
    @file              = file
    @matches           = Array.new
  end

  # Converts a range bound to a number: "N%" becomes that percentage of
  # count, anything else goes through to_f, and nil stays nil.
  def range(var, count)
    return nil unless var

    percent = var[/([\.\d]+)%/, 1]
    percent ? count * percent.to_f / 100 : var.to_f
  end

  # Evaluates every line of infile that lies within the configured range,
  # marks the matches (stopping at the match limit, if any), then writes the
  # file name, the non-matches, or the matches, as the options dictate.
  def process(infile)
    reset_file(infile.fname)

    total    = infile.lines.size
    rgstart  = range($options.range_start, total)
    rgend    = range($options.range_end, total)
    nmatches = 0

    infile.lines.each_with_index do |line, lnum|
      next if rgstart && lnum < rgstart
      next if rgend && lnum > rgend
      next unless evaluate(line, lnum, infile.fname)

      mark_as_match(infile)
      nmatches += 1
      break if $options.num_matches && nmatches >= $options.num_matches
    end

    got_match = nmatches > 0
    
    if $options.file_names_only
      if got_match != $options.invert_match
        print infile.fname
        print($options.write_null ? "\0" : "\n")
      end
    elsif $options.invert_match
      infile.write_non_matches
    elsif got_match
      infile.write_matches
    end
  end

  def mark_as_match(infile)
    infile.mark_as_match(start_position)
  end
  
  def to_s
    inspect
  end
  
end


# Applies a regular expression against a InputFile.
class RegexpFuncObj < FuncObj

  attr_reader :re

  def initialize(re)
    @re = re
    @file = nil
    super()
  end

  def inspect
    @re.inspect
  end

  # Tests line against the regular expression. On a match the line may be
  # rewritten in place (reduced to the last group of the match when extracting,
  # then wrapped in highlight codes when highlighting), the line number is
  # recorded, and true is returned; otherwise false.
  def evaluate(line, lnum, file)
    md = @re.match(line)
    return false unless md

    if $options.extract_matches
      Log.log "replacing line"
      line.replace(md[-1] + "\n")
    else
      Log.log "NOT replacing line"
    end

    @match_line_number = lnum

    # highlight what the regular expression matched; must use the block form
    if $options.highlight
      line.gsub!(@re) { |m| $options.text_highlight + m + ANSIColor.reset }
    end

    add_match(lnum)
    true
  end
  
end


# Associates a pair of expressions.
class CompoundExpression < FuncObj

  # op1, op2: the two sub-expressions being combined.
  def initialize(op1, op2)
    @op1 = op1
    @op2 = op2
    @file = nil
    super()
  end

  # Resets both operands, first to second, before resetting this object.
  def reset_file(file)
    [ @op1, @op2 ].each { |op| op.reset_file(file) }
    super
  end

  # The starting line most recently stored by a subclass.
  def start_position
    @last_start
  end
  
end


# Evaluates both expressions, and is satisfied when either return true.
class OrExpression < CompoundExpression

  # Always evaluates both operands. When either matched, takes over the
  # positions of the matching operand (the second one when both matched),
  # records the line and returns true; otherwise returns false.
  def evaluate(line, lnum, file)
    m1 = @op1.evaluate(line, lnum, file)
    m2 = @op2.evaluate(line, lnum, file)
    return false unless m1 || m2

    source = m2 ? @op2 : @op1
    @last_start = source.start_position
    @last_end   = source.end_position

    @match_line_number = lnum
    add_match(lnum)
    true
  end
  
  def inspect
    "(" + @op1.to_s + " or " + @op2.to_s + ")"
  end

  def end_position
    @last_end
  end

end


# Evaluates both expressions, and is satisfied when both return true.
class AndExpression < CompoundExpression
  
  # dist: the maximum number of lines allowed between the two matches (may be
  # Float infinity); op1, op2: the sub-expressions.
  def initialize(dist, op1, op2)
    @dist = dist
    super(op1, op2)
  end

  # Marks the whole span, from the earlier match to the later one.
  def mark_as_match(infile)
    infile.mark_as_match(start_position, end_position)
  end

  # Whether op has a recorded match, the latest of which lies no more than
  # @dist lines before lnum. (The subtraction used to run the other way
  # round, which made the distance test trivially true.)
  def match_within_distance(op, lnum)
    !op.matches.empty? && (lnum - op.matches[-1] <= @dist)
  end

  def inspect
    # The "infinite" distance may be held either as the option's sentinel
    # value or as Float infinity, so recognise both.
    unbounded = @dist == $options.infinite_distance ||
      (@dist.respond_to?(:infinite?) && @dist.infinite?)

    str = "("+ @op1.to_s
    if @dist == 0
      str += " same line as "
    elsif unbounded
      str += " same file as "
    else 
      str += " within " + @dist.to_s + " lines of "
    end
    str += @op2.to_s + ")"
    str
  end

  # Looks at the most recent match of op: when it is close enough to lnum it
  # becomes the start of the combined match and true is returned, otherwise
  # false. Returns true without changing anything when op has no matches.
  def reset_match(op, lnum)
    latest = op.matches[-1]
    return true unless latest
    return false unless lnum - latest <= @dist

    @last_start = latest
    true
  end

  # Evaluates both operands on this line (both are always run so that each
  # records its own matches), then pairs a match of one with the latest match
  # of the other.
  def match?(line, lnum, file)
    m1 = @op1.evaluate(line, lnum, file)
    m2 = @op2.evaluate(line, lnum, file)

    if m1 and match_within_distance(@op2, lnum)
      return reset_match(@op2, lnum)
    elsif m2 and match_within_distance(@op1, lnum)
      return reset_match(@op1, lnum)
    else
      return false
    end
  end

  def end_position
    [@op1.end_position, @op2.end_position].max
  end

  def evaluate(line, lnum, file)
    if match?(line, lnum, file)
      @match_line_number = lnum
      # Record the match, as the other expression classes do; an enclosing
      # and-expression consults this list and could otherwise never be
      # satisfied by a nested and-expression.
      add_match(lnum)
      return true
    else
      return false
    end
  end
end


# Builds the expression tree from the command-line arguments: plain patterns
# become RegexpFuncObj objects, and the or/and options combine two
# sub-expressions. The result is available through expr.
class ExpressionCreator

  attr_reader :expr

  # arg: the argument to start from; args: the arguments that follow it,
  # consumed (shifted) as sub-expressions are read.
  def initialize(arg, args)
    @current = arg
    @args    = args
    @expr    = create_expression
  end

  # Turns the current argument into a RegexpFuncObj, honoring the
  # ignore-case, whole-word and whole-line options.
  def create_regular_expression
    pat = @current

    # this check is because they may have omitted the pattern, e.g.:
    #   % glark *.cpp
    # (File.exist? rather than File.exists?, which newer Rubies no longer
    # provide.)
    if File.exist?(pat) and !$options.quiet
      $stderr.print "WARNING: pattern '#{pat}' exists as a file.\n"
      $stderr.print "    Pattern may have been omitted.\n"
    end

    ignorecase = $options.nocase
    
    # we handle a ridiculous number of possibilities here:
    #     /foobar/     -- "foobar"
    #     /foo/bar/    -- "foo", then slash, then "bar"
    #     /foo\/bar/   -- same as above
    #     /foo/bar/i   -- same as above, case insensitive
    #     /foo/bari    -- "/foo/bari" exactly
    #     /foo/bar\/i  -- "/foo/bar/i" exactly
    #     foo/bar/     -- "foo/bar/" exactly
    #     foo/bar/     -- "foo/bar/" exactly

    if pat.index(/^\/(.*[^\\])\/i$/)
      pattern    = $1
      ignorecase = true
    elsif pat.index(/^\/(.*[^\\])\/$/)
      pattern    = $1
    elsif pat.index(/^(\/.*)$/)
      pattern    = $1
    elsif pat.index(/^(.*\/)$/)
      pattern    = $1
    else
      pattern    = pat
    end
    
    if $options.whole_words
      # match "\w", A-Za-z0-9_, 
      stword = pattern.index(/^[\[\(]*(?:\\w|\w)/)

      re = Regexp.new('(?:                 # one of the following:
                           \\w             #   - \w for regexp
                         |                 # 
                           \w              #   - a literal A-Z, a-z, 0-9, or _
                         |                 # 
                           (?:             #   - one of the following:
                               \[[^\]]*    #         LB, with no RB until:
                               (?:         #      - either of:
                                   \\w     #         - "\w"
                                 |         # 
                                   \w      #         - a literal A-Z, a-z, 0-9, or _
                               )           #      
                               [^\]]*\]    #      - anything (except RB) to the next RB
                           )               #
                       )                   #
                       (?:                 # optionally, one of the following:
                           \*              #   - "*"
                         |                 # 
                           \+              #   - "+"
                         |                 #
                           \?              #   - "?"
                         |                 #
                           \{\d*,\d*\}     #   - "{3,4}", "{,4}, "{,123}" (also matches the invalid {,})
                       )?                  #
                       $                   # the end.
                      ', 
                      Regexp::EXTENDED)
      endword = pattern.index(re)

      if stword && endword
        # good
      else
        msg = "WARNING: pattern '#{pattern}' does not "
        if stword
          msg += "end"
        elsif endword
          msg += "begin"
        else
          msg += "begin and end"
        end
        msg += " on a word boundary."
        $stderr.puts msg
      end
      pattern = '\b' + pattern + '\b'
    elsif $options.whole_lines
      pattern = '^'  + pattern + '$'
    end
    
    if ignorecase
      regex = Regexp.new(pattern, Regexp::IGNORECASE)
    else
      regex = Regexp.new(pattern)
    end
    
    return RegexpFuncObj.new(regex)
  end 

  # creates two expressions and returns them.
  def create_expressions
    @current = @args.shift
    a1 = create_expression

    @current = @args.shift
    a2 = create_expression
    
    [ a1, a2 ]
  end

  # Reads the two operands of an 'or' and returns an OrExpression. Exits with
  # status 2 when either operand is missing.
  def consume_or_expression
    a1, a2 = create_expressions
    unless a1 && a2
      $stderr.print "ERROR: 'or' expression takes two arguments\n"
      exit 2
    end

    # explicit end tag is optional:
    @args.shift if @args[0] == "--end-of-or"
    OrExpression.new(a1, a2)
  end

  # Reads the distance and the two operands of an 'and' and returns an
  # AndExpression. Exits with status 2 on a non-numeric distance or a missing
  # operand.
  def consume_and_expression
    dist = @args.shift
    # check to ensure that this is numeric
    if !dist || (dist.to_i != $options.infinite_distance && !dist.index(/^\d+$/))
      $stderr.print "ERROR: invalid distance for 'and' expression: '#{dist}'\n" 
      $stderr.print "    expecting an integer, or #{$options.infinite_distance} for 'infinite'\n" 
      exit 2
    end

    if dist.to_i == $options.infinite_distance
      dist = 1.0 / 0.0            # infinity
    else
      dist = dist.to_i
    end

    a1, a2 = create_expressions
    unless a1 && a2
      $stderr.print "ERROR: 'and' expression takes two arguments\n"
      exit 2
    end
    # explicit end tag is optional:
    @args.shift if @args[0] == "--end-of-and"
    AndExpression.new(dist, a1, a2)
  end

  # Builds the expression for the current argument; returns nil when there is
  # no current argument. Exits with status 2 on an unrecognized "--" option.
  def create_expression
    if @current
      Log.log "processing arg #{@current}"
      case @current
      when "--or", "-o"
        return consume_or_expression
      when "--and", "-a"
        return consume_and_expression
      when /^--/
        # newline added so the message does not run into the shell prompt
        $stderr.print "option not understood: #{@current}\n"
        exit 2
      else
        $stderr.print "assuming the last argument #{@current} is a pattern\n" if $options.verbose
        return create_regular_expression
      end
    else
      return nil
    end
  end

end



# Creating an instance writes the complete usage message to standard output.
class GlarkHelp

  def initialize
    puts "USAGE"
    puts "    glark [options] expression file..."
    puts ""
    puts "OPTIONS"
    puts "    -?, --help"
    puts "        Display this help message."
    puts ""
    puts "    -A NUM, --after-context=NUM"
    puts "        Print NUM lines after a matched expression."
    puts ""
    puts "    -b NUM[%], --before NUM[%]"
    puts "        Restrict the search to before the given location, which represents"
    puts "        either the number of the last line within the valid range, or the"
    puts "        percentage of lines to be searched."
    puts ""
    puts "    --binary-files=TYPE"
    puts "        Specify how to treat binary files, thus overriding the default"
    puts "        behavior, which is to denote the binary files that match the"
    puts "        expression, without displaying the match."
    puts ""
    puts "    -B NUM, --before-context=NUM"
    puts "        Print NUM lines before a matched expression."
    puts ""
    puts "    -C [NUM], -NUM, --context[=NUM]"
    puts "        Output <NUM> lines of context around a matched expression. The"
    puts "        default is no context. If no NUM is given for this option, the"
    puts "        number of lines of context is 2."
    puts ""
    puts "    -c, --count"
    puts "        Instead of normal output, display only the number of matches in"
    puts "        each file. Because of multi-line matches, the -v (--invert-match)"
    puts "        option would produce misleading output with this option."
    puts ""
    puts "    -d ACTION, --directories=ACTION"
    puts "        Directories are processed according to the given ACTION, which by"
    puts "        default is 'read'. If ACTION is 'recurse', each file in the"
    puts "        directory is read and each subdirectory is recursed into"
    puts "        (equivalent to the -r option). If ACTION is 'skip', directories are"
    puts "        not read, and no message is produced."
    puts ""
    puts "    -f NUM[%], --after NUM[%]"
    puts "        Restrict the search to after the given location, which represents"
    puts "        either the number of the first line within the valid range, or the"
    puts "        percentage of lines to be skipped."
    puts ""
    puts "    -F, --file-color COLOR"
    puts "        Specify the highlight color for file names."
    puts ""
    puts "    -g, --grep"
    puts "        Produce output like the grep default: file names and line numbers,"
    puts "        and a single line of the match, which will be the first line for."
    puts "        matches that span multiple lines."
    puts ""
    puts "    -h, --no-filename"
    puts "        Do not display the names of the files that matched."
    puts ""
    puts "    -H, --with-filename"
    puts "        Display the names of the files that matched. This is the default"
    puts "        behavior."
    puts ""
    puts "    -i, --ignore-case"
    puts "        Match regular expressions without regard to case. The default is"
    puts "        case sensitive."
    puts ""
    puts "    -l, --files-with-matches"
    puts "        Print only the names of the file that matched the expression."
    puts ""
    puts "    -L, --files-without-match"
    puts "        Print only the names of the file that did not match the expression."
    puts ""
    puts "    -m NUM, --match-limit NUM"
    puts "        Find only the first NUM matches in each file."
    puts ""
    puts "    -M, --exclude-matching-filenames"
    puts "        Do not search files whose names match the given expression."
    puts ""
    puts "    -n, --line-number"
    puts "        Display the line numbers. This is the default behavior."
    puts ""
    puts "    -N, --no-line-number"
    puts "        Do not display the line numbers."
    puts ""
    puts "    -q, -s, --quiet, --no-messages"
    puts "        Suppress warnings."
    puts ""
    puts "    -Q, --no-quiet"
    puts "        Enable warnings. This is the default."
    puts ""
    puts "    -r, --recurse"
    # 'recurse', not 'read': the -d entry above names -r as the equivalent of
    # --directories=recurse.
    puts "        Recurse through directories. Equivalent to --directories=recurse."
    puts ""
    puts "    -R, --range NUM[%] NUM[%]"
    puts "        Restrict the search to the given range of lines."
    puts ""
    puts "    -T, --text-color COLOR"
    puts "        Specify the highlight color for text."
    puts ""
    puts "    -u, --highlight, -hl"
    puts "        Turn highlighting on, which uses ANSI escape sequences. This is the"
    puts "        default behavior. The '-hl' option is deprecated."
    puts ""
    puts "    -U, --no-highlight, -nohl"
    puts "        Turn highlighting off. The '-nohl' option is deprecated."
    puts ""
    puts "    -v, --invert-match"
    puts "        Show lines that do not match the expression."
    puts ""
    puts "    -V, --version"
    puts "        Display version information."
    puts ""
    puts "    -w, --word, --word-regexp"
    puts "        Put word boundaries around each pattern, thus matching only where"
    puts "        the full word(s) occur in the text. Thus, glark -w Foo is the same"
    # the backslashes are doubled: in a double-quoted string "\b" is a
    # backspace character, not the two characters \ and b
    puts "        as glark '/\\bFoo\\b/'."
    puts ""
    puts "    -W, --no-word"
    puts "        Do not put word boundaries around each pattern. This is the default."
    puts ""
    puts "    -x, --line-regexp"
    puts "        Select only where the entire line matches the pattern(s)."
    puts ""
    puts "    -X, --no-line-regexp"
    puts "        Do not select only where the entire line matches the pattern(s). This"
    puts "        is the default behavior."
    puts ""
    puts "    -Z, --null"
    puts "        When in -l mode, write file names followed by the ASCII NUL character ('\\0')"
    puts "        instead of '\\n'."
    puts ""
    puts "    -_, --verbose"
    puts "        Display normally suppressed output, for debugging purposes."
    puts ""
    puts "EXPRESSION"
    puts "    A regular expression, in Ruby format. Run `perldoc perlre' for more"
    puts "    general information. The expression may be of either form:"
    puts ""
    puts "        something"
    puts "        /something/"
    puts ""
    puts "    All regular expression characters and options are available, such as"
    # backslash doubled: "\w" in a double-quoted string is just "w"
    puts "    ``\\w'', ``.*?'' and ``[^9]''."
    puts ""
    puts "    If the and and or options are not used, the last non-option is"
    puts "    considered to be the expression to be matched."
    puts ""
    puts "    -o expr1 expr2"
    puts "    --or expr1 expr2 --end-of-or"
    puts "        Matches either of the two expressions. The results of the two forms"
    puts "        are equivalent. In the latter syntax, the --end-of-or is optional."
    puts ""
    puts "    -a number expr1 expr2"
    puts "    --and number expr1 expr2 --end-of-and"
    puts "        Matches both of the two expressions, within <number> lines of each"
    puts "        other. As with the or option, the results of the two forms are"
    puts "        equivalent, and the --end-of-and is optional."
    puts ""
    puts "        If the number provided is -1 (negative one), the distance is"
    puts "        considered to be ``infinite'', and thus, the condition is satisfied"
    puts "        if both expressions match within the same file."
    puts ""
    puts "        If the number provided is 0 (zero), the condition is satisfied if"
    puts "        both expressions match on the same line."
    puts ""
    puts "        A warning will be issued if the value given in the number position"
    puts "        does not appear to be numeric."
    puts ""
    puts "    --explain"
    puts "        Writes the regular expression in a more legible format. Useful for"
    puts "        debugging."
    puts ""
    puts "See the man page for more information."
    puts ""
  end

end



# The complete set of run-time settings for glark. The settings start at the
# defaults assigned in initialize and are then overridden, in order, by the
# .glarkrc file in the home directory, the GLARKOPTS environment variable, and
# the command line arguments. Creating an instance with new also stores it in
# the global $options.
class GlarkOptions

  # Short options that require a value. The value may be attached directly to
  # the option letter, as in "-A3" or "-dskip". "-C" is deliberately absent: its
  # value is optional, so "-Ci" has to keep meaning "-C -i".
  SHORT_OPTIONS_WITH_VALUE = %w{ -A -B -b -d -f -m -F -T }.freeze

  attr_accessor :after
  attr_accessor :before
  attr_accessor :binary_files
  attr_accessor :count
  attr_accessor :directory
  attr_accessor :expr
  attr_accessor :exclude_matching_files
  attr_accessor :exit_status
  attr_accessor :explain
  attr_accessor :extract_matches
  attr_accessor :file_highlight
  attr_accessor :file_names_only
  attr_accessor :grep_output
  attr_accessor :highlight
  attr_accessor :infinite_distance
  attr_accessor :invert_match
  attr_accessor :nocase
  attr_accessor :num_matches
  attr_accessor :quiet
  attr_accessor :range_end
  attr_accessor :range_start
  attr_accessor :show_line_numbers
  attr_accessor :show_file_names
  attr_accessor :text_highlight
  attr_accessor :verbose
  attr_accessor :whole_lines
  attr_accessor :whole_words
  attr_accessor :write_null

  # Assigns the default value of every setting, and registers this object as
  # the global $options.
  #
  # package:: the program name, as printed by show_version
  # version:: the program version, as printed by show_version
  def initialize(package = "undef", version = "1.2.3.4")
    $options = self

    @after             = 0          # lines of context after the match
    @before            = 0          # lines of context before the match
    @binary_files      = "binary"   # 
    @count             = false      # just count the lines
    @directory         = "read"     # read, skip, or recurse, a la grep
    @expr              = nil    # the expression to be evaluated
    @exclude_matching_files = false # exclude files whose names match the expression
    @exit_status       = 1          # 0 == matches, 1 == no matches, 2 == error
    @explain           = false      # display a legible version of the expression
    @extract_matches   = false      # whether to show _only_ the part that matched
    @file_names_only   = false      # display only the file names
    @grep_output       = false      # emulate grep output
    @highlight         = true       # highlight matches (using ANSI codes)

    @infinite_distance = -1         # signifies no limit to the distance between
                                    # matches, i.e., anywhere within the entire file is valid.

    @invert_match      = false      # display non-matching lines
    @nocase            = false      # match case
    @num_matches       = nil        # the maximum number of matches to display per file
    @package           = package
    @quiet             = false      # minimize warnings
    @range_end         = nil        # range to stop searching; nil => the entire file
    @range_start       = nil        # range to begin searching; nil => the entire file
    @show_line_numbers = true       # display numbers of matching lines
    @show_file_names   = true       # show the names of matching files
    @verbose           = nil        # display debugging output
    @version           = version
    @whole_lines       = false      # true means patterns must match the entire line
    @whole_words       = false      # true means all patterns are '\b'ed front and back
    @write_null        = false      # in @file_names_only mode, write '\0' instead of '\n'

    # default highlighting
    @text_highlight    = ANSIColor::code("black on yellow")
    @file_highlight    = ANSIColor::code("reverse bold")
  end

  # Gathers the settings from every source, from lowest to highest precedence,
  # and then checks them for consistency.
  def run
    Log.log ""
    read_rcfile
    read_environment_variable

    # honor thy EMACS; go to grep mode
    set_grep_output if ENV["EMACS"]

    read_options
    validate
  end

  # Applies the settings in the .glarkrc file of the home directory, if there
  # is a home directory and the file exists. Each line has the form
  # "name = value" or "name: value"; anything from a "#" onward is a comment.
  # Lines without both a name and a value, and unknown names, are ignored.
  def read_rcfile
    Log.log ""
    hd = Env.home_directory
    return unless hd

    rc = hd + "/.glarkrc"
    Log.log "reading RC file: #{rc}"
    # File.exist?, not File.exists?: the latter was removed in Ruby 3.2.
    return unless File.exist?(rc)

    IO.readlines(rc).each do |line|
      # Drop the comment, then all surrounding whitespace, so that neither
      # indentation nor trailing blanks become part of the name or the value.
      setting = line.sub(/\s*#.*/, "").strip

      # Split at the first separator only, so the value is kept whole.
      name, value = setting.split(/\s*[=:]\s*/, 2)
      next unless name && value
      next if value.empty?

      apply_rc_setting(name, value)
    end
  end

  # creates a color for the given option, based on its value. Exits with
  # status 2 if no value was given.
  def make_highlight(opt, value)
    if value
      return ANSIColor::code(value)
    else
      $stderr.print "ERROR: " + opt + " requires a color\n"
      exit 2
    end
  end

  # returns whether the value matches a true value, such as "yes", "true", or "on".
  def to_boolean(value)
    [ "yes", "true", "on" ].include?(value.downcase)
  end

  # Processes the whitespace-separated options in the GLARKOPTS environment
  # variable, if it is set.
  def read_environment_variable
    e = ENV["GLARKOPTS"]
    return unless e

    # split with no pattern ignores leading whitespace; splitting on /\s+/
    # would yield an empty first token, which would be taken as the expression.
    options = e.split
    while options.length > 0
      opt = options.shift
      process_option(opt, options)
    end
  end

  # sets output a la grep
  def set_grep_output
    @highlight         = false
    @show_line_numbers = false
    @after             = 0
    @before            = 0
    @grep_output       = true
  end

  # Consumes options from ARGV until the expression has been read, leaving the
  # remaining arguments in ARGV. If there is no expression, this prints the
  # version (for a lone "-v") or the usage, and exits.
  def read_options
    Log.log ""
    nargs = ARGV.size
    args = ARGV.dup

    @expr = nil

    while ARGV.length > 0
      arg = ARGV.shift
      break if @expr = process_option(arg, ARGV)
    end

    unless @expr
      # were any options processed?

      # A lone option of "-v" means version, if there was nothing else on the
      # command line. For grep compatibility, "-v" with an expression has to
      # mean an inverted match.

      if nargs == 1 && args[0] == "-v"
        show_version
      elsif nargs > 0
        $stderr.print "No expression provided.\n"
      end
      
      $stderr.print "Usage: glark [options] expression file...\n"
      $stderr.print "Try `glark --help' for more information.\n"
      exit 1
    end
  end

  # Processes a single option, shifting any values that it takes off the front
  # of args.
  #
  # opt::  the option (or expression) to be processed
  # args:: the arguments following opt; modified in place
  #
  # Returns the expression if opt began one (meaning that option processing is
  # complete), and nil otherwise.
  def process_option(opt, args)
    Log.log "processing option #{opt}"
    case opt

      # after (context)
    when "-A"
      @after = args.shift.to_i
    when /^--after-context=(\d+)/
      @after = $1.to_i

      # before (context)
    when "-B"
      @before = args.shift.to_i
    when /^--before-context=(\d+)/
      @before = $1.to_i

      # after (range)
    when "-f", "--after"
      @range_start = args.shift

      # before (range)
    when "-b", "--before"
      @range_end = args.shift

      # range
    when "-R", "--range"
      @range_start, @range_end = args.shift, args.shift

      # context
    when "-C"
      nxt = args.shift
      # Keep it only if it is entirely a number, so that an expression such as
      # "404 not found" is not mistaken for the context size. Otherwise use the
      # default, and put back what was taken -- unless nothing was there.
      if nxt && nxt =~ /\A\d+\z/
        @before = @after = nxt.to_i
      else
        @before = @after = 2
        args.unshift(nxt) if nxt
      end
    when /^--context(=(\d+))?/
      @after = @before = if $2 then $2.to_i else 2 end
    when /^-(\d+)$/
      @after = @before = $1.to_i

      # highlighting
    when "-u", "--highlight"
      @highlight = true
    when "-U", "--no-highlight"
      @highlight = false
      
      # version
    when "-V", "--version"
      show_version

      # verbose
    when "-_"
      @verbose = 1
      Log.verbose = @verbose
    when /^--verbos(?:e|ity)(?:=(\d+))?/
      @verbose = $1 ? $1.to_i : 1
      Log.log "setting verbose to #{@verbose}"
      Log.verbose = @verbose

    when "-v", "--invert-match"
      @invert_match = true
      @exit_status  = 0
    when "-i", "--ignore-case"
      @nocase = true

      # grep
    when "-g", "--grep"
      set_grep_output

      # help
    when "-?", "--help"
      GlarkHelp.new
      exit 0

      # regexp explanation
    when "-e", "--explain"
      @explain = true

      # line numbers
    when "-N", "--no-line-number"
      @show_line_numbers = false
    when "-n", "--line-number"
      @show_line_numbers = true

      # quiet
    when "-q", "-s", "--quiet", "--messages"
      @quiet = true
    when "-Q", "-S", "--no-quiet", "--no-messages"
      @quiet = false

    when "-m", "--match-limit"
      @num_matches = args.shift.to_i
      
      # whole words
    when "-w", "--word", "--word-regexp"
      @whole_words = true
    when "-W", "--no-word", "--no-word-regexp"
      @whole_words = false

      # whole lines
    when "-x", "--line-regexp"
      @whole_lines = true
    when "-X", "--no-line-regexp"
      @whole_lines = false

      # file names only
    when "-l", "--files-with-matches"
      @file_names_only = true
      @invert_match = false
    when  "-L", "--files-without-match"
      @file_names_only = true
      @invert_match = true

      # colors
    when "-T", "--text-color"
      @text_highlight = make_highlight(opt, args.shift)
    when "-F", "--file-color"
      @file_highlight = make_highlight(opt, args.shift)

    when "-c", "--count"
      @count = true

    when "-Z", "--null"
      @write_null = true

    when "-M", "--exclude-matching-filenames"
      @exclude_matching_files = true
      
    when "-d"
      @directory = args.shift
    when /^--directories=(\w+)/
      @directory = $1

    when "-r", "--recurse"
      @directory = "recurse"

    when "-o", "-a"
      ec = ExpressionCreator.new(opt, args)
      @expr = ec.expr
      return @expr               # we are done.

      # NOTE(review): GNU grep assigns these two letters the other way around
      # (-H shows file names, -h hides them) -- confirm this mapping is intended.
    when "-h", /^--with-?filenames?$/
      @show_file_names = true
      
    when "-H", /^--no-?filenames?$/
      @show_file_names = false
      
    when /^--binary-files?=\"?(\w+)\"?/
      @binary_files = $1
      Log.log "set binary_files to #{@binary_files}"

    when "-y", "--extract-matches"
      Log.log "set extract matches"
      @extract_matches = true
      
      # a short option with something attached to it
    when /^(\-\w)(.+)/
      flag, remainder = $1, $2
      if SHORT_OPTIONS_WITH_VALUE.include?(flag)
        # "-A3" means "-A 3": what is attached is the value of the option
        args.unshift(remainder)
      else
        # "-il" means "-i -l": what is attached is more options
        args.unshift("-" + remainder)
      end
      Log.log "split #{opt} into #{flag} and #{args[0]}"
      return process_option(flag, args)
      
      # the expression
    else
      Log.log "not an option: #{opt}"
      if args
        ec = ExpressionCreator.new(opt, args)
        @expr = ec.expr
        return @expr            # we are done.
      end
    end
    return nil                  # we're not done.
  end

  # check options for collisions/data validity. At present this checks only
  # that the start of the range does not follow its end, and only when both are
  # percentages or neither is, since the two forms cannot be compared to each
  # other. Exits with status 2 if the range is reversed.
  def validate
    return unless @range_start && @range_end

    pctre = /([\.\d]+)%/
    smd = pctre.match(@range_start)
    emd = pctre.match(@range_end)
    return unless !smd == !emd

    if smd
      range_error(smd, emd) if smd[1].to_f > emd[1].to_f
    elsif @range_start.to_i > @range_end.to_i
      range_error(@range_start, @range_end)
    end
  end

  # Prints the package name, version, author and license, and exits with
  # status 0.
  def show_version
    print @package, ", version ", @version, "\n"
    print "Written by Jeff Pace (jpace@glark.org).\n"
    print "Released under the Lesser GNU Public License.\n"
    exit 0
  end

  private

  # Applies one name/value pair read from the RC file. Unknown names are
  # silently ignored.
  def apply_rc_setting(name, value)
    case name
    when "after-context"
      @after = value.to_i
    when "before-context"
      @before = value.to_i
    when "binary-files"
      @binary_files = value
    when "context"
      @after = @before = value.to_i
    when "file-color"
      @file_highlight = make_highlight(name, value)
    when "grep"
      set_grep_output if to_boolean(value)
    when "highlight"
      @highlight = to_boolean(value)
    when "ignore-case"
      @nocase = to_boolean(value)
    when "quiet"
      @quiet = to_boolean(value)
    when "text-color"
      @text_highlight = make_highlight(name, value)
    when "verbose"
      @verbose = to_boolean(value) ? 1 : nil
      Log.verbose = @verbose
    when "verbosity"
      @verbose = value.to_i
      Log.verbose = @verbose
    end
  end

  # Reports a reversed range on standard error, as the other option errors are
  # reported, and exits with status 2.
  def range_error(from, to)
    $stderr.print "ERROR: range start (#{from}) follows range end (#{to})\n"
    exit 2
  end
  
end



# Main program: loads the options, builds the Glark object from the expression,
# and searches every remaining command line argument, or '-' if there are none
# (presumably meaning standard input -- confirm against Glark#search).
#
# NOTE(review): an error caught below is reported, but the exit status is left
# untouched, although the options describe status 2 as meaning "error" --
# confirm whether that is intended.
begin
  Log.set_widths(15, 5, -30)

  Log.log "loading options"
  GlarkOptions.new($PACKAGE, $VERSION).run
  Log.log "done loading options"

  # To get rid of the annoying stack trace on ctrl-C:
  trap("INT") { abort }

  puts $options.expr if $options.explain

  glark = Glark.new($options.expr)
  targets = ARGV.empty? ? [ '-' ] : ARGV

  targets.each do |target|
    if $options.exclude_matching_files
      # the expression is applied to the file name itself
      excluded = $options.expr.evaluate(target, 0, 0)
      Log.log(excluded ? "skipping file #{target} with matching name" : "not skipping file #{target}")
      next if excluded
    end
    glark.search(target) 
  end
rescue => e
  # show only the message, not the stack trace:
  $stderr.puts "error: #{e}"
end
