#!/usr/bin/ruby -w
# -*- ruby -*-

# $Id: cvsdelta,v 1.14 2002/08/08 18:42:35 jeugenepace Exp $

# cvsdelta: summarizes CVS changes and executes the appropriate commands

require 'find'

# Run-time settings. These are the defaults; they are overridden, in order, by
# the rc files, the CVSDELTAOPTS environment variable and the command line.
$verbose = false                # whether to spew debugging output
$stdout.sync = true             # unbuffer output
$stderr.sync = true             # unbuffer output
$quiet = false                  # quiet mode: when set, no progress meter is created
$confirm = false                # whether to ask before each remove when executing
$compression = 3                # compression level, passed to cvs as -z<level>
$changes = true                 # whether to run "cvs diff" to find changed files
$adds = true                    # whether to record files that were added
$deletes = true                 # whether to record files that were deleted
$execute = false                # whether to run the cvs add/remove commands

# Name and version reported by the version option.
$PACKAGE = "cvsdelta"
$VERSION = "1.5.0"


## ------------------------------------------------------- 
## General extensions to Ruby
## -------------------------------------------------------

# A crude spinner that reassures the user while slow work is in progress. In
# verbose mode it logs what is being processed instead of spinning.

class ProgressMeter
  def initialize
    # the four frames of the spinner, shown in rotation
    @progress = ["|", "\\", "-", "/"]
    @count = 0
  end

  # Advances the meter by one step. WHAT names the item being worked on; it is
  # only shown in verbose mode, where "..." stands in when it is omitted.
  def tick(what = nil)
    if $verbose
      print("processing ", what || "...", "\n")
      return
    end

    # return to the start of the line and overwrite the previous frame
    print "\r"
    @count = (@count + 1) % 4
    print @progress[@count]
  end
end


# Extended so that we can convert "Unix" (shell, actually) regular expressions
# ("*.java") to Ruby regular expression source (".*\.java").

class Regexp

  # Maps a single shell-pattern character to its regular expression source.
  # Wildcards are translated, bracket expressions are passed through, and
  # characters that are ordinary in a file name but special in a regular
  # expression are escaped so that they match literally.
  @@sh2re = Hash[
    '*' => '.*',
    '?' => '.',
    '[' => '[',
    ']' => ']',
    '.' => '\.',
    '$' => '\$',
    '/' => '\/',
    '+' => '\+',
    '(' => '\(',
    ')' => '\)',
    '{' => '\{',
    '}' => '\}',
    '|' => '\|'
  ]

  # Returns the regular expression source (a String, not a Regexp) for the
  # given Unix file system expression. A backslash followed by any character is
  # copied through untouched, so patterns that were escaped by the caller are
  # not escaped a second time.

  def Regexp.unixre_to_string(pat)
    pat.gsub(/(\\.)|(.)/) do
      escaped, plain = $1, $2
      escaped || @@sh2re.fetch(plain, plain)
    end
  end

end


# Returns a copy of FNAME in the form used as a key throughout this script:
# the current working directory is removed when it is the leading part of the
# path, and a leading "/" or "./" is dropped. The argument is not modified.
#
# Only a leading working-directory prefix that ends at a path separator is
# stripped; the same text occurring later in the path, or as the start of a
# differently named sibling directory, is left alone.
def convert_filename(fname)
  file = fname.dup
  cwd = Dir.pwd

  if file == cwd
    file = ""
  else
    prefix = cwd.end_with?("/") ? cwd : cwd + "/"
    file = file[prefix.length..-1] if file.start_with?(prefix)
  end

  file.gsub!(/^\//, "")
  file.sub!(/^\.\//, "")
  file
end


# A hash that ensures that we use file name of the form: "foo/Bar", not
# "./foo/Bar". Keys are passed through convert_filename on assignment, on
# lookup and on membership tests, so all three agree on what a key is.

class FileHash < Hash

  # Stores VALUE under the normalized form of F.
  def []=(f, value)
    super(convert_filename(f), value)
  end

  # Returns the value stored under the normalized form of F, or nil.
  def [](f)
    super(convert_filename(f))
  end

  # Returns whether the normalized form of F is a key of this hash.
  def key?(f)
    super(convert_filename(f))
  end

  alias include? key?
  alias has_key? key?
  alias member? key?

end


# Convenience class methods added to the built-in File class.

class File

  # Returns the File::Stat for FD, or nil when it cannot be obtained (the file
  # does not exist, access is denied, and so on).
  def self.status(fd)
    stat(fd)
  rescue
    # unreadable or missing entries are simply reported as "no status"
    nil
  end

  # Returns whether FD is a regular file; nil when it cannot be examined.
  def self.is_file?(fd)
    info = status(fd)
    info && info.file?
  end

  # Returns whether FD is a directory; nil when it cannot be examined.
  def self.is_directory?(fd)
    info = status(fd)
    info && info.directory?
  end

  # Walks everything at and below DIR and returns, in traversal order, the
  # names for which the given block returned true.
  def self.find_where(dir)
    matches = []
    Find.find(dir) { |path| matches << path if yield(path) }
    matches
  end

  # Returns the names of all regular files under DIR.
  def self.find_files(dir)
    find_where(dir) { |path| is_file?(path) }
  end

  # Returns the names of all directories at and under DIR.
  def self.find_directories(dir)
    find_where(dir) { |path| is_directory?(path) }
  end

end


## ------------------------------------------------------- 
## Configuration management support
## -------------------------------------------------------

# Maps directories to regular expressions that denote ignored files.
#
# Each key is a directory name as passed to #read; each value is an array of
# Regexp objects, anchored at both ends, that match the ignored entries of that
# directory.

class IgnoredPatterns < Hash

  # IGNORENAME is the name of the per-directory ignore file (".cvsignore").
  def initialize(ignorename)
    super()
    @ignorename = ignorename
    @dirsread = Array.new
  end

  # Builds and stores the ignore expressions for DIR from the built-in CVS
  # defaults, the ignore file in the user's home directory and the ignore file
  # in DIR itself. Each directory is processed only once.
  def read(dir)
    # from the CVS default settings -- ignoring overrides

    puts "reading ignored patterns for " + dir if $verbose

    return if @dirsread.include?(dir)
    @dirsread.push(dir)

    pats = %w{
                  CVS
                  *~
                  .cvsignore
                  *.o
                  *$
                  *.BAK
                  *.Z
                  *.a
                  *.bak
                  *.elc
                  *.exe
                  *.ln
                  *.obj
                  *.olb
                  *.old
                  *.orig
                  *.rej
                  *.so
                  .
                  ..
                  .del-*
                  .make.state
                  .nse_depinfo
                  CVS.adm
                  RCS
                  RCSLOG
                  SCCS
                  TAGS
                  _$*
                  core
                  cvslog.*
                  tags
              }
    
    # can't put these guys in the qw() list:
    ['.#*', '#*', ',*'].each { |p| pats.push(p) }

    # read the repository-wide cvsignore file, if it exists and is local.
    #cvsroot = ENV["CVSROOT"]
    #if cvsroot
    #  cri = cvsroot + "/CVSROOT/cvsignore"
    #  repo = read_ignore_file_named(cri)
    #  pats.push(*repo) unless repo.length == 0
    #end

    # read ~/<ignore>
    homedir = ENV["HOME"]       # unix
    unless homedir              # windows
      homedir  = ENV["HOMEDRIVE"]
      homepath = ENV["HOMEPATH"]
      if homepath then
        if homedir then
          homedir += homepath
        else
          homedir = homepath
        end
      end
    end
    
    global = read_ignore_file(homedir)
    pats.push(*global) unless global.length == 0

    # read <ignore> in the current directory
    local = read_ignore_file(dir)
    pats.push(*local) unless local.length == 0

    # prepend the current directory to the patterns, contending with the fact
    # that the directory might actually be a valid regular expression.

    # wildcard if the pattern is a directory
    pats = pats.collect do |pat|
      File.directory?(dir + "/" + pat) ? pat + "/*" : pat
    end

    qdir = Regexp.quote(dir)
    pats = pats.collect do |pat|
      qdir + "/" + Regexp.unixre_to_string(pat)
    end

    # make a regular expression for each one, to be the entire string (^...$)
    self[dir] = Array.new
    pats.each do |pat|
      re = Regexp.new("^" + pat + "$")
      puts "IgnoredPatterns: storing re " + re.source + " for dir " + dir if $verbose
      self[dir].push(re)
    end
  end


  # Reads the ignore file from the given directory, using the default ignore
  # name. Returns the whitespace-separated patterns found, with every "+"
  # backslash-escaped; returns an empty array when DIR is nil or has no ignore
  # file.

  def read_ignore_file(dir)
    pats = Array.new

    if dir then
      cifile = dir + "/" + @ignorename
      if File.exist?(cifile)
        # File.foreach, unlike IO.foreach, never treats the name as a command
        File.foreach(cifile) do |line|
          line.chomp!
          # block form, so that the replacement is taken literally
          line.gsub!(/\+/) { '\\+' }
          words = line.split
          pats.push(*words) if words.size > 0
        end
      else
        puts "no ignore file in " + dir if $verbose
      end
    end
    pats
  end

  # Reads the given ignore file, if it exists. Returns one pattern per line
  # (lines are not split on whitespace), with every "+" backslash-escaped.

  def read_ignore_file_named(fname)
    pats = Array.new

    if File.exist?(fname)
      File.foreach(fname) do |line|
        line.chomp!
        line.gsub!(/\+/) { '\\+' }
        pats.push(line)
      end
    else
      puts "no such file " + fname if $verbose
    end
    pats

  end

  # Returns if the file is ignored. Checks the name as both "./name" and "name"
  # against the expressions of every directory above it. NAME is not modified.

  def is_ignored?(name)
    puts "is_ignored?(" + name + ")" if $verbose
    if name.index("./") == 0
      # non-destructive sub: the two forms must be distinct strings
      withpref, nopref = name, name.sub("./", "")
    else
      withpref, nopref = "./" + name, name
    end
    
    [ withpref, nopref ].each do |candidate|
      dir = candidate
      puts "dirs = " + keys.join(", ") if $verbose
      loop do
        dir = File.dirname(dir)
        if include?(dir)
          self[dir].each do |re|
            puts "matching " + candidate + " against " + re.source if $verbose
            # stop as soon as we find out it is ignored
            return true if re.match(candidate)
          end
        else
          puts "    dir " + dir + " is not included" if $verbose
        end
        # stop at the top: "." for relative names, the root for absolute ones
        # (whose dirname is itself), else we'll cycle continuously
        break if dir == "." || File.dirname(dir) == dir
      end
    end
    
    return false              # it's not ignored
  end

end


## ------------------------------------------------------- 
## A file that has changed with respect to the configuration management system.
## This can be one that has been added (a new file), changed (a previously
## existing file), or deleted (one that has been removed).
## ------------------------------------------------------- 

# Line counts for one file: how many lines were added, changed and deleted.
# Also used, under the name "total", as the running grand total.
class DeltaFile

  attr_accessor :adds, :changes, :deletes, :name

  # NAME is stored in normalized form (see convert_filename); all three
  # counters start at zero.
  def initialize(name)
    @name = convert_filename(name)
    @adds = 0
    @changes = 0
    @deletes = 0
  end

  # Returns the sum of the three counters.
  def total
    adds + changes + deletes
  end

end


# A file that is already under version control and has local modifications.
class ExistingFile < DeltaFile

  # Marker printed in front of the file name in the summary.
  def symbol
    "*"
  end

end


# A file that is listed in CVS/Entries but is no longer in the working tree.
#
# Construction is inherited unchanged, so the counters stay at zero: it would
# be nice to know how long the file was, i.e., how many lines were deleted.
class DeletedFile < DeltaFile

  # Marker printed in front of the file name in the summary.
  def symbol
    "-"
  end

end


# A file that is in the working tree but not yet under version control. Every
# line of the file counts as an added line.
class NewFile < DeltaFile

  # Reads NAME to count its lines. File.readlines is used rather than
  # IO.readlines because the latter treats a name starting with "|" as a
  # command to run, and names arrive here with any leading "./" removed.
  def initialize(name)
    super
    @adds = File.readlines(name).length
  end

  # Marker printed in front of the file name in the summary.
  def symbol; "+"; end
  
end


## -------------------------------------------------------
## Processing of "diff" output, either contextual ("long form") or unified
## (traditional).
## -------------------------------------------------------

# Base class for recognizers of one kind of diff hunk header. Subclasses
# supply number_of_lines (how many lines the last matched header covers) and
# update_record(record, nlines) (which counter of a record to increase).
class DiffOutput

  # TOTAL is the record that accumulates the grand total; REGEXP recognizes
  # the hunk headers this object is responsible for.
  def initialize(total, regexp)
    @total, @regexp, @md = total, regexp, nil
  end

  # Matches LINE against the expression and remembers the outcome. Returns the
  # MatchData, or nil when the line is not one of ours.
  def match(line)
    @md = @regexp.match(line)
  end

  # Applies the most recent match to RECORD and then to the grand total.
  def update(record)
    count = number_of_lines
    [record, @total].map { |target| update_record(target, count) }.last
  end

  # Class name followed by the source of the expression, for debugging output.
  def to_s
    "#{self.class} #{@regexp.source}"
  end

end


# Recognizer for hunk headers of "normal" diff output, such as "3,4c4" or
# "5a6,8": a line range, a one-letter command, and a second line range.
class NormalDiffOutput < DiffOutput

  # LETTER is the diff command this object recognizes.
  def initialize(total, letter)
    # a line number, optionally followed by ",<end line>"
    range = '(\d+)(?:,(\d+))?'
    super(total, Regexp.new("^#{range}#{letter}#{range}"))
  end

  # Returns the amount of lines that changed according to the last matched
  # header: the longer of the two ranges, counted inclusively.

  def number_of_lines
    spans = [diff_difference(1, 2), diff_difference(3, 4)]
    spans.max + 1
  end

  # Returns the distance between match groups FROM and TO of the last match
  # (the two ends of one range, as in "3,4c4"), or 0 when the range has no end.

  def diff_difference(from, to)
    return 0 unless @md[to]
    @md[to].to_i - @md[from].to_i
  end

end


# Recognizes "a" (add) hunks of normal diff output.
class NormalDiffOutputAdd < NormalDiffOutput

  def initialize(total)
    super(total, "a")
  end

  # Credits NLINES to the added-lines counter of REC.
  def update_record(rec, nlines)
    rec.adds = rec.adds + nlines
  end

end


# Recognizes "c" (change) hunks of normal diff output.
class NormalDiffOutputChange < NormalDiffOutput

  def initialize(total)
    super(total, "c")
  end

  # Credits NLINES to the changed-lines counter of REC.
  def update_record(rec, nlines)
    rec.changes = rec.changes + nlines
  end

end


# Recognizes "d" (delete) hunks of normal diff output.
class NormalDiffOutputDelete < NormalDiffOutput

  def initialize(total)
    super(total, "d")
  end

  # Credits NLINES to the deleted-lines counter of REC.
  def update_record(rec, nlines)
    rec.deletes = rec.deletes + nlines
  end

end


# Holds one recognizer for each kind of hunk found in normal diff output, all
# of them feeding the same grand-total record.
class NormalDiffProcessor

  # TOTAL is the record that accumulates the counts of every file.
  def initialize(total)
    @addre = NormalDiffOutputAdd.new(total)
    @delre = NormalDiffOutputDelete.new(total)
    @chgre = NormalDiffOutputChange.new(total)
  end

  # Returns the recognizers to try against LINE, in the order add, change,
  # delete. The line itself is not consulted.
  def get_tests(line)
    [ @addre, @chgre, @delre ]
  end

end


# Directories listed so that the parents are first in the list.
#
# Holds the directories of the given files that are not yet under CVS control
# (they have no CVS/Entries file), ordered so that each one can be added to
# CVS before anything inside it.

class DirectoryList < Array

  # FILES is a list of file names; the directory of each one is considered.
  def initialize(files)
    super()
    files.each { |f| add(File.dirname(f)) }
  end

  # Adds DIR, preceded by any of its ancestors that are not under CVS control
  # either. Directories that already have a CVS/Entries file are skipped.
  def add(dir)
    return unless dir
    return if File.exist?(dir + "/CVS/Entries")

    parent = File.dirname(dir)
    # "." and "/" are their own parent: we have run out of ancestors, and the
    # top of the tree itself is never something to add
    return if parent == dir

    # attempt to add the parent
    add(parent)

    # nothing to do if dir is already in the list
    return if index(dir)

    pdpos = index(parent)
    if pdpos
      # parent already in the list, so insert this dir immediately afterward
      self[pdpos + 1, 0] = dir
    else
      # prepending
      unshift(dir)
    end
  end

end


## -------------------------------------------------------
## CVS-specific code
## -------------------------------------------------------

# A difference within a configuration management system.
#
# Constructing an instance does the analysis: it runs "cvs diff" to count the
# changed lines of modified files, then compares the working tree with the
# CVS/Entries files to find new and deleted files. The results are available
# through #added, #changed and #deleted (hashes of file name to record) and
# #total (the grand total record). #execute runs the cvs add and remove
# commands for the new and deleted files.

class CVSDelta
  attr_reader :added, :changed, :deleted, :total

  # ARGS is the list of files and directories to examine; an empty list means
  # the current directory.
  def initialize(args)
    # for showing that we're actually doing something
    @progress = if $quiet then nil else ProgressMeter.new end

    @ignored_patterns = IgnoredPatterns.new(".cvsignore")

    args = [ "." ] unless args.length > 0

    @added    = FileHash.new
    @changed  = FileHash.new
    @deleted  = FileHash.new
    @total    = DeltaFile.new("total")
    @warned   = Array.new
    @entries  = Array.new
    @entfiles = Array.new

    args.collect { |a| File.dirname(a) }.uniq.each { |dir| @ignored_patterns.read(dir) }

    if $changes
      read_changes(args)
    else
      puts "not processing changes"
    end

    # determine new files
    scan_working_tree(args) if $adds || $deletes
  end

  # Reads DIR/CVS/Entries, remembering every file it lists and recording as
  # deleted those that are no longer in the working tree. Also loads the ignore
  # patterns for DIR. Each entries file is read only once.
  def read_entries_file(dir)
    entfile = dir + "/CVS/Entries"
    if @entfiles.include?(entfile)
      puts "entries file " + entfile + " already read" if $verbose
    elsif !File.exist?(entfile)
      puts "no entries file: " + entfile if $verbose
    else
      puts "reading entries file: " + entfile if $verbose
      File.foreach(entfile) do |line|
        @progress.tick(dir) if @progress
        # the name and version are the second and third "/"-separated fields
        file, ver = line.split('/')[1 .. 3]
        if file and ver and !file.empty? and !ver.empty?
          fullname = convert_filename(dir + "/" + file)
          puts "adding entry: " + fullname if $verbose 
          @entries.push(fullname)
          unless File.exist?(fullname)
            puts "entry " + fullname + " is missing" if $verbose
            add_deleted_file(fullname)
          end
        end
      end
      @entfiles.push(entfile)
      @ignored_patterns.read(dir)
    end
  end

  # Records FILE as new, unless it is already known to CVS, is ignored or has
  # been recorded before. Unreadable files are reported once and skipped.
  def add_file(file)
    if $adds
      fname = convert_filename(file)
      if @entries.include?(fname)
        puts "file already in entries: " + fname if $verbose
      elsif @ignored_patterns.is_ignored?(fname)
        puts "file is ignored: " + fname if $verbose
      else
        puts "adding file: " + fname if $verbose
        if File.readable?(fname)
          unless @added.include?(fname)
            # don't add it twice
            @added[fname] = NewFile.new(fname)
            @total.adds += @added[fname].adds
          end
        else
          unless @warned.include?(fname)
            puts "\rnot readable: " + fname 
            @warned.push(fname)
          end
        end
      end
    else
      puts "not adding new file " + file if $verbose
    end
  end

  # Records FILE as deleted, when deletions are being tracked.
  def add_deleted_file(file)
    if $deletes
      @deleted[file] = DeletedFile.new(file)
    else
      puts "not adding deleted file " + file if $verbose
    end
  end
  
  # Returns the record of changed file FILE, creating it on first use.
  def get_record(file)
    @changed[file] = ExistingFile.new(file) unless @changed.include?(file)
    @changed[file]
  end

  # Runs "cvs add" for the new directories and files and "cvs remove" for the
  # deleted files, asking about each deletion first when $confirm is set.
  def execute
    print "\nEXECUTING COMMANDS\n"
    
    print "\n    ADDs\n"

    if @added.length > 0
      # new directories have to be added before the files inside them
      newdirs = DirectoryList.new(@added.keys)
      adds = newdirs + @added.keys
      execute_command(adds, "add")
    end

    if $confirm
      dels = @deleted.keys.reject { |name|
        print "delete " + name + "? "
        ans = $stdin.readline
        ans.upcase[0, 1] != 'Y'
      }
    else
      dels = @deleted.keys
    end

    print "\n    DELETEs\n"
    execute_command(dels, "remove")
  end

  # Echoes and runs "cvs COMMAND NAMES...", doing nothing when NAMES is empty.
  # The command is run without a shell, so names containing spaces or shell
  # metacharacters reach cvs intact.
  def execute_command(names, command)
    if names.size > 0
      cmd = [ "cvs", command, *names ].join(" ")
      print "        ", cmd, "\n"
      system("cvs", command, *names)
    elsif $verbose
      puts "no files to " + command
    end
  end

  private

  # Runs "cvs diff" on ARGS and adds the size of every hunk to the record of
  # the file it belongs to and to the grand total.
  def read_changes(args)
    diffprocessor = NormalDiffProcessor.new(@total)
    curfile = nil

    # Ignore the .cvsrc file; handle only normal diff output.
    
    # Tweaking compression (via -z[0 .. 9]) makes diff less likely to hang
    # after producing output. Both -z0 and -z9 work best on my system (against
    # the doctorj CVS repository at SourceForge.net).

    # NOTE: the arguments are passed to the shell as given on the command line.
    cmd = "cvs -fq -z" + $compression.to_s + " diff " + args.join(" ") + " 2>&1"
    puts "executing command " + cmd if $verbose

    # backticks seem to work more consistently than IO.popen, which was losing
    # lines from the CVS diff output.
    output = `#{cmd}`
    output.each_line do |line|
      @progress.tick(line) if @progress

      puts "\rline: " + line if $verbose

      if line.index(/^\?\s*(\S*)/)
        # some CVS servers seem to write new files as "? foo/bar.x", but we'll
        # figure out the new files for ourselves anyway
      elsif line.index(/^cvs server:\s*(\S*)was removed/) ||
          line.index(/^cvs (?:diff|server): *cannot find\s*(\S*)/)
        # various ways that CVS servers tell us what was removed, but we'll
        # figure it out for ourself
      elsif line.index(/^Index:\s+(\S+)/)
        curfile = $1
        puts "new current file: " + curfile if $verbose
      else
        diffprocessor.get_tests(line).each do |re|
          if !re.match(line)
            puts re.to_s + ": not a match line: " + line if $verbose
          elsif curfile
            re.update(get_record(curfile))
          elsif $verbose
            # a hunk header with no file to attribute it to
            puts "ignoring diff line before any Index line: " + line
          end
        end
      end
    end
  end

  # Reads the CVS/Entries files for ARGS and offers every file found in the
  # working tree to add_file.
  def scan_working_tree(args)
    args.each do |arg|
      if File.directory?(arg)
        File.find_directories(arg).each { |dir| read_entries_file(dir) unless dir.index(/CVS/) }
        File.find_files(arg).each { |f| add_file(f) }
      else
        # read the CVS/Entries file in the directory of the file
        dir = File.dirname(arg)
        read_entries_file(dir) unless dir.index(/CVS/)
        add_file(arg)
      end
    end
  end
  
end


# Prints one row of the summary table: the total, added, changed and deleted
# counts of REC, each right-aligned in seven columns, followed by a label. The
# label is NAME when given, otherwise the record's symbol and file name.
def print_record(rec, name = nil)
  name ||= rec.symbol + " " + rec.name
  counts = [rec.total, rec.adds, rec.changes, rec.deletes]
  printf("%7d  " * counts.length, *counts)
  print name, "\n"
end


# Prints the summary table for DELTA: a heading, one row per added, changed or
# deleted file in order of file name, and a final row with the grand total.
def print_change_summary(delta)
  widths = [7, 7, 7, 7, 20]

  puts
  puts format("%-7s  %-7s  %-7s  %-7s  %s", "total", "added", "changed", "deleted", "file")
  puts widths.map { |w| "=" * w }.join("  ")

  # one record per file name; a later group replaces an earlier one
  records = {}
  [ delta.added, delta.changed, delta.deleted ].each do |group|
    group.each { |file, record| records[file] = record }
  end

  records.sort.each { |pair| print_record(pair[1]) }
  puts widths.map { |w| "-" * w }.join("  ")
  print_record(delta.total, "Total")
end


# Prints the usage message to standard output.
def show_help
  text = [
    "USAGE",
    "    cvsdelta [options] directory...",
    "",
    "OPTIONS",
    "    -i, --confirm",
    "        Confirm deletions from CVS. Valid only with the execute option.",
    "",
    "    -D, --nodiff",
    "        Run without comparing files that exist locally and in CVS.",
    "",
    "    -e, --execute",
    "        Run the add and remove commands for the appropriate files.",
    "",
    "    -h, --help",
    "        Display this help message.",
    "",
    "    -q, --quiet",
    "        Run with minimum output.",
    "",
    "    -v, --version",
    "        Display the version and exit.",
    "",
    "    -V, --verbose",
    "        Run with maximum output.",
    "",
    "    -z [LEVEL], --compression [LEVEL]",
    "        Set the compression to the given level for net traffic.",
    "",
    "See the man page for more information.",
    ""
  ]
  # puts writes each element on a line of its own
  puts(text)
end


# We should be running this from a CVS'ed directory. This only warns; the
# script carries on regardless.
unless File.exist?("CVS")
  $stderr.print "this directory does not appear to be part of a CVS project\n"
end

# Returns true when VALUE, converted to a string and compared without regard
# to case, is one of "yes", "true" or "on"; returns false for anything else.
def to_boolean(value)
  case value.to_s.downcase
  when "yes", "true", "on"
    true
  else
    false
  end
end

# Returns the topmost directory of the CVS working tree that contains DIR, or
# nil when DIR is not under CVS control (it has no readable, non-empty
# CVS/Repository file).
#
# A "/" in the first line of CVS/Repository is taken to mean that DIR is a
# subdirectory of the project, in which case the search continues in the parent
# directory. When the parent turns out not to be under CVS control, DIR itself
# is the top.
def find_top_of_project(dir = File.expand_path("."))
  repfile = dir + "/CVS/Repository"
  return nil unless File.exist?(repfile)

  # only the first line matters
  line = File.open(repfile) { |io| io.gets }
  return nil unless line
  return dir unless line.index("/")

  # keep going up the directory structure, unless we are already at the root
  parent = File.dirname(dir)
  return dir if parent == dir
  find_top_of_project(parent) || dir
end

# Applies one command-line word to the global settings.
#
# OPT is the word as the user typed it, with its leading dashes; it is not
# modified. ARGS is the list of words that follow it, from which options that
# take a value (-z) remove that value.
#
# Returns true when OPT is not an option at all (it has no leading dash, or
# consists of dashes only), which tells the caller that option processing is
# over. Returns false when OPT was an option, whether understood or not; an
# unknown option is reported on standard error. The help and version options
# print their text and exit.
def process_option(opt, args = nil)
  name = opt.sub(/^\-+/, "")
  return true if name == opt || name.empty?

  case name
  when "h", "help"
    show_help
    exit
  when "q", "quiet"
    $quiet = true
  when "V", "verbose"
    $verbose = true
  when "e", "execute"
    $execute = true
  when "i", "confirm"
    $confirm = true
  when "nodiff"
    $changes = false
  when "z", "compression"
    # no need to convert it to an integer, since it'll be written back out as
    # a string; the current level is kept when no value follows
    $compression = args.shift if args && !args.empty?
  when "a", "adds"
    $adds = true
  when "c", "changes"
    $changes = true
  when "d", "deletes"
    $deletes = true
  when "A", "no-adds"
    $adds = false
  when "C", "no-changes"
    $changes = false
  when "D", "no-deletes"
    $deletes = false
  when "v", "version"
    print $PACKAGE, ", version ", $VERSION, "\n"
    print "Written by Jeff Pace (jpace@incava.org).\n"
    print "Released under the Lesser GNU Public License.\n"
    exit 1
  else
    $stderr.puts "option not understood: " + opt.to_s
  end
  return false
end

# Applies the settings in resource file RC to the global settings.
#
# Each line has the form "name = value" or "name: value"; everything from a
# "#" to the end of the line is a comment. Lines without both a name and a
# value, and names that are not recognized, are skipped. Boolean values are
# interpreted by to_boolean.
def read_rc_file(rc)
  File.foreach(rc) do |line|
    name, value = line.sub(/\s*#.*/, "").strip.split(/\s*[=:]\s*/)
    next unless name && value

    case name
    when "quiet"
      $quiet = to_boolean(value)
    when "verbose"
      $verbose = to_boolean(value)
    when "execute"
      $execute = to_boolean(value)
    when "confirm"
      $confirm = to_boolean(value)
    when "nodiff"
      # "nodiff: true" turns the comparison off
      $changes = !to_boolean(value)
    when "compression"
      # no need to convert it to an integer, since it'll be written back out as a string:
      $compression = value
    when "adds"
      $adds = to_boolean(value)
    when "changes"
      $changes = to_boolean(value)
    when "deletes"
      $deletes = to_boolean(value)
    end
  end
end

# opts = GetoptLong.new(
#                       [ "--adds",          "-a",  GetoptLong::NO_ARGUMENT       ],
#                       [ "--changes",       "-c",  GetoptLong::NO_ARGUMENT       ],
#                       [ "--compression",   "-z",  GetoptLong::REQUIRED_ARGUMENT ],
#                       [ "--confirm",       "-i",  GetoptLong::NO_ARGUMENT       ],
#                       [ "--deletes",       "-d",  GetoptLong::NO_ARGUMENT       ],
#                       [ "--execute",       "-e",  GetoptLong::NO_ARGUMENT       ],
#                       [ "--help",          "-h",  GetoptLong::NO_ARGUMENT       ],
#                       [ "--no-adds",       "-A",  GetoptLong::NO_ARGUMENT       ],
#                       [ "--no-changes",    "-C",  GetoptLong::NO_ARGUMENT       ],
#                       [ "--no-deletes",    "-D",  GetoptLong::NO_ARGUMENT       ],
#                       [ "--nodiff",        "-x",  GetoptLong::NO_ARGUMENT       ],
#                       [ "--quiet",         "-q",  GetoptLong::NO_ARGUMENT       ],
#                       [ "--verbose",       "-V",  GetoptLong::NO_ARGUMENT       ],
#                       [ "--version",       "-v",  GetoptLong::NO_ARGUMENT       ]
#                       )

# opts.each do |o, a|
#   process_option(o, a)
# end

# Settings are applied from the least to the most specific source: the rc file
# in the home directory, the rc file at the top of the project, the
# CVSDELTAOPTS environment variable, and finally the command line.

# process the rc file
if ENV["HOME"]
  rc = ENV["HOME"] + "/.cvsdeltarc"
  read_rc_file(rc) if File.exist?(rc)
end

# Use the topmost resource file in this project. We may refine this
# functionality so that multiple rc files can be used within a project

topdir = find_top_of_project

if topdir && File.exist?(topdir + "/.cvsdeltarc")
  read_rc_file(topdir + "/.cvsdeltarc")
end

# process the environment variable
if ENV["CVSDELTAOPTS"]
  options = ENV["CVSDELTAOPTS"].split(/\s+/)
  while options.length > 0
    opt = options.shift
    puts "processing opt " + opt if $verbose
    # an option that takes a value removes it from the list itself
    process_option(opt, options)
  end
end

# Process the command line up to the first word that is not an option. That
# word is the first file or directory, so it goes back on the list -- unless it
# is "--", which only marks the end of the options.
while ARGV.length > 0
  arg = ARGV.shift
  if process_option(arg.dup, ARGV)
    ARGV.unshift(arg) unless arg == "--"
    break
  end
end

delta = CVSDelta.new(ARGV)

print_change_summary(delta)
delta.execute if $execute
