#!/usr/bin/ruby -w
# -*- ruby -*-

# $Id: cvsdelta,v 1.10 2002/07/31 13:18:04 jeugenepace Exp $

# cvsdelta: summarizes CVS changes and executes the appropriate commands

require 'getoptlong'
require 'find'

# Unbuffer both output streams so that progress and messages show up at once.
[ $stdout, $stderr ].each { |stream| stream.sync = true }

# Run-time switches; the command-line option loop may change them.
$verbose     = false            # spew debugging output?
$quiet       = false            # suppress warnings?
$confirm     = false            # confirm remove commands?
$diff        = true             # do cvs diff? (which hangs often)
$compression = 3                # compression for net transfer

# Program identification, printed for --version.
$PACKAGE = "cvsdelta"
$VERSION = "1.4.1"


## ------------------------------------------------------- 
## General extensions to Ruby
## -------------------------------------------------------

# A primitive "progress meter", for showing when something time-consuming is
# being done.

class ProgressMeter
  # Prepares the four spinner frames and the index of the current frame.
  def initialize
    @count    = 0
    @progress = [ "|", "\\", "-", "/" ]
  end

  # Reports one step of progress.
  #
  # In verbose mode a "processing <what>" line is printed ("..." when no
  # +what+ is given). Otherwise the cursor returns to the start of the line
  # and the next spinner frame is drawn over the previous one.
  def tick(what = nil)
    if $verbose
      print("processing ", (what || "..."), "\n")
      return
    end

    @count = (@count + 1) % 4
    print "\r", @progress[@count]
  end
end


# Extended so that we can convert "Unix" (shell, actually) regular expressions
# ("*.java") to Ruby regular expressions ("/\.java$/").

class Regexp

  # Translation table: shell pattern character => regular expression text.
  @@sh2re = {
    '*' => '.*',
    '?' => '.',
    '[' => '[',
    ']' => ']',
    '.' => '\.',
    '$' => '\$',
    '/' => '\/'
  }

  # Returns the source text of a regular expression equivalent to the given
  # Unix file system (shell) expression.
  #
  # A backslash followed by any character is copied through untouched; every
  # other character is looked up in the translation table and copied as is
  # when the table has no entry for it.

  def self.unixre_to_string(pat)
    pat.gsub(/(\\.)|(.)/) do
      escaped, plain = $1, $2
      escaped || @@sh2re.fetch(plain, plain)
    end
  end

end


# Converts a file name to the relative form used as a key in this script
# ("foo/Bar" rather than "./foo/Bar" or "<pwd>/foo/Bar").
#
# fname:: the file name to convert; the argument itself is not modified.
#
# Returns a new string with the working directory prefix, then a leading "/",
# then a leading "./" removed.
def convert_filename(fname)
  file = fname.dup
  pwd  = Dir.pwd

  # Strip the working directory only where it is a leading path component.
  # Removing it wherever it occurs would mangle names that merely contain it,
  # and with a working directory of "/" it would delete every separator.
  if file == pwd
    file = ""
  else
    prefix = pwd.end_with?("/") ? pwd : pwd + "/"
    file = file[prefix.length .. -1] if file.start_with?(prefix)
  end

  # As before, any remaining leading "/" is dropped as well.
  file = file.sub(/\A\//, "")
  file.sub(/\A\.\//, "")
end


# A hash that ensures that we use file name of the form: "foo/Bar", not
# "./foo/Bar".

class FileHash < Hash

  # Stores +value+ under the converted form of the file name +f+.
  def []=(f, value)
    super(convert_filename(f), value)
  end
  alias_method :store, :[]=

  # Returns the value stored for the file name +f+, in any of its forms.
  def [](f)
    super(convert_filename(f))
  end

  # Returns whether there is an entry for the file name +f+. The name is
  # converted first, so that membership tests agree with [] and []=.
  def key?(f)
    super(convert_filename(f))
  end
  alias_method :has_key?, :key?
  alias_method :include?, :key?
  alias_method :member?,  :key?

  # Same as Hash#fetch, with the file name converted first.
  def fetch(f, *default, &block)
    super(convert_filename(f), *default, &block)
  end

  # Same as Hash#delete, with the file name converted first.
  def delete(f, &block)
    super(convert_filename(f), &block)
  end

end


# Additions to the File built-in class.

class File

  # Stats the given name and returns the File::Stat object, or nil if the
  # stat failed (the file does not exist, access was denied, etc.).
  def self.status(fd)
    File.stat(fd)
  rescue
    # files that could not be read, etc., are reported as nil
    nil
  end

  # Returns whether the given object is a file. Errors count as "no" (nil).
  def self.is_file?(fd)
    info = File.status(fd)
    info && info.file?
  end

  # Returns whether the given object is a directory. Errors count as "no"
  # (nil).
  def self.is_directory?(fd)
    info = File.status(fd)
    info && info.directory?
  end

  # Walks everything under the given directory and returns an array of the
  # names for which the associated block returned true.
  def self.find_where(dir)
    found = []
    Find.find(dir) { |path| found << path if yield(path) }
    found
  end

  # Returns an array of all files under the given directory.
  def self.find_files(dir)
    find_where(dir) { |path| is_file?(path) }
  end

  # Returns an array of all directories under the given directory.
  def self.find_directories(dir)
    find_where(dir) { |path| is_directory?(path) }
  end

end


## ------------------------------------------------------- 
## Configuration management support
## -------------------------------------------------------

# Maps directories to regular expressions that denote ignored files.

class IgnoredPatterns < Hash

  # ignorename:: the name of the per-directory ignore file (the caller in this
  #              script passes ".cvsignore").
  def initialize(ignorename)
    super()
    @ignorename = ignorename
    @dirsread = Array.new
  end

  # Builds the list of ignore regular expressions for +dir+ and stores it in
  # this hash under +dir+. The list is made of the built-in defaults, the
  # patterns from the ignore file in the home directory, and those from the
  # ignore file in +dir+ itself. Each directory is processed only once.
  def read(dir)
    puts "reading ignored patterns for " + dir if $verbose

    return if @dirsread.include?(dir)
    @dirsread.push(dir)

    # from the CVS default settings -- ignoring overrides
    pats = %w{
                  CVS
                  *~
                  .cvsignore
                  *.o
                  *$
                  *.BAK
                  *.Z
                  *.a
                  *.bak
                  *.elc
                  *.exe
                  *.ln
                  *.obj
                  *.olb
                  *.old
                  *.orig
                  *.rej
                  *.so
                  .
                  ..
                  .del-*
                  .make.state
                  .nse_depinfo
                  CVS.adm
                  RCS
                  RCSLOG
                  SCCS
                  TAGS
                  _$*
                  core
                  cvslog.*
                  tags
              }

    # these are added separately rather than through the %w{} list:
    pats.push('.#*', '#*', ',*')

    # read the repository-wide cvsignore file, if it exists and is local.
    #cvsroot = ENV["CVSROOT"]
    #if cvsroot
    #  cri = cvsroot + "/CVSROOT/cvsignore"
    #  repo = read_ignore_file_named(cri)
    #  pats.push(*repo) unless repo.length == 0
    #end

    # read ~/<ignore>, then <ignore> in the current directory
    pats.concat(read_ignore_file(home_directory))
    pats.concat(read_ignore_file(dir))

    # wildcard if the pattern is a directory
    pats = pats.collect do |p|
      File.directory?(dir + "/" + p) ? p + "/*" : p
    end

    # prepend the current directory to the patterns, contending with the fact
    # that the directory might actually be a valid regular expression, and
    # make each one match the entire string (^...$)
    qdir = Regexp.quote(dir)
    self[dir] = Array.new
    pats.each do |p|
      begin
        re = Regexp.new("^" + qdir + "/" + Regexp.unixre_to_string(p) + "$")
      rescue RegexpError => e
        # one malformed pattern should not abort the whole run
        $stderr.puts "skipping invalid ignore pattern " + p + ": " + e.message unless $quiet
        next
      end
      puts "IgnoredPatterns: storing re " + re.source + " for dir " + dir if $verbose
      self[dir].push(re)
    end
  end

  # Reads the ignore file from the given directory, using the default ignore
  # name. Returns an array of patterns, which is empty if +dir+ is nil or the
  # directory has no ignore file.

  def read_ignore_file(dir)
    return Array.new unless dir

    cifile = dir + "/" + @ignorename
    if File.exist?(cifile)
      parse_ignore_file(cifile)
    else
      puts "no ignore file in " + dir if $verbose
      Array.new
    end
  end

  # Reads the given ignore file, if it exists. Returns an array of patterns,
  # which is empty if there is no such file.

  def read_ignore_file_named(fname)
    if File.exist?(fname)
      parse_ignore_file(fname)
    else
      puts "no such file " + fname if $verbose
      Array.new
    end
  end

  # Returns if the file is ignored. Checks the name as both "./name" and "name".
  # The argument is not modified.

  def is_ignored?(name)
    puts "is_ignored?(" + name + ")" if $verbose

    # sub (not sub!) so that the two forms are distinct strings and the
    # caller's string is left alone
    nopref   = name.index("./") == 0 ? name.sub("./", "") : name
    withpref = "./" + nopref

    [ withpref, nopref ].each do |candidate|
      puts "dirs = " + keys.join(", ") if $verbose
      dir = candidate
      loop do
        dir = File.dirname(dir)
        if include?(dir)
          self[dir].each do |re|
            puts "matching " + candidate + " against " + re.source if $verbose
            # stop as soon as we find out it is ignored
            return true if re.match(candidate)
          end
        else
          puts "    dir " + dir + " is not included" if $verbose
        end
        # "." and "/" are their own parents, else we'll cycle continuously
        break if File.dirname(dir) == dir
      end
    end

    return false              # it's not ignored
  end

  private

  # Returns the home directory from the environment: HOME (unix), else
  # HOMEDRIVE and/or HOMEPATH (windows). May be nil.
  def home_directory
    home = ENV["HOME"]
    return home if home

    drive = ENV["HOMEDRIVE"]
    path  = ENV["HOMEPATH"]
    return drive unless path
    drive ? drive + path : path
  end

  # Returns the patterns in the given ignore file. Patterns are separated by
  # whitespace, so a line may hold several of them and blank lines hold none.
  # Each "+" is escaped, since the shell-to-regexp conversion leaves it alone.
  def parse_ignore_file(fname)
    pats = Array.new
    File.foreach(fname) do |line|
      line.split.each do |pat|
        # block form: the replacement is inserted literally, with no
        # backslash-sequence processing
        pats.push(pat.gsub(/\+/) { '\\+' })
      end
    end
    pats
  end

end


## ------------------------------------------------------- 
## A file that has changed with respect to the configuration management system.
## This can be one that has been added (a new file), changed (a previously
## existing file), or deleted (one that has been removed).
## ------------------------------------------------------- 

class DeltaFile

  attr_accessor :adds, :changes, :deletes, :name

  # Creates a record for the given file name, stored in its converted form,
  # with every counter starting at zero.
  def initialize(name)
    @name    = convert_filename(name)
    @adds    = 0
    @changes = 0
    @deletes = 0
  end

  # The sum of the three counters.
  def total
    [ @adds, @changes, @deletes ].inject(:+)
  end

end


class ExistingFile < DeltaFile

  # The marker printed in front of this kind of file.
  def symbol
    "*"
  end

end


class DeletedFile < DeltaFile

  def initialize(name)
    # Nothing more is recorded here. It would be nice to know how long the
    # file was, i.e., how many lines were deleted.
    super(name)
  end

  # The marker printed in front of this kind of file.
  def symbol
    "-"
  end

end


class NewFile < DeltaFile

  # Creates the record for the given file and counts every line in the file
  # as an added line. Raises whatever File.foreach raises if the file cannot
  # be read.
  def initialize(name)
    super
    # File rather than IO: the IO class methods treat a name that starts
    # with "|" as a command to run, and file names here come from the
    # working tree. Counting while iterating also avoids holding the whole
    # file in memory.
    @adds = File.foreach(name).count
  end

  # The marker printed in front of this kind of file.
  def symbol; "+"; end

end


## -------------------------------------------------------
## Processing of "diff" output, either contextual ("long form") or unified
## (traditional).
## -------------------------------------------------------

class DiffOutput

  # total::  the record that accumulates the counts across all files
  # regexp:: the expression that recognizes the lines this object handles
  def initialize(total, regexp)
    @md     = nil
    @regexp = regexp
    @total  = total
  end

  # Matches the line against the expression, remembering (and returning) the
  # resulting match data, which is nil if the line did not match.
  def match(line)
    @md = @regexp.match(line)
  end

  # Applies the line count of the last match to the given record and then to
  # the total. The subclass supplies number_of_lines and update_record.
  def update(record)
    count = number_of_lines
    [ record, @total ].map { |target| update_record(target, count) }.last
  end

  # The class name followed by the source of the expression.
  def to_s
    "#{self.class} #{@regexp.source}"
  end

end


class NormalDiffOutput < DiffOutput

  # letter:: the command letter that sits between the two line ranges in
  #          standard diff output (e.g. the "c" in "3,4c4")
  def initialize(total, letter)
    range = '(\d+)(?:,(\d+))?'
    super(total, Regexp.new("^" + range + letter + range))
  end

  # Returns the amount of lines that changed, based on the MatchData object
  # which is from standard diff output: one more than the longer of the two
  # range spans.

  def number_of_lines
    spans = [ diff_difference(1, 2), diff_difference(3, 4) ]
    spans.max + 1
  end

  # Returns the difference between the two match data groups, which represent
  # the ends of a range in diff output (3,4c4), or 0 if the range has no
  # second number.

  def diff_difference(from, to)
    return 0 unless @md[to]
    @md[to].to_i - @md[from].to_i
  end

end


class NormalDiffOutputAdd < NormalDiffOutput

  # Recognizes the lines that use the "a" command letter.
  def initialize(total)
    super(total, 'a')
  end

  # Counts the lines as additions in the given record.
  def update_record(rec, nlines)
    rec.adds = rec.adds + nlines
  end

end


class NormalDiffOutputChange < NormalDiffOutput

  # Recognizes the lines that use the "c" command letter.
  def initialize(total)
    super(total, 'c')
  end

  # Counts the lines as changes in the given record.
  def update_record(rec, nlines)
    rec.changes = rec.changes + nlines
  end

end


class NormalDiffOutputDelete < NormalDiffOutput

  # Recognizes the lines that use the "d" command letter.
  def initialize(total)
    super(total, 'd')
  end

  # Counts the lines as deletions in the given record.
  def update_record(rec, nlines)
    rec.deletes = rec.deletes + nlines
  end

end


class NormalDiffProcessor

  # Creates one matcher for each of the add, delete and change lines of
  # normal diff output, each of which also updates the given total record.
  def initialize(total)
    @addre = NormalDiffOutputAdd.new(total)
    @delre = NormalDiffOutputDelete.new(total)
    @chgre = NormalDiffOutputChange.new(total)
  end

  # Returns the matchers to try against the given line, in the order add,
  # change, delete. The line itself is not consulted.
  def get_tests(line)
    [ @addre, @chgre, @delre ]
  end

end


# Directories listed so that the parents are first in the list.

class DirectoryList < Array

  # Builds the list from the directories of the given file names. Only
  # directories without a CVS/Entries file end up in the list.
  def initialize(files)
    super()
    files.each { |f| add(File.dirname(f)) }
  end

  # Adds a directory, and before it any of its ancestors that also lack a
  # CVS/Entries file, keeping parents ahead of their children. Directories
  # that have a CVS/Entries file, and ones already listed, are left out.
  def add(dir)
    return unless dir
    return if File.exist?(dir + "/CVS/Entries")

    parent = File.dirname(dir)

    # "." and "/" are their own parents: there is nothing above them to add
    # them to, and recursing on them would never end.
    return if parent == dir

    # attempt to add the parent
    add(parent)

    # nothing to do if dir is already in the list
    return if include?(dir)

    pdpos = index(parent)
    if pdpos
      # parent already in the list, so insert this dir immediately afterward
      insert(pdpos + 1, dir)
    else
      # prepending
      unshift(dir)
    end
  end

end


## -------------------------------------------------------
## CVS-specific code
## -------------------------------------------------------

# A difference within a configuration management system.

class CVSDelta
  attr_reader :added, :changed, :deleted, :total

  # Collects the added, changed and deleted files for the given files and
  # directories (the current directory when +args+ is empty).
  #
  # Unless $diff is false, "cvs diff" is run on the arguments and its output
  # is tallied. Then the CVS/Entries files are read to find the entries that
  # are missing locally, and the local files that have no entry.
  def initialize(args)
    # for showing that we're actually doing something
    @progress = if $quiet then nil else ProgressMeter.new end

    @ignored_patterns = IgnoredPatterns.new(".cvsignore")

    args = [ "." ] unless args.length > 0

    @added    = FileHash.new
    @changed  = FileHash.new
    @deleted  = FileHash.new
    @total    = DeltaFile.new("total")
    @warned   = Array.new
    @entries  = Array.new
    @entfiles = Array.new

    args.collect { |a| File.dirname(a) }.uniq.each { |dir| @ignored_patterns.read(dir) }

    read_diff(args) if $diff

    # determine new files

    args.each do |arg|
      if File.directory?(arg)
        File.find_directories(arg).each { |dir| read_entries_file(dir) unless dir.index(/CVS/) }
        File.find_files(arg).each { |f| add_file(f) }
      else
        # read the CVS/Entries file in the directory of the file
        dir = File.dirname(arg)
        read_entries_file(dir) unless dir.index(/CVS/)
        add_file(arg)
      end
    end
  end

  # Reads dir/CVS/Entries, once per directory. Every entry with a name and a
  # version is remembered; those whose file no longer exists are recorded as
  # deleted. The ignore patterns for the directory are read as well.
  def read_entries_file(dir)
    entfile = dir + "/CVS/Entries"
    if @entfiles.include?(entfile)
      puts "entries file " + entfile + " already read" if $verbose
    elsif !File.exist?(entfile)
      puts "no entries file: " + entfile if $verbose
    else
      puts "reading entries file: " + entfile if $verbose
      File.foreach(entfile) do |line|
        @progress.tick(dir) if @progress
        # the fields are separated by slashes; the first is before the name
        file, ver = line.split('/')[1 .. 2]
        if file && ver && !file.empty? && !ver.empty?
          fullname = convert_filename(dir + "/" + file)
          puts "adding entry: " + fullname if $verbose
          @entries.push(fullname)
          unless File.exist?(fullname)
            puts "entry " + fullname + " is missing" if $verbose
            add_deleted_file(fullname)
          end
        end
      end
      @entfiles.push(entfile)
      @ignored_patterns.read(dir)
    end
  end

  # Records the file as new, unless it has an entry or is ignored. Files that
  # are not readable are reported, once each, instead of being added.
  def add_file(file)
    fname = convert_filename(file)
    if @entries.include?(fname)
      puts "file already in entries: " + fname if $verbose
    elsif @ignored_patterns.is_ignored?(fname)
      puts "file is ignored: " + fname if $verbose
    else
      puts "adding file: " + fname if $verbose
      if File.readable?(fname)
        unless @added.include?(fname)
          # don't add it twice
          @added[fname] = NewFile.new(fname)
          @total.adds += @added[fname].adds
        end
      else
        unless @warned.include?(fname)
          puts "\rnot readable: " + fname
          @warned.push(fname)
        end
      end
    end
  end

  # Records the file as deleted.
  def add_deleted_file(file)
    @deleted[file] = DeletedFile.new(file)
  end

  # Returns the record of the changed file, creating it the first time. The
  # name is converted up front so that the lookup and the store use the same
  # key whichever form the name arrives in.
  def get_record(file)
    name = convert_filename(file)
    @changed[name] = ExistingFile.new(name) unless @changed.include?(name)
    @changed[name]
  end

  # Runs "cvs add" for the new directories and files, and "cvs remove" for
  # the deleted files, asking about each deletion first when $confirm is set.
  def execute
    print "\nEXECUTING COMMANDS\n"

    print "\n    ADDs\n"

    if @added.length > 0
      newdirs = DirectoryList.new(@added.keys)
      adds = newdirs + @added.keys
      execute_command(adds, "add")
    end

    if $confirm
      dels = @deleted.keys.reject do |name|
        print "delete " + name + "? "
        ans = $stdin.gets
        # anything but a "y" answer keeps the file; so does end of input
        ans.nil? || ans.upcase[0, 1] != 'Y'
      end
    else
      dels = @deleted.keys
    end

    print "\n    DELETEs\n"
    execute_command(dels, "remove")
  end

  # Prints and runs "cvs <command> <names...>", if there are any names.
  def execute_command(names, command)
    if names.size > 0
      cmd = [ "cvs", command, *names ].join(" ")
      print "        ", cmd, "\n"
      # one argument per name, so that no shell splits or expands the names
      system("cvs", command, *names)
    elsif $verbose
      puts "no files to " + command
    end
  end

  private

  # Runs "cvs diff" on the arguments and tallies its output: removed files
  # are recorded as deleted, and the change lines of normal diff output are
  # counted against the file named by the preceding "Index:" line.
  def read_diff(args)
    diffprocessor = NormalDiffProcessor.new(@total)
    curfile = nil

    # backticks seem to work more consistently than IO.popen, which was losing
    # lines from the CVS diff output.

    # Ignore the .cvsrc file; handle only normal diff output.

    # Tweaking compression (via -z[0 .. 9]) makes diff less likely to hang
    # after producing output. Both -z0 and -z9 work best on my system (against
    # the doctorj CVS repository at SourceForge.net).

    # NOTE(review): the arguments and the compression level go into a shell
    # command line unquoted, so names with spaces or shell characters are not
    # handled here.
    cmd = "cvs -fq -z" + $compression.to_s + " diff " + args.join(" ") + " 2>&1"

    output = `#{cmd}`
    output.each_line do |line|
      @progress.tick(line) if @progress

      puts "\rline: " + line if $verbose

      if line.index(/^\?\s*(\S*)/)
        # some CVS servers seem to write new files as "? foo/bar.x", but we'll
        # figure out the new files for ourselves anyway
      elsif line.index(/^cvs (?:diff|server):\s*(\S*)\s*was removed/) ||
          line.index(/^cvs (?:diff|server): *cannot find\s*(\S*)/)
        # various ways that CVS tells us what was removed
        file = $1
        add_deleted_file(file)
        puts "deleted file: " + file if $verbose
      elsif line.index(/^Index:\s+(\S+)/)
        curfile = $1
        puts "new current file: " + curfile if $verbose
      else
        diffprocessor.get_tests(line).each do |re|
          if !re.match(line)
            puts re.to_s + ": not a match line: " + line if $verbose
          elsif curfile
            re.update(get_record(curfile))
          else
            # a change line before any "Index:" line has no file to go to
            puts "no current file for line: " + line if $verbose
          end
        end
      end
    end
  end

end


# Prints one line of the summary: the total, adds, changes and deletes of the
# record, each in a column seven wide, followed by the given name or, when no
# name is given, the symbol and name of the record.
def print_record(rec, name = nil)
  label   = name || (rec.symbol + " " + rec.name)
  columns = [ rec.total, rec.adds, rec.changes, rec.deletes ]
  columns.each { |value| print format("%7d  ", value) }
  print label, "\n"
end


# Prints the summary table: a header, one line per file in order of file
# name, and the totals. When a file is in more than one of the added, changed
# and deleted collections, the record from the last of them is shown.
def print_change_summary(delta)
  puts
  printf "%-7s  %-7s  %-7s  %-7s  %s\n", "total", "added", "changed", "deleted", "file"
  print "=======  =======  =======  =======  ====================\n"

  by_name = Hash.new
  [ delta.added, delta.changed, delta.deleted ].each do |group|
    group.each_pair { |fname, rec| by_name[fname] = rec }
  end

  by_name.sort.each do |pair|
    print_record(pair.last)
  end

  print "-------  -------  -------  -------  --------------------\n"
  print_record(delta.total, "Total")
end


# Prints the usage message and the list of options to standard output.
def show_help
  help_lines = [
    "USAGE",
    "    cvsdelta [options] directory...",
    "",
    "OPTIONS",
    "    -c, --confirm",
    "        Confirm deletions from CVS. Valid only with the execute option.",
    "",
    "    -D, --nodiff",
    "        Run without comparing files that exist locally and in CVS.",
    "",
    "    -e, --execute",
    "        Run the add and remove commands for the appropriate files.",
    "",
    "    -h, --help",
    "        Display this help message.",
    "",
    "    -q, --quiet",
    "        Run with minimum output.",
    "",
    "    -v, --version",
    "        Display the version and exit.",
    "",
    "    -V, --verbose",
    "        Run with maximum output.",
    "",
    "    -z [LEVEL], --compression [LEVEL]",
    "        Set the compression to the given level for net traffic.",
    "",
    "See the man page for more information.",
    ""
  ]

  puts(help_lines)
end


# We should be running this from a CVS'ed directory. This is only a warning:
# the script carries on either way.
unless File.exist?("CVS")
  $stderr.print "this directory does not appear to be part of a CVS project\n"
end

# whether to run the add and remove commands after printing the summary
execute = false

opts = GetoptLong.new([ "--execute",       "-e",  GetoptLong::NO_ARGUMENT       ],
                      [ "--help",          "-h",  GetoptLong::NO_ARGUMENT       ],
                      [ "--quiet",         "-q",  GetoptLong::NO_ARGUMENT       ],
                      [ "--confirm",       "-c",  GetoptLong::NO_ARGUMENT       ],
                      [ "--verbose",       "-V",  GetoptLong::NO_ARGUMENT       ],
                      [ "--nodiff",        "-D",  GetoptLong::NO_ARGUMENT       ],
                      [ "--compression",   "-z",  GetoptLong::REQUIRED_ARGUMENT ],
                      [ "--version",       "-v",  GetoptLong::NO_ARGUMENT       ])

# Each option sets the matching global switch. --help and --version print
# their text and stop right away.
opts.each do |o, a|
  case o
  when /help/
    show_help
    exit
  when /quiet/
    $quiet = true
  when /verbose/
    $verbose = true
  when /execute/
    execute = true
  when /confirm/
    $confirm = true
  when /nodiff/
    $diff = false
  when /compression/
    $compression = a
  when /version/
    print $PACKAGE, ", version ", $VERSION, "\n"
    print "Written by Jeff Pace (jpace@erols.com).\n"
    print "Released under the Lesser GNU Public License.\n"
    # showing the version on request is not a failure: exit as --help does
    exit
  end
end

# what is left in ARGV after the options are the files and directories
delta = CVSDelta.new(ARGV)

print_change_summary(delta)
delta.execute if execute
