#!/usr/bin/env python

# flawfinder: Find potential security flaws ("hits") in source code.
# Usage:
#   flawfinder [options] [source_code_file]+
#
# See the man page for a description of the options.

# Program version string; shown in the startup header and by --version.
version="0.21"

# The output is as follows:
# filename:line_number:column_number [risk_level] (type) function_name: message
#   where "risk_level" goes from 0 to 5. 0=no risk, 5=maximum risk.
# The final output is sorted by risk level, most risky first.
# The ":column_number" isn't displayed by default.
# 
# Currently this program can only analyze C/C++ code.

# Copyright (C) 2001 David A. Wheeler
# This is released under the General Public License (GPL):
# 
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA



import sys, re, string, getopt
import pickle               # To support load/save/diff of hitlist
import os, glob, operator   # To support filename expansion on Windows
import os.path

# Program Options - these are the default values:
# (process_options() overrides them from the command line.)
show_context = 0         # Also print the source line of each hit?
minimum_level = 1        # Hits below this risk level are not recorded.
show_immediately = 0     # Show each hit as soon as it is recorded?
show_inputs = 0          # Only show inputs?
show_columns = 0         # Print the column number after the line number?
never_ignore = 0         # If true, NEVER ignore problems, even if directed.
loadhitlist = None       # File to load the hitlist from instead of scanning.
savehitlist = None       # File to save the hitlist to.
diffhitlist = None       # File of hits that are left out of the results.
quiet = 0                # Suppress the "Examining ..." status lines?

displayed_header = 0    # Have we displayed the header yet?


def error(message):
  "Write message to standard error, prefixed with 'Error: '."
  report = "Error: %s\n" % message
  sys.stderr.write(report)


# Support routines: find a pattern.
# To simplify the calling convention, several global variables are used
# and these support routines are defined, in an attempt to make the
# actual calls simpler and clearer.
#
# The three variables below describe the file being scanned;
# process_c_file() resets them at the start of every file.

filename = ""      # Source filename.
linenumber = 0     # Linenumber from original file.
ignoreline = -1    # Line number to ignore (add_warning drops hits on it).


# Precompiled patterns: the start of any line, and a whitespace-only line.
# NOTE(review): neither is referenced in the part of the file visible here.
line_beginning = re.compile( r'(?m)^' )
blank_line     = re.compile( r'(?m)^\s+$' )


class Hit:
  """
  Each instance of Hit is a warning of some kind in a source code file.
  See the rulesets, which define the conditions for triggering a hit.
  Hit is initialized with a tuple containing the following:
    hook: function to call when function name found.
    level: (default) warning level, 0-5. 0=no problem, 5=very risky.
    warning: warning (text saying what's the problem)
    suggestion: suggestion (text suggesting what to do instead)
    category: One of "buffer" (buffer overflow), "race" (race condition),
              "tmpfile" (temporary file creation), "format" (format string).
              Use "" if you don't have a better category.
    url: URL fragment reference.
    other:  A dictionary with other settings; every key becomes an
            attribute of the hit, overriding the class defaults below.

  Other settings usually set:

    name: function name
    parameters: the function parameters (0th parameter null)
    input: set to 1 if the function inputs from external sources.
    start: start position (index) of the function name (in text)
    end:  end position of the function name (in text)
    filename: name of file
    line: line number in file
    column: column in line in file
    context_text: text surrounding hit

  Hits are ordered by descending level, then by filename, line, column
  and name; two hits are equal when all five of these agree.  The rich
  comparison methods implement exactly the ordering of __cmp__, so that
  sorting and "in" tests also work on interpreters that ignore __cmp__
  and have no cmp() builtin."""

  # Set default values:
  source_position = 2 # By default, the second parameter is the source.
  format_position = 1 # By default, the first parameter is the format.
  input = 0           # By default, this doesn't read input.
  note = ""          # No additional notes.
  filename = ""      # Empty string is filename.

  def __init__(self, data):
    hook, level, warning, suggestion, category, url, other = data
    self.hook, self.level = hook, level
    self.warning, self.suggestion = warning, suggestion
    self.category, self.url = category, url
    for key in other.keys():
      setattr(self, key, other[key])

  def _sort_key(self):
    "Return the tuple that defines this hit's ordering and identity."
    # Negating the level makes the highest risk sort first.
    return (-self.level, self.filename, self.line, self.column, self.name)

  def __cmp__(self, other):
    return (cmp(other.level, self.level) or  # Highest risk first.
            cmp(self.filename, other.filename) or
            cmp(self.line, other.line) or
            cmp(self.column, other.column) or
            cmp(self.name, other.name))

  # Rich comparisons, equivalent to __cmp__ above.
  def __eq__(self, other): return self._sort_key() == other._sort_key()
  def __ne__(self, other): return self._sort_key() != other._sort_key()
  def __lt__(self, other): return self._sort_key() <  other._sort_key()
  def __le__(self, other): return self._sort_key() <= other._sort_key()
  def __gt__(self, other): return self._sort_key() >  other._sort_key()
  def __ge__(self, other): return self._sort_key() >= other._sort_key()

  def __getitem__(self, X):   # Define this so this works: "%(line)" % hit
    return getattr(self, X)

  def show(self):
    "Print this hit; honors the show_columns and show_context options."
    sys.stdout.write("%s:" % self.filename)
    if show_columns:
      print ("%(line)s:%(column)s [%(level)s] (%(category)s) "
             "%(name)s: %(warning)s. %(suggestion)s. %(note)s" % self)
    else:
      print ("%(line)s [%(level)s] (%(category)s) "
             "%(name)s: %(warning)s. %(suggestion)s. %(note)s" % self)
    # A parenthesized single value prints identically with the print
    # statement and with the print function.
    if show_context: print (self.context_text)



# The "hitlist" is the list of all hits (warnings) found so far.
# Use add_warning to add to it.

hitlist = []

def add_warning(hit):
  """Record hit in the global hitlist unless one of the filters rejects it.

  A hit is dropped when only inputs are wanted and it is not an input,
  when its level is below minimum_level, or when the line currently being
  scanned is the line marked to be ignored.  A recorded hit is also shown
  at once if show_immediately is set."""
  if show_inputs and not hit.input:
    return
  if hit.level < minimum_level or linenumber == ignoreline:
    return
  hitlist.append(hit)
  if show_immediately:
    hit.show()

def internal_warn(message):
  "Report a problem the scanner itself ran into (printed like other output)."
  print (message)

# C Language Specific

def extract_c_parameters(text, pos=0):
  """Return a list of the given C function's parameters, starting at text[pos].

  text[pos:] must consist of optional whitespace followed by a
  parenthesized argument list.  The result has an empty 0th entry so that
  the first parameter is result[1]: '(a,b)' produces ['', 'a', 'b'].
  Commas inside nested parentheses, string literals and comments do not
  split parameters.

  Returns [] if no "(" follows, and also if the text ends before the list
  is closed (a warning is printed in that case).  If a ";" is met before
  the closing ")", a warning is printed and the parameters collected so
  far are returned.  The result is always a list, so callers can safely
  take its length."""
  i = pos
  # Skip whitespace and find the "("; if there isn't one, return []:
  while i < len(text):
    if text[i] == '(':                 break
    elif text[i] in string.whitespace: i = i + 1
    else:                              return []
  else:  # Never found a reasonable ending.
    return []
  i = i + 1
  parameters = [""]  # Insert 0th entry, so 1st parameter is parameter[1].
  currentstart = i
  parenlevel = 1
  instring = 0
  incomment = 0
  while i < len(text):
    c = text[i]
    if instring:
      if c == '"': instring = 0
      if c == '\\':
        # Step over an escaped quote or backslash inside the string.
        if text[i:i+2]=='\\"': i = i + 1
        elif text[i:i+2]=='\\\\': i = i + 1
    elif incomment:
      if c == '*' and text[i:i+2]=='*/':
        incomment = 0
        i = i + 1
    else:
      if c == '"': instring = 1
      elif c == '/' and text[i:i+2]=='/*':
        incomment = 1
        i = i + 1
      elif c == '/' and text[i:i+2]=='//':
        while i < len(text) and text[i] != "\n":
          i = i + 1
      elif c == '\\' and text[i:i+2]=='\\"': i = i + 1 # Handle '\"'
      elif c == '(': parenlevel = parenlevel + 1
      elif c == ',' and (parenlevel == 1):
        parameters.append(text[currentstart:i].strip())
        currentstart = i + 1
      elif c == ')':
        parenlevel = parenlevel - 1
        if parenlevel <= 0:
          parameters.append(text[currentstart:i].strip())
          return parameters
      elif c == ';':
        internal_warn("Parsing failed to find end of parameter list; "
                      "semicolon terminated it in %s" % text[pos:pos+200])
        return parameters
    i = i + 1
  internal_warn("Parsing failed to find end of parameter list in %s" %
                text[pos:pos+200])
  # Treat an unterminated list as having no parameters.  Falling off the
  # end here would return None and break every hook that measures the list.
  return []


# These patterns match gettext() and _() for internationalization.
# This is compiled here, to avoid constant recomputation.
# FIXME: assumes simple function call if it ends with ")",
# so will get confused by patterns like  gettext("hi") + function("bye")
# The ?s makes it possible to match multi-line strings.
gettext_pattern = re.compile(r'(?s)^\s*' + 'gettext' + r'\s*\((.*)\)\s*$')
undersc_pattern = re.compile(r'(?s)^\s*' + '_' + r'\s*\((.*)\)\s*$')

def strip_i18n(text):
  """Strip any internationalization function call surrounding 'text',
  such as gettext() and _().

  If text is a call of gettext() or _(), return what is between the
  parentheses with surrounding whitespace removed; otherwise return text
  unchanged (not even stripped)."""
  match = gettext_pattern.search(text)
  if match: return match.group(1).strip()
  match = undersc_pattern.search(text)
  if match: return match.group(1).strip()
  return text

# Matches a C string literal holding at most one character: nothing, one
# plain character, or one escape sequence.  Octal escapes consist of one to
# three digits in the range 0-7; hexadecimal escapes start with \x.
p_c_singleton_string = re.compile(
    r'^\s*"([^\\]|\\[^0-7]|\\[0-7]{1,3}|\\x[0-9A-Fa-f]+)?"\s*$')

def c_singleton_string(text):
  "Returns true if text is a C string with 0 or 1 character."
  if p_c_singleton_string.search(text): return 1
  else: return 0

p_c_constant_string = re.compile( r'^\s*"([^\\]|\\[^0-6]|\\[0-6]+)*"\s*$')

def c_constant_string(text):
  """Return 1 if text looks like a constant C string, otherwise 0.

  That is: optional whitespace, a double quote, any mix of plain
  characters and backslash escapes, a closing double quote, optional
  whitespace."""
  if p_c_constant_string.search(text) is None:
    return 0
  return 1


# Precompile patterns for speed.

# Patterns indicating the beginning or ending of a chunk.
# I'm not using chunks right now, this may be removed in the future.
# c_begin_chunk: text starting (after optional whitespace) with a brace or
# one of the listed control keywords.
# c_end_chunk: text ending with "{", "}" or ";" (optional whitespace after).
c_begin_chunk = re.compile( r'^\s*({|}|if|else|case|switch|for)\b' )
c_end_chunk   = re.compile( r'[{};]\s*$' )


# Currently this is unused:
def setdefault(dict, key, value):
  """Return dict[key], first storing value under key if key is missing.

  An existing entry is never overwritten."""
  try:
    found = dict[key]
  except KeyError:
    found = dict[key] = value
  return found


def c_buffer(hit):
  """Hook for functions that copy a source string into a buffer.

  Looks at the parameter at hit.source_position, if the call has that
  many parameters.  A constant source of at most one character lowers
  the level to 1; any other constant string (possibly wrapped in an
  internationalization call) lowers it by 2, but not below 1.  The hit
  is then passed to add_warning()."""
  params = hit.parameters
  where = hit.source_position
  if where < len(params):
    src = params[where]
    if c_singleton_string(src):
      hit.level = 1
      hit.note = "Risk is low because the source is a constant character."
    elif c_constant_string(strip_i18n(src)):
      hit.level = max( hit.level - 2, 1)
      hit.note = "Risk is low because the source is a constant string."
  add_warning(hit)


def c_printf(hit):
  """Hook for the printf family: judge the format string parameter.

  If the parameter at hit.format_position exists and is a constant string
  (possibly wrapped in an internationalization call), there is no format
  string risk: snprintf/vsnprintf are turned into a level 1 portability
  warning, everything else is reduced to level 0.  Otherwise the rule's
  defaults are kept.  The hit is then passed to add_warning()."""
  pos = hit.format_position
  has_constant_format = 0
  if pos < len(hit.parameters):
    # Assume that translators are trusted to not insert "evil" formats:
    has_constant_format = c_constant_string(strip_i18n(hit.parameters[pos]))
  if has_constant_format:
    # Parameter is constant, so there's no risk of format string problems.
    if hit.name in ("snprintf", "vsnprintf"):
      hit.level = 1
      hit.warning = \
        "On some very old systems, snprintf is incorrectly implemented " \
        "and permits buffer overflows; there are also incompatible " \
        "standard definitions of it"
      hit.suggestion = "Check it during installation, or use something else"
      hit.category = "port"
    else:
      # We'll pass it on, just in case it's needed, but at level 0 risk.
      hit.level = 0
      hit.note = "Constant format string, so not considered risky."
  add_warning(hit)


p_dangerous_sprintf_format = re.compile(r'%-?([0-9]+|\*)?s')

# sprintf has both buffer and format vulnerabilities.
def c_sprintf(hit):
  """Hook for sprintf/vsprintf: examine the format (the "source").

  A constant format of at most one character gives level 1.  A constant
  format without a %s conversion matched by p_dangerous_sprintf_format
  lowers the level by 2 (not below 1); with such a conversion the
  default buffer warning stands.  A format that is not constant turns
  the hit into a level 4 "format" warning.  The hit is then passed to
  add_warning()."""
  pos = hit.source_position
  if pos < len(hit.parameters):
    fmt = hit.parameters[pos]
    if c_singleton_string(fmt):
      hit.level = 1
      hit.note = "Risk is low because the source is a constant character."
    else:
      fmt = strip_i18n(fmt)
      if not c_constant_string(fmt):
        # Ho ho - a nonconstant format string - we have a different problem.
        hit.warning = "Potential format string problem"
        hit.suggestion = "Make format string constant"
        hit.level = 4
        hit.category = "format"
        hit.url = ""
      elif not p_dangerous_sprintf_format.search(fmt):
        hit.level = max( hit.level - 2, 1)
        hit.note = "Risk is low because the source has a constant maximum length."
      # otherwise, warn of potential buffer overflow (the default)
  add_warning(hit)

p_dangerous_scanf_format = re.compile(r'%s')
p_low_risk_scanf_format = re.compile(r'%[0-9]+s')

def c_scanf(hit):
  """Hook for the scanf family: judge the format string parameter.

  With a constant format: an unlimited %s keeps the rule's default level,
  a %s with a numeric width gives level 1, and a format with neither
  gives level 0.  With a format that is not constant only a note is
  added.  The hit is then passed to add_warning()."""
  pos = hit.format_position
  if pos < len(hit.parameters):
    # Assume that translators are trusted to not insert "evil" formats;
    # it's not clear that translators will be messing with INPUT formats,
    # but it's possible so we'll account for it.
    fmt = strip_i18n(hit.parameters[pos])
    if not c_constant_string(fmt):
      # Format isn't a constant.
      hit.note = "If the scanf format is influenceable by an attacker, it's exploitable."
    elif p_dangerous_scanf_format.search(fmt):
      pass # Accept default.
    elif p_low_risk_scanf_format.search(fmt):
      hit.level = 1
      hit.note = "Only low-risk scanf formats detected."
    else:
      # No risky scanf request.
      # We'll pass it on, just in case it's needed, but at level 0 risk.
      hit.level = 0
      hit.note = "No risky scanf format detected."
  add_warning(hit)

def normal(hit):
  "Hook for rules that need no further analysis: pass the hit to add_warning()."
  add_warning(hit)


# "c_ruleset": the rules for identifying "hits" in C (potential warnings).
# It's a dictionary, where the key is the function name causing the hit,
# and the value is a tuple with the following format:
#  (hook, level, warning, suggestion, category, url, {other})
# See the definition for class "Hit".
# The key can have multiple values separated with "|".
# (expand_ruleset() splits such keys into one entry per function name.)

c_ruleset = {
  "strcpy" :
     (c_buffer, 4,
      "does not check for buffer overflows",
      "Consider using strncpy or strlcpy",
      "buffer", "", {}),
  "strcat" :
     (c_buffer, 4,
      "does not check for buffer overflows",
      "Consider using strncat or strlcat",
      "buffer", "", {}),
  "gets":
     (normal, 5, "does not check for buffer overflows",
      "Use fgets() instead", "buffer", "", {'input' : 1}),

  # The "sprintf" hook will raise "format" issues instead if appropriate:
  "sprintf|vsprintf":
     (c_sprintf, 4,
      "does not check for buffer overflows",
      "Use snprintf or vsnprintf",
      "buffer", "", {}),

  "printf|vprintf":
     (c_printf, 4,
      "if format strings can be influenced by an attacker, they can be "
      "exploited",
      "Use a constant for the format specification",
      "format", "", {}),

  "fprintf|vfprintf":
     (c_printf, 4,
      "if format strings can be influenced by an attacker, they can be "
      "exploited",
      "Use a constant for the format specification",
      "format", "", { 'format_position' : 2}),

  # The "syslog" hook will raise "format" issues.
  "syslog":
     (c_printf, 4,
      "if syslog's format strings can be influenced by an attacker, "
      "they can be exploited",
      "Use a constant format string for syslog",
      "format", "", { 'format_position' : 2} ),

  "snprintf|vsnprintf":
     (c_printf, 4,
      "if format strings can be influenced by an attacker, they can be "
      "exploited",
      "Use a constant for the format specification",
      "format", "", { 'format_position' : 3}),

  "scanf|vscanf":
     (c_scanf, 4,
      "the scanf() family's %s operation, without a limit specification, "
      "permits buffer overflows",
      "Specify a limit to %s, or use a different input function",
      "buffer", "", {'input' : 1}),

  "fscanf|sscanf|vsscanf|vfscanf":
     (c_scanf, 4,
      "the scanf() family's %s operation, without a limit specification, "
      "permits buffer overflows",
      "Specify a limit to %s, or use a different input function",
      "buffer", "", {'input' : 1, 'format_position' : 2}),

  "realpath|getopt|getpass|streadd|strecpy|strtrns|getwd":
     (normal, 3,
     "this does not protect against buffer overflows "
     "by itself, so use with caution",
      "",
      "buffer", "dangers-c", {}),

  "access":        # ???: TODO: analyze TOCTOU more carefully.
     (normal, 4,
      "this usually indicates a security flaw.  If an "
      "attacker can change anything along the path between the "
      "call to access() and the file's actual use (e.g., by moving "
      "files), the attacker can exploit the race condition",
      "Set up the correct permissions (e.g., using setuid()) and "
      "try to open the file directly", "race",
      "avoid-race#atomic-filesystem", {}),
  "chown":
     (normal, 5,
      "this accepts filename arguments; if an attacker "
      "can move those files, a race condition results. ",
      "Use fchown( ) instead",
      "race", "", {}),
  # There is no fchgrp(); the descriptor-based call that changes a file's
  # group is fchown().
  "chgrp":
     (normal, 5,
      "this accepts filename arguments; if an attacker "
      "can move those files, a race condition results. ",
      "Use fchown( ) instead",
      "race", "", {}),
  "chmod":
     (normal, 5,
      "this accepts filename arguments; if an attacker "
      "can move those files, a race condition results. ",
      "Use fchmod( ) instead",
      "race", "", {}),
  "vfork":
     (normal, 2,
      "on some old systems, vfork() permits race conditions, and it's "
      "very difficult to use correctly", 
      "Use fork() instead",
      "race", "", {}),
  "tmpfile":
     (normal, 2,
      "tmpfile() has a security flaw on some systems (e.g., older "
      "System V systems)",
      "",
      "tmpfile", "", {}),
  "tmpnam|tempnam":
     (normal, 3,
      "temporary file race condition",
      "",
      "tmpfile", "avoid-race", {}),

  # TODO: Detect GNOME approach to mktemp and ignore it.
  "mktemp":
     (normal, 4,
      "temporary file race condition",
      "",
      "tmpfile", "avoid-race", {}),

  # TODO: Need to detect varying levels of danger.
  # These start a new process; they have nothing to do with temporary
  # files, so they get a category of their own.
  "execl|execlp|execle|execv|execvp|system|popen":
     (normal, 4,
      "this calls out to a new process and is difficult to use safely",
      "try using a library call that implements the same functionality "
      "if available.",
      "shell", "", {}),

  # Random values.  Don't trigger on "initstate", it's too common a term.
  "drand48|erand48|jrand48|lcong48|lrand48|mrand48|nrand48|random|seed48|setstate|srand|strfry|srandom":
     (normal, 3,
      "This function is not sufficiently random for security-related functions such as key and nonce creation",
      "use a more secure technique for acquiring random values",
      "random", "", {}),

  "getenv":
     (normal, 3, "Environment variables are untrustable input if they can be set by an attacker.  They can have any content and length, and the same variable can be set more than once.",
      "Check environment variables carefully before using them",
      "buffer", "", {'input' : 1}),

  # TODO: There are many more rules to add, esp. for TOCTOU.
  }

# Not a real ruleset: a copy-and-paste starting point for new rules.
# Each value has the same layout as the entries of c_ruleset:
#  (hook, level, warning, suggestion, category, url, {other})
template_ruleset = {
  # This is a template for adding new entries (the key is impossible):
  "9":
     (normal, 2,
      "",
      "",
      "tmpfile", "", {}),
  }


def find_column(text, position):
  """Find column number inside line.

  Returns the 1-based column of text[position] within its line, i.e. the
  distance from the last newline before position (or from just before
  the start of text when there is none)."""
  # rfind() yields -1 when there is no earlier newline, which makes the
  # subtraction come out as position + 1: exactly the first-line column.
  return position - text.rfind("\n", 0, position)

def get_context(text, position):
  """Get the whole text line that contains text[position].

  The line is returned without its terminating newline; the last line of
  text need not end with one."""
  linestart = text.rfind("\n", 0, position+1) + 1
  lineend   = text.find("\n", position)
  if lineend == -1: lineend = len(text)
  return text[linestart:lineend]

def process_directive():
  """Given a directive, process it.

  Every directive is treated as "ignore": hits already recorded for the
  current line of the current file are removed from the hitlist.  If
  there are none, the following line is marked to be ignored instead.
  Does nothing when never_ignore is set.

  Note that a hit that was already displayed because of show_immediately
  cannot be taken back; it is only removed from the final results."""
  global ignoreline
  # TODO: Currently this is just a stub routine that simply removes
  # hits from the current line, if any, and sets a flag if not.
  # Thus, any directive is considered the "ignore" directive.
  if never_ignore: return
  hitfound = 0
  # Iterate backwards over hits, to be careful about the destructive iterator
  for i in range(len(hitlist)-1, -1, -1):
    hit = hitlist[i]
    # The hitlist spans all files scanned so far, so the file name has to
    # match as well; otherwise hits on the same line number of an earlier
    # file would be thrown away.
    if hit.filename == filename and hit.line == linenumber:
      del hitlist[i] # DESTROY - this is a DESTRUCTIVE iterator.
      hitfound = 1   # Don't break, because there may be more than one.
  if not hitfound:
    ignoreline = linenumber + 1  # Nothing found - ignore next line.

# Characters that can be in a number.
# 0x4, 4.4e4, etc.
# NOTE(review): not referenced in the part of the file visible here.
numberset=string.hexdigits+"_x.Ee"

# Patterns for various circumstances:
# An #include directive with its <...> or "..." operand.
p_include = re.compile( r'#\s*include\s+(<.*?>|".*?")' )
p_digits  = re.compile( r'[0-9]' )
p_alphaunder = re.compile( r'[A-Za-z_]' )  # Alpha chars and underline.
# A "word" in C.  Note that "$" is permitted -- it's not permitted by the
# C standard in identifiers, but gcc supports it as an extension.
p_c_word = re.compile( r'[A-Za-z_][A-Za-z_0-9$]*' )
# We'll recognize ITS4 and RATS ignore directives, as well as our own,
# for compatibility's sake:
# (case-insensitive tool name, a colon, then the directive text up to "*")
p_directive = re.compile( r'(?i)\s*(ITS4|Flawfinder|RATS):\s*([^\*]*)' )


def process_c_file(f):
  """Scan one C/C++ source file and run the ruleset hooks on what it finds.

  f is the name of the file to read, or "-" to read standard input.
  The module globals filename, linenumber and ignoreline are reset for
  this file, and linenumber is kept up to date while scanning so that
  hooks and directives can refer to the current line.

  The scanner is a trivial tokenizer: it skips comments, string literals
  and #include lines, and looks up every identifier in c_ruleset.  For
  each match it builds a Hit, fills in its position, context and parsed
  parameters, and calls the rule's hook.  Comments are checked for
  ignore directives (see process_directive).

  Raises whatever open() raises if the file cannot be read."""
  global filename, linenumber, ignoreline
  filename=f
  linenumber = 1
  ignoreline = -1

  incomment = 0
  instring = 0
  linebegin = 1

  if f == "-":
    source = sys.stdin
  else:
    source = open(f, "r")

  # Read ENTIRE file into memory.  Use readlines() to convert \n if necessary.
  # This turns out to be very fast in Python, even on large files, and it
  # eliminates lots of range checking later, making the result faster.
  try:
    if not quiet:
      print ("Examining %s" % f)
      sys.stdout.flush()
    text = "".join(source.readlines())
  finally:
    # Don't keep one descriptor open per scanned file; stdin is not ours
    # to close.
    if source is not sys.stdin:
      source.close()

  i = 0
  while i < len(text):
    # This is a trivial tokenizer that just tries to find "words", which
    # match [A-Za-z_][A-Za-z0-9_]*.  It skips comments & strings.
    # It also skips "#include <...>", which must be handled specially
    # because "<" and ">" aren't usually delimiters.
    # It doesn't bother to tokenize anything else, since it's not used.
    # The following is a state machine with 3 states: incomment, instring,
    # and "normal", and a separate state "linebegin" if at BOL.
    # NOTE(review): character literals are not recognized, so a literal
    # such as '"' switches the scanner into the instring state.
    if linebegin:  # If at beginning of line, see if #include is there.
      linebegin = 0
      m = p_include.match(text,i)
      if m:  # Found #include, skip it.  Otherwise: #include <stdio.h>
        i = m.end(0)
        continue
    c = text[i]
    if c == "\n":
      linenumber = linenumber + 1
      linebegin = 1
    i = i + 1   # From here on, text[i] points to next character.
    if c == " ": continue  # Skip whitespace.
    if i < len(text): nextc = text[i]
    else:             nextc = ''
    if incomment:
      if c=='*' and nextc=='/':
        i = i + 1
        incomment = 0
    elif instring:
      if c == '\\' and (nextc == '"' or nextc == '\\'):
        i = i + 1
      elif c == '"':
        instring = 0
    else:
      if c=='/' and nextc=='*':
        m = p_directive.match(text, i+1)  # Is there a directive here?
        if m:
          process_directive()
        i = i + 1
        incomment = 1
      elif c=='/' and nextc=='/':  # "//" comments - skip to EOL.
        m = p_directive.match(text, i+1)  # Is there a directive here?
        if m:
          process_directive()
        while i<len(text) and text[i] != "\n":
          i = i + 1
      elif c=='"':
        instring = 1
      else:
        m = p_c_word.match(text, i-1)
        if m:                        # Do we have a word?
          startpos=i-1
          endpos = m.end(0)
          i = endpos
          word = text[startpos:endpos]
          if word in c_ruleset:  # FOUND A MATCH, setup & call hook.
            hit = Hit(c_ruleset[word])
            hit.name = word
            hit.start, hit.end = startpos, endpos
            hit.line, hit.column = linenumber, find_column(text, startpos)
            hit.filename=filename
            hit.context_text = get_context(text, startpos)
            hit.parameters = extract_c_parameters(text, endpos)
            hit.hook(hit)
        elif p_digits.match(c):
          while i<len(text) and p_digits.match(text[i]): # Process a number.
            i = i + 1
        # else some other character, which we ignore.

def expand_ruleset(ruleset):
  """Expand the compressed rules of ruleset in place.

  Rulesets can have compressed sets of rules (multiple function names
  separated by "|").  Every such entry is replaced by one entry per
  function name, all sharing the original value.  Keys without "|" are
  left alone, so expanding twice is harmless.  Returns None."""
  # Walk over a snapshot of the keys: the loop body adds and deletes
  # entries, which must not happen to a dictionary that is being iterated.
  for rule in list(ruleset.keys()):
    if rule.find("|") != -1:  # We found a rule to expand.
      for newrule in rule.split("|"):
        ruleset[newrule] = ruleset[rule]
      del ruleset[rule]
  # To print out the set of keys in the expanded ruleset, run:
  #   print `ruleset.keys()`

def initialize_ruleset():
  expand_ruleset(c_ruleset)
  print "Number of dangerous functions in C ruleset:", len(c_ruleset)

# Filename extensions (including the dot) that mark a file as C/C++ source.
# The dictionary is used as a set: only the keys are looked up, the
# values are never read.
c_extensions = { '.c' : 1, '.h' : 1,
                 '.ec': 1, '.ecp': 1,  # Informix embedded C.
                 '.pgc': 1,            # Postgres embedded C.
                 '.C': 1, '.cpp': 1, '.cxx': 1, '.cc': 1,    # C++.
                 '.pcc': 1,            # Oracle C++
                 '.hpp': 1, '.H' : 1,  # .h - usually C++.
               }

def maybe_process_file(f):
  """Process f, but only if it's source code in a language we can handle.

  Currently, that means only C/C++, and we check if the filename has a
  known C/C++ filename extension.  If it doesn't, we ignore the file."""
  # splitext() only looks at the last path component, so a dot in a
  # directory name is not mistaken for an extension, and short names
  # such as "a.c" are recognized too.
  extension = os.path.splitext(f)[1]
  if extension in c_extensions:
    process_c_file(f)

def call_maybe_process_file(dummy, dirname, filenames):
  """Visitor for os.path.walk(): handle the entries of one directory.

  dummy is the unused extra argument of the walk, dirname the directory
  being visited and filenames the names of its entries."""
  for name in filenames:
    path = os.path.join(dirname, name)
    # The walk lists subdirectories along with plain files; a directory
    # called e.g. "old.c" must not be handed to the file scanner, which
    # would fail trying to open it.
    if os.path.isfile(path):
      maybe_process_file(path)

# Show the header, but only if it hasn't been shown yet.
def display_header():
  "Print the program banner the first time this is called; later calls do nothing."
  global displayed_header
  if displayed_header:
    return
  print ("Flawfinder version %s, (C) 2001 David A. Wheeler." % version)
  displayed_header = 1

def process_dir_or_file(f):
  # process the file or directory - ALWAYS normal files given on the
  # command line, to give users control over what's processed:
  if os.path.isfile(f):
     process_c_file(f)
  elif os.path.isdir(f):
     os.path.walk(f, call_maybe_process_file, "")
  else:
     print "Warning: skipping file", f
     
def usage():
  "Print the command line synopsis and a description of every option."
  print ("""
flawfinder [--help] [--version] [--context]  [-c]  [--columns]  [  -m  X ]
           [ --minlevel=X ] [--immediate] [-i] [--inputs] [-n] [--neverignore]
           [--quiet] [--loadhitlist=F ] [ --savehitlist=F ] [ --diffhitlist=F ]
           [--] [ source code file or source root directory ]+

  --help      Show this usage help

  --version   Show the version number and exit

  --context
  -c          Show context (the line having the "hit"/potential flaw)

  --columns   Show  the  column  number  (as well as the file name and
              line number) of each hit; this is shown after the line number
              by adding a colon and the column number in the line (the first
              character in a line is column number 1).

  -m X
  --minlevel=X
              Set minimum risk level to X for inclusion in hitlist.  This
              can be from 0 (``no risk'')  to  5  (``maximum  risk'');  the
              default is 1.

  --neverignore
  -n          Never ignore security issues, even if they have an ``ignore''
              directive in a comment.

  --immediate
  -i          Immediately display hits (don't just wait until the end).

  --inputs    Show only functions that obtain data from outside the program;
              this also sets minlevel to 0.

  --quiet     Don't display status information (i.e., which files are being
              examined) while the analysis is going on.

  --loadhitlist=F
              Load hits from F instead of analyzing source programs.

  --savehitlist=F
              Save all hits (the "hitlist") to F.

  --diffhitlist=F
              Show only hits (loaded or analyzed) not in F.


  For more information, please consult the manpage or available
  documentation.
""")

def process_options():
  """Parse the command line and set the option globals accordingly.

  Options are taken from sys.argv[1:].  As a side-effect sys.argv[1:] is
  replaced by the remaining (non-option) arguments; on DOS/Windows these
  are expanded as filename patterns first.

  --version and --help print their output and exit with status 0.  An
  unknown option, a missing option argument or a non-numeric minimum
  level is reported on standard error and exits with status 2."""
  global show_context, show_inputs, minimum_level, show_immediately
  global show_columns, never_ignore, quiet
  global loadhitlist, savehitlist, diffhitlist
  # Note - as a side-effect, this sets sys.argv[].
  try:
    optlist, args = getopt.getopt(sys.argv[1:], "cm:nih?",
                    ["context", "minlevel=", "immediate", "inputs", "columns",
                     "neverignore", "quiet",
                     "loadhitlist=", "savehitlist=", "diffhitlist=",
                     "version", "help" ])
  except getopt.error:
    # sys.exc_info() is used so that this works with old and new
    # "except" syntax alike.
    error("%s (use --help for a list of options)" % sys.exc_info()[1])
    sys.exit(2)
  for (opt,value) in optlist:
    if   opt == "--context" or opt == "-c":
      show_context = 1
    elif opt == "--columns":
      show_columns = 1
    elif opt == "--quiet":
      quiet = 1
    elif opt == "--inputs":
      show_inputs = 1
      minimum_level = 0
    elif opt == "--minlevel" or opt == "-m":
      try:
        minimum_level = int(value)
      except ValueError:
        error("minimum level must be an integer, not '%s'" % value)
        sys.exit(2)
    elif opt == "--immediate" or opt == "-i":
      show_immediately = 1
    elif opt == "-n" or opt == "--neverignore":
      never_ignore = 1
    elif opt == "--loadhitlist":
      loadhitlist = value
      display_header()
      print ("Loading hits from %s" % value)
    elif opt == "--savehitlist":
      savehitlist = value
      display_header()
      print ("Saving hitlist to %s" % value)
    elif opt == "--diffhitlist":
      diffhitlist = value
      display_header()
      print ("Showing hits not in %s" % value)
    elif opt == "--version":
      print (version)
      sys.exit(0)
    elif opt in [ '-h', '-?', '--help' ]:
      usage()
      sys.exit(0)
  # For DOS/Windows, expand filenames; for Unix, DON'T expand them
  # (the shell will expand them for us).  Some sloppy Python programs
  # always call "glob", but that's WRONG -- on Unix-like systems that
  # will expand twice.  Python doesn't have a clean way to detect
  # "has globbing occurred", so this is the best I've found:
  if os.name == "windows" or os.name == "nt" or os.name == "dos":
    # Collect the matches in a loop, so that an empty argument list
    # (e.g. when only --loadhitlist is given) simply expands to nothing.
    expanded = []
    for pattern in args:
      expanded.extend(glob.glob(pattern))
    sys.argv[1:] = expanded
  else:
    sys.argv[1:] = args


def process_files():
  """Fill the hitlist.

  If a hitlist file was given with --loadhitlist, the hitlist is loaded
  from it; otherwise every remaining command line argument is scanned as
  a file or directory."""
  global hitlist
  if loadhitlist:
    # NOTE: unpickling can execute arbitrary code, so only hitlists from
    # a trusted source should be loaded.
    f = open(loadhitlist)
    try:
      hitlist = pickle.load(f)
    finally:
      f.close()
  else:
    for f in sys.argv[1:]:
      process_dir_or_file(f)

def show_final_results():
  """Sort the hitlist, display it and print a summary.

  The hitlist is sorted in place, most risky hits first.  If a hitlist
  was given with --diffhitlist, only hits that are not in that list are
  displayed and counted."""
  count = 0
  if show_immediately:   # Separate the final results.
    print ("")
    print ("FINAL RESULTS:")
  hitlist.sort()
  if diffhitlist:
    # NOTE: unpickling can execute arbitrary code, so only hitlists from
    # a trusted source should be used here.
    diff_file = open(diffhitlist)
    try:
      diff_hitlist = pickle.load(diff_file)
    finally:
      diff_file.close()
    for h in hitlist:
      if h not in diff_hitlist:
        h.show()
        count = count + 1
    if count > 0:
      print ("Number of hits not in original hitlist = %s" % count)
    else:
      print ("No hits found that weren't already in the hitlist.")
  else:
    for h in hitlist:
      h.show()
    count = len(hitlist)
    if count > 0:
      print ("Number of hits = %s" % count)
    else:
      print ("No hits found.")
  if count > 0:
    print ("Not every hit is necessarily a security vulnerability.")
  print ("There may be other security vulnerabilities; review your code!")

def save_if_desired():
  "Save the hitlist to the file given with --savehitlist, if any."
  # We'll save entire hitlist, even if only differences displayed.
  if savehitlist:
    print ("Saving hitlist to %s" % savehitlist)
    f = open(savehitlist, "w")
    try:
      pickle.dump(hitlist, f)
    finally:
      # Close the file even if pickling fails part-way.
      f.close()

def flawfind():
  """Run the whole program: parse the options, show the header, prepare
  the ruleset, collect the hits, display them and optionally save them."""
  stages = (process_options, display_header, initialize_ruleset,
            process_files, show_final_results, save_if_desired)
  for stage in stages:
    stage()

# Run the analysis only when this file is executed as a script.
if __name__ == '__main__':
  flawfind()

