# $Id: QuoteWrap.pm,v 1.2 2000/09/09 21:17:26 chardin Exp chardin $
package Mail::QuoteWrap;
=head1 NAME
B<Mail::QuoteWrap> - Provides quotification functionality for Usenet articles
and mail.
=head1 SYNOPSIS
use Mail::QuoteWrap;
...
my $columns = 72; # maximum column width of the post
my $output_quotechar = ">"; # character to prepend to
# quoted lines -- see quotify()
my $input_quotechars = "<>:"; # characters to be recognized as
# quotifiers when judging the
# generation of a quote
my $text = $news_article->body(); # get some body text somehow
my $body = create Mail::QuoteWrap ($text, $columns, $output_quotechar,
$input_quotechars, {});
$body->quotify();
$body->append("Me too!");
my $newtext = $body->format();
=head1 DESCRIPTION
A B<Mail::QuoteWrap> object expects its text member to contain a reference to a
list of lines of text, such as the output of methods like
C<body News::Article()>. It can then produce quotified output, optionally
prepended with the quote mark designated by I<output_quotechar>, within the
width specified by I<columns>.
B<Mail::QuoteWrap> specificially does not solve the following problems:
=over 4
=item
It does not handle munged quote characters, such as those produced
by the AOHell newsreader or similar gunge:
>> This >is a second-generation quote, but it
> looks >>like a nasty mix of first- and second->
> generation >>material.
=item
It does not automatically detect and bypass news or mail headers. That
is not the role of this object.
=item
It assumes a paragraph structure to the quoted text and doesn't try to enforce
any other. If you want a module that detects document structure and deals
well with it, look at B<Text::Autoformat>.
=back
=head1 REQUIRES
This module uses B<Text::Format>.
=head1 BUGS
=over 4
=item
If I<input_quotechars> or I<output_quotechar> contain suckful characters
that regexp thinks it understands, all hell can break loose.
=item
B<Mail::QuoteWrap> may not deal well with Supercite-style quotification:
Chuck> I believe everything I see written on toilet paper
is thought to be zeroth-generation (unquoted) material.
=back
=head1 AUTHOR
Chuck Hardin <chardin@savageoasis.fc.net>
=head1 COPYRIGHT
This module is copyright 2000, Chuck Hardin.
=head1 LICENSE
This module is distributed under version 2 of the GNU Public License.
=cut
use strict;
use Text::Format;
my $VERSION = '0.01';
=head1 Public class method
=head2 create
public class
(Mail::QuoteWrap) create(string[] text [,integer columns]
[,string output_quotechar] [,string input_quotechars]
[,hashref format_params])
This method creates a B<Mail::QuoteWrap> object populated with the
parameters passed in. It returns a NULL object if any of the
provided parameters are invalid.
The meanings of the members are as follows:
=over 4
=item text
The body text of the message.
=item columns
The width to which the message should be justified. NOTE: If any line
consisting of the quotification string and the first word is wider than this,
then the line will be generated with that quotification string and that word,
and it will overflow. Life is hard.
=item output_quotechar
The quotification character to prepend to the text when quoting. See
C<quotify()>.
=item input_quotechars
The set of characters to be recognized as quotification marks when determining
how to group quoted material.
=item format_params
Miscellaneous parameters to pass for formatting. See the documentation for the
B<Text::Format> module.
=back
=cut
sub create {
my ($class, $text, $columns, $output_quotechar, $input_quotechars, $format_params) = @_;
# check parameters for reasonableness
return undef if defined $text && !ref($text);
return undef if defined $columns && ref($columns);
return undef if defined $output_quotechar && ref($output_quotechar);
return undef if defined $input_quotechars && ref($input_quotechars);
return undef if defined $format_params && !ref($format_params);
return "Quotification character is a multiple-generation quote character!" if ( defined $output_quotechar && defined $input_quotechars && quote_generation($output_quotechar, $input_quotechars) > 1);
# set up parameter hash
my $params = {};
$params->{text} = $text;
$params->{columns} = $columns;
$params->{output_quotechar} = $output_quotechar;
$params->{input_quotechars} = $input_quotechars;
$params->{format_params} = $format_params;
# instantiate the object and return it
my $this = new Mail::QuoteWrap($params);
return $this;
}
=head1 Private class method
=head2 new
private class
(Mail::QuoteWrap) new(hashref params)
Creates a B<Mail::QuoteWrap> object populated by the data in I<params>.
=cut
sub new {
my ($class, $params) = @_;
my $this = {};
bless $this, $class;
foreach my $param_name (keys %$params) {
$this->{$param_name} = $params->{$param_name};
}
return $this;
}
=head1 Public instance methods
=head2 text
public instance
(string []) text()
Returns the text member of the current B<Mail::QuoteWrap> object.
=cut
sub text {
my ($this) = @_;
return $this->{text};
}
=head2 set_text
public instance
(string) set_text(string[] text)
Sets the text member of the current B<Mail::QuoteWrap> object. Returns a NULL
string if it succeeds, or a descriptive error message otherwise.
=cut
sub set_text {
my ($this, $text) = @_;
return "Supplied text is not an array ref!" unless defined $text && ref($text);
$this->{text} = $text;
return undef;
}
=head2 columns
public instance
(integer) columns()
Returns the columns member of the current B<Mail::QuoteWrap> object.
=cut
sub columns {
my ($this) = @_;
return $this->{columns};
}
=head2 set_columns
public instance
(string) set_columns(integer columns)
Sets the columns member of the current B<Mail::QuoteWrap> object. Returns a
NULL string if it succeeds, or a descriptive error message otherwise.
=cut
sub set_columns {
my ($this, $columns) = @_;
return "Number of columns is invalid!" unless $columns && !ref($columns);
$this->{columns} = $columns;
return undef;
}
=head2 input_quotechars
public instance
(string) input_quotechars()
Returns the input_quotechars member of the current B<Mail::QuoteWrap> object.
=cut
sub input_quotechars {
my ($this) = @_;
return $this->{input_quotechars};
}
=head2 set_input_quotechars
public instance
(string) set_input_quotechars(string input_quotechars)
Sets the input_quotechars member of the current B<Mail::QuoteWrap>
object. Returns a NULL string if it succeeds, or a descriptive error
message otherwise.
=cut
sub set_input_quotechars {
my ($this, $input_quotechars) = @_;
return "Input quote characters are invalid!" unless defined $input_quotechars && !ref($input_quotechars);
$this->{input_quotechars} = $input_quotechars;
return undef;
}
=head2 output_quotechar
public instance
(string) output_quotechar()
Returns the output_quotechar member of the current B<Mail::QuoteWrap> object.
=cut
sub output_quotechar {
my ($this) = @_;
return $this->{output_quotechar};
}
=head2 set_output_quotechar
public instance
(string) set_output_quotechar(string output_quotechar)
Sets the output_quotechar member of the current B<Mail::QuoteWrap>
object. Returns a NULL string if it succeeds, or a descriptive error
message otherwise.
=cut
sub set_output_quotechar {
my ($this, $output_quotechar) = @_;
return "Quotification character is invalid!" unless defined $output_quotechar && !ref($output_quotechar);
return "Quotification character is a multiple-generation quote character!" if ( defined $this->input_quotechars() && quote_generation($output_quotechar, $this->input_quotechars()) > 1);
$this->{output_quotechar} = $output_quotechar;
return undef;
}
=head2 format_params
public instance
(hashref) format_params()
Returns the format_params member of the current B<Mail::QuoteWrap> object.
=cut
sub format_params {
my ($this) = @_;
return $this->{format_params};
}
=head2 set_format_params
public instance
(string) set_format_params(hashref format_params)
Sets the format_params member of the current B<Mail::QuoteWrap>
object. Returns a NULL string if it succeeds, or a descriptive error
message otherwise.
=cut
sub set_format_params {
my ($this, $format_params) = @_;
return "Supplied format_params is not a hashref!" unless defined $format_params && ref($format_params);
$this->{format_params} = $format_params;
return undef;
}
=head2 quotify
public instance
(string) quotify()
Quotifies all current text with the string in C<output_quotechar()>.
Modifies the I<input_quotechars> member to reflect that the text is now
quotified. Returns a NULL string if it succeeds, or a descriptive error
message otherwise.
=cut
sub quotify {
my ($this) = @_;
# Load members of the current object for slightly faster reference
my $input_quotechars = $this->input_quotechars();
my $output_quotechar = $this->output_quotechar();
my $text = $this->text();
# Check that the necessary members are valid. We do not care about
# format_text or columns at this time.
return "Supplied text is not valid!" unless defined($text) && ref($text);
return "Supplied input_quotechars is not valid!" unless defined($input_quotechars) && !ref($input_quotechars);
return "Supplied output_quotechar is not valid!" unless defined($output_quotechar) && !ref($output_quotechar);
# construct the quoted text
my @new_text = ();
foreach my $line (@$text) {
my $new_line = $output_quotechar.$line;
push @new_text, $new_line;
}
# add output_quotechar to the input_quotechars member; we do this in
# case output_quotechar is not included in input_quotechars, so that the
# text can be recognized as quoted material.
$this->{input_quotechars} .= $output_quotechar;
# put the quoted text into the object
return $this->set_text(\@new_text);
}
=head2 format
public instance
(string) format()
This method alters the I<text> member of the current
B<Mail::QuoteWrap> object to conform to the constraints implied in the
I<columns> and I<format_params> members. It recognizes the
quotification characters in I<input_quotechars> and uses them to lump
related quoted material together. C<format()> will use the same
quotification character at the beginning of each line within a block
of quoted material which it believes to be related. Returns a NULL
string if it succeeds, or a descriptive error message otherwise.
=cut
sub format {
my ($this) = @_;
# Load members of the current object for slightly faster reference
my $text = $this->text();
my $columns = $this->columns();
my $input_quotechars = $this->input_quotechars();
my $format_params = $this->{format_params};
# Check that the necessary members are valid. We care about all of the
# members at this time except for output_quotechar.
return "Supplied text is invalid!" unless defined($text) && ref($text);
return "Supplied input_quotechars is invalid!" unless defined($input_quotechars) && !ref($input_quotechars);
return "Supplied columns is invalid!" unless $columns && !ref($columns);
return "Supplied format_params is invalid!" unless defined($format_params) && ref($format_params);
# break the text into blocks of same-generation quoted material
my $broken_into_blocks = break_text_into_blocks($text, $input_quotechars);
my @new_text = ();
# set up each block to be converted into paragraphs, justified and
# formatted
foreach my $block (@$broken_into_blocks) {
my $message_block = [];
foreach my $message_line (@{$block->{message}}) {
push @$message_block, $message_line;
}
# calculate column width for the message text, defaulting to 1 in the
# case that the quotification characters are wider than the specified
# justification. Text::Format will deal.
my $width = $this->columns() - length($block->{quotification});
$format_params->{columns} = ($width > 0) ? $width : 1;
# default to not indenting the first line of every paragraph
$format_params->{firstIndent} = 0 unless exists $format_params->{firstIndent};
# construct the new, formatted block
my @new_message_block = ();
# must special-case for a message block with only whitespace text
# since Text::Format tends to mess these over
if ( (join ' ', @$message_block) =~ /^[\t ]$/ ) {
push @new_message_block, " ";
}
else {
my ($paragraphs) = break_block_into_paragraphs($message_block);
my $formatted_message_block = new Text::Format($format_params);
foreach my $paragraph (@$paragraphs) {
push @new_message_block, $formatted_message_block->format($paragraph);
}
}
# construct the output, removing newlines from the end of each line
foreach my $message_line (@new_message_block) {
my $line = $block->{quotification}.$message_line;
chomp $line;
push @new_text, $line;
}
}
return $this->set_text(\@new_text);
}
=head1 Private utility methods
=head2 parse_quotification
private
(string, string) parse_quotification (string text, string quotechars)
Returns two strings: the quotification part of the line of text (consisting
of all characters at the beginning of the line which are tabs, spaces, or
characters in I<quotechars>), and the remainder of the line. Returns two NULL
strings if this matching does not work out.
=cut
sub parse_quotification {
my ($text, $quotechars) = @_;
return (undef, undef) unless (my ($quotification, $message) = ($text =~ /^([$quotechars \t]*)(.*)$/));
($quotification, my $whitespace) = ( $quotification =~ /^([$quotechars \t]*?)([ \t]*)$/);
$message = $whitespace . $message;
$message = " " unless length $message;
return ($quotification, $message);
}
=head2 quote_generation
private
(integer) quote_generation(string quotification, string quotechars)
Given the quotification portion of a line of text and the accepted quote
characters, returns the presumed generation of the quote (zeroth -- original
text, first -- once-quoted text, etc.)
=cut
sub quote_generation {
my ($quotification, $quotechars) = @_;
$quotification =~ tr/[^$quotechars]//;
return length $quotification;
}
=head2 break_text_into_blocks
private
(hashref []) break_text_into_blocks (string[] text, string quotechars)
Breaks I<text> into a list of elements, each of which is a hash with the
following elements:
=over 4
=item quotification
Quotification string to use for this block.
=item message
Array ref containing the message text; undef if the message portion is blank.
=back
Each message element is guaranteed to consist of lines of same-generation
quotage -- i.e., a block will contain only first-generation quotes,
second-generation, zeroth-generation, or what have you. Each line with blank
message text gets its own block, to preserve vertical whitespace.
=cut
sub break_text_into_blocks {
my ($text, $quotechars) = @_;
# set up holding areas for the output list and the current block of text
my $outlist = [];
my $current_block = {};
# put the first line into current_block
my $line = shift @$text;
my ($quotification, $message) = parse_quotification($line, $quotechars);
my $current_generation = quote_generation($quotification, $quotechars);
push @{$current_block->{message}}, $message;
$current_block->{quotification} = $quotification;
# if it's a blank line, push onto outlist and clear current_block
if ($message =~ /^\s*$/) {
copy_and_push($current_block, $outlist);
$current_block = {};
}
foreach $line (@$text) {
($quotification, $message) = parse_quotification($line, $quotechars);
my $generation = quote_generation($quotification, $quotechars);
# if it's the start of a new block, push the previous contents onto
# @$outlist, clear $current_block, and set
# $current_block->{quotification}
if (($message =~ /^\s*$/) || ($generation != $current_generation)) {
copy_and_push($current_block, $outlist);
$current_block = {};
$current_block->{quotification} = $quotification;
$current_generation = $generation;
}
push @{$current_block->{message}}, $message;
# if the current line is blank, push it onto @$outlist as well
# and flag $current_generation to force the next line to be its own block
if ($message =~ /^\s*$/) {
copy_and_push($current_block, $outlist);
$current_block = {};
undef $current_generation;
}
}
copy_and_push($current_block, $outlist) if scalar(@{$current_block->{message}});
return $outlist;
}
=head2 copy_and_push
private
(string) copy_and_push(hashref current_block, arrayref outlist)
Pushes a copy of the contents of I<current_block> onto I<outlist>.
I<current_block> is assumed to have two members as described in the
documentation for C<break_text_into_blocks()> above. Returns a NULL
string if it succeeds, or a descriptive error message otherwise.
=cut
sub copy_and_push {
my ($current_block, $outlist) = @_;
return "Current block passed is invalid!" unless defined $current_block && ref($current_block);
return "Output list passed is invalid!" unless defined $outlist && ref($outlist);
return undef unless ref($current_block->{message}) && scalar(@{$current_block->{message}});
my $copy_current_block = {};
$copy_current_block->{quotification} = $current_block->{quotification};
$copy_current_block->{message} = [];
foreach my $line (@{$current_block->{message}}) {
push @{$copy_current_block->{message}}, $line;
}
push @$outlist, $copy_current_block;
return undef;
}
=head2 break_block_into_paragraphs
private
(string[]) break_block_into_paragraphs(string[] block)
Breaks the block into paragraphs according to the following rule:
If the previous line ended with a period and the current line begins with a tab
or at least three spaces, the current line begins a new paragraph.
=cut
sub break_block_into_paragraphs {
my ($block) = @_;
my $prev_line = undef;
my $current_paragraph = [];
my $outlist = [];
foreach my $line (@$block) {
if ( ($prev_line =~ /\.$/) && ( ($line =~ /^ /) || ($line =~ /^\t/) ) ) {
my $copy_para = [];
foreach my $line_in_para (@$current_paragraph) {
push @$copy_para, $line_in_para;
}
push @$outlist, $copy_para;
$current_paragraph = [];
}
push @$current_paragraph, $line;
$prev_line = $line;
}
push @$outlist, $current_paragraph if scalar(@$current_paragraph);
return $outlist;
}
1;
syntax highlighted by Code2HTML, v. 0.9.1