#!/usr/local/bin/perl

# Julius C. Duque
# v1.1.1 2003 November 12 Wednesday
# v1.2.0 2003 November 14 Friday - Added hyphenation
# v1.3.0 2003 November 16 Sunday - Added hyphenation of overfull lines

use diagnostics;
use strict;
use warnings;
use Getopt::Long;
use TeX::Hyphen;

# Command-line settings.  These file-level lexicals are read directly by
# printpar(); $hyp is read by hyphenate_word().
#   $width     - maximum line width (mandatory)
#   $hyphenate - split words that do not fit on a line
#   $left      - accepted for completeness; left justification is what
#                happens when none of --centered/--right/--both is given
#   $centered, $right, $both - alternative justifications
#   $indent    - spaces to put in front of each paragraph's first line
#   $newline   - print an empty line after every paragraph
my ($width, $hyphenate, $left, $centered, $right, $both);
my ($indent, $newline);

# The hyphenation switch must be registered under its documented name.
# Registered as "help", --hyphenate was rejected as an unknown option and
# only -h/--help turned hyphenation on.
GetOptions("width=i" => \$width, "hyphenate" => \$hyphenate,
  "left" => \$left, "centered" => \$centered,
  "right" => \$right, "both" => \$both,
  "indent:i" => \$indent, "newline" => \$newline);

my $hyp = TeX::Hyphen->new;

# "width=i" also accepts negative numbers; a width below 1 cannot hold
# any text, so treat it like a missing width and show the usage text.
syntax() if (!$width or $width < 1);

# "indent:i" leaves $indent undefined when the switch is absent; a
# negative indent is meaningless and is treated as no indent.
$indent = 0 if (!$indent or $indent < 0);

# Paragraph mode: each read from <> returns one blank-line-separated
# paragraph.
local $/ = "";

while (<>) {
  # Splitting on whitespace discards the original line breaks.
  my @linein = split;
  printpar(@linein);
  print "\n" if ($newline);
}

# printpar(@words)
#
# Formats one paragraph and prints it to STDOUT, one line per pass of the
# outer loop.
#
# Arguments:
#   @words - the paragraph, already split into whitespace-free words.
#
# Reads the file-level settings $width, $indent, $hyphenate, $centered,
# $right and $both, and calls hyphenate_word() when $hyphenate is set.
# The return value is not meaningful.
#
# Each pass:
#   1. pulls words off @par until the line is full or overfull;
#   2. if the line holds a single word that is too long, hyphenates it
#      or lets it overflow;
#   3. if the line is overfull, gives back its last word, or only the
#      tail of that word when it can be hyphenated;
#   4. pads the line according to the selected justification.
sub printpar
{
  my (@par) = @_;
  my $firstline = 0;

  # Leading non-letters, the part handed to the hyphenator, trailing
  # non-letters.
  my $word_re = qr/^([^a-zA-Z]*)([a-zA-Z-']+)([^a-zA-Z]*)$/;

  while (@par) {
    $firstline++;
    my $buffer = "";
    my $word;
    my ($charcount, $wordlen) = (0, 0);
    my $linewidth = $width;

    if ($firstline == 1) {
      $linewidth -= $indent;
      print " " x $indent;
    }

    # Columns really available on this line.  $linewidth may be widened
    # below so that a lone word can overflow; padding and the lone-word
    # hyphenation limit must be measured against this copy instead.
    my $avail = $linewidth;

    # Collect words, each counted with one trailing space.  At least one
    # word is always taken, even when the indent leaves no room, so that
    # every pass makes progress and the loop is guaranteed to end.
    while (@par and ($charcount < $linewidth or !defined($word))) {
      $word = shift @par;
      $wordlen = length($word);
      $buffer .= $word . " ";
      $charcount += $wordlen + 1;
    }

    # Remove the trailing space again.
    chop $buffer;
    $charcount--;

    if ($charcount == $wordlen) {
      # The line holds exactly one word.  Widening $linewidth to that
      # word keeps the overfull branch below from pushing it back onto
      # @par over and over.
      $linewidth = $wordlen;

      if ($hyphenate and $wordlen > $avail) {
        my ($pos, $pre_word_len) = (0, 0);

        if ($word =~ $word_re) {
          $pre_word_len = length($1);
          my $stripped_word = $2;
          # Leading punctuation occupies columns too, so it reduces the
          # room left for the hyphenated part.
          $pos = hyphenate_word($stripped_word, $avail - $pre_word_len);
        }

        if ($pos) {
          # Keep the head plus a hyphen; queue the tail for the next line.
          $charcount = $pre_word_len + $pos;
          unshift(@par, substr($word, $charcount));
          $buffer = substr($word, 0, $charcount) . "-";
          $charcount++;
        }
      }
    }

    my $lineout = $buffer;

    if ($charcount > $linewidth) {
      # Several words, and the last one does not fit completely.
      my ($pos, $pre_word_len) = (0, 0);
      if ($hyphenate) {
        if ($word =~ $word_re) {
          $pre_word_len = length($1);
          my $stripped_word = $2;
          # Room left for the last word once the words before it (and
          # its own leading punctuation) are accounted for.
          my $unfilled = $linewidth - $charcount + $wordlen
            - $pre_word_len + 1;

          $pos = hyphenate_word($stripped_word, $unfilled);
        }
      }

      $charcount -= $wordlen;

      if ($pos == 0) {
        # No usable break: drop the separating space and return the
        # whole word to the queue.
        $charcount--;
        unshift(@par, $word);
      } else {
        my $post_word = substr $word, ($pre_word_len + $pos);
        unshift(@par, $post_word);
        $charcount = $charcount + $pre_word_len + $pos;
      }

      $lineout = substr $buffer, 0, $charcount;

      if ($pos) {
        $lineout .= "-";
        $charcount++;
      }
    }

    # Pad against the real line width.  Using the widened $linewidth here
    # left single-word lines (typically the last line of a paragraph)
    # without any padding, so they were never right-aligned or centered.
    # A word that overflows the line gets no padding at all.
    my $spaces_to_fill = $avail - $charcount;
    $spaces_to_fill = 0 if ($spaces_to_fill < 0);

    if ($centered) {
      my $leftfill = int($spaces_to_fill/2);
      print " " x $leftfill;
    } elsif ($right) {
      print " " x $spaces_to_fill;
    } elsif ($both) {
      my $tempbuf = $lineout;
      my $replacements_made = 0;

      # The last line of a paragraph (@par empty) is not stretched.
      if (@par) {
        my $reps = 1;

        # Widen one gap of exactly $reps spaces per step, reversing the
        # string after each step so that extra spaces are added
        # alternately from both ends of the line.
        while (length($tempbuf) < $linewidth) {
          last if ($tempbuf !~ /\s/);
          if ($tempbuf =~ /(\S+ {$reps})(\S+)/) {
            $tempbuf =~ s/(\S+ {$reps})(\S+)/$1 $2/;
            $replacements_made++;
            $tempbuf = reverse $tempbuf;
          } else {
            $reps++;
          }
        }
      }

      # An odd number of reversals leaves the text backwards; undo that.
      if ($replacements_made % 2 == 0) {
        $lineout = $tempbuf;
      } else {
        $lineout = reverse $tempbuf;
      }
    }

    print "$lineout\n";
  }
}

# hyphenate_word($tword, $unfilled)
#
# Asks the file-level TeX::Hyphen object ($hyp) for the hyphenation
# places of $tword and walks them from the end of that list towards the
# front, returning the first place that is smaller than $unfilled - 1.
# Presumably the places come back in ascending order, which would make
# this the right-most break that still fits -- confirm against the
# TeX::Hyphen documentation.
#
# Returns 0 when the word has no hyphenation place or none qualifies.
sub hyphenate_word
{
  my ($tword, $unfilled) = @_;
  my $limit = $unfilled - 1;

  for my $candidate (reverse $hyp->hyphenate($tword)) {
    return $candidate if ($candidate < $limit);
  }

  return 0;
}

# syntax()
#
# Writes the usage summary to STDOUT and ends the program with exit
# status 0.  Does not return.
sub syntax
{
  # One element per output line; the program name is interpolated from $0.
  my @usage = (
    "Usage:\n",
    "  $0 --width=n [options] file1 [file2 file3 ...]\n",
    "  cat file1 [file2 file3 ...] | $0 --width=n [options]\n",
    "\n",
    "Options:\n",
    "--width=n (or -w=n or -w n)   Line width is n chars long\n",
    "--left (or -l)                Left-justified (default)\n",
    "--right (or -r)               Right-justified\n",
    "--centered (or -c)            Centered\n",
    "--both (or -b)                Both left- and\n",
    "                                right-justified\n",
    "--indent=n (or -i=n or -i n)  Leave n spaces for initial\n",
    "                                indention (defaults to 0)\n",
    "--newline (or -n)             Output an empty line \n",
    "                                between paragraphs\n",
    "--hyphenate (or -h)           Hyphenate word that doesn't\n",
    "                                fit on a line\n",
  );

  print join("", @usage);
  exit 0;
}

=head1 NAME

paradj - a small Perl script that reformats lines of ASCII text so that
the resulting lines are justified in any of the following formats:
left-justified (default), right-justified, centered, or both left- and
right-justified.

=head1 README

Paragraph Adjuster with Hyphenation (PAwH) is a small Perl script that
reformats lines of ASCII text so that the resulting lines are justified
in  any  of  the following  formats:  left-justified  (default),
right-justified, centered, or both left- and right-justified. PAwH has
various  switches, most are optional, to control its output. The  only
mandatory switch is the line width (--width). For PAwH to work properly,
input paragraphs must be separated by blank lines.

PAwH is also capable of hyphenating a word that cannot be accommodated
on a line.

=head1 DESCRIPTION

Paragraph Adjuster with Hyphenation (PAwH) is a small Perl script that
reformats lines of ASCII text so that the resulting lines are justified
in  any  of  the following  formats:  left-justified  (default),
right-justified, centered, or both left- and right-justified. PAwH has
various  switches, most are optional, to control its output. The  only
mandatory switch is the line width (--width). For PAwH to work properly,
input paragraphs must be separated by blank lines.

PAwH is also capable of hyphenating a word that cannot be accommodated
on a line.

=head1 USAGE

You can use PAwH in either of two ways:

./paradj.pl --width=n [options] file1 [file2 file3 ...]

or

cat file1 [file2 file3 ...] | ./paradj.pl --width=n [options]

where file1, file2, file3, and so on, are the files to be reformatted.
The reformatted text of all input files is written, in order, to a
single stream: standard output.

=head1 SWITCHES

The available switches are:

--width=n (or -w=n or -w n)
    Line width is n chars long

--left (or -l)
    Output is left-justified (default)

--right (or -r)
    Output is right-justified

--centered (or -c)
    Output is centered

--both (or -b)
    Output is both left- and right-justified

--indent=n (or -i=n or -i n)
    Leave n spaces for initial indention (defaults to 0)

--newline (or -n)
    Insert blank lines between paragraphs

--hyphenate (or -h)
    Hyphenate word that doesn't fit on a line

=head1 EXAMPLES

The  following command reformats the file, LICENSE,  so that the line
width is at most 70 characters,  both left-  and right-justified, with
blank lines inserted between consecutive  paragraphs,  and words that
can't fit at the end of lines are hyphenated.

paradj.pl --width=70 --both --newline --hyphenate LICENSE

You can also use the shortened version:

paradj.pl -w=70 -b -n -h LICENSE

If  you  want to indent each paragraph, just use the --indent  switch.
Say, you want to indent the LICENSE file with 4 leading spaces, type:

paradj.pl --width=70 --both --newline --hyphenate --indent=4 LICENSE

or

paradj.pl -w=70 -b -n -h -i=4 LICENSE


=head1 GUI VERSION

There is also a Perl/Tk version of paradj.pl, called paradj-tk.pl.

=head1 PREREQUISITE

You  need Jan Pazdziora's Perl module, TeX::Hyphen, available from the
Comprehensive Perl Archive Network (CPAN), to use the hyphenation
feature. The latest is version 0.140.

For  Windows  users,  you can install TeX::Hyphen by  following  these
steps:

1. Uncompress the TeX::Hyphen module, TeX-Hyphen-0.140.tar.gz.

2. Descend (cd) into the TeX-Hyphen-0.140/lib directory and copy the TeX
directory into <Perl directory>\lib. For example, if your Perl binaries
are installed on E:\Perl, copy the TeX directory into E:\Perl\lib.

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2003 Julius C. Duque <{jcduque}{at}{lycos}{dot}{com}>

This  library is free software; you can redistribute it and/or  modify
it under the same terms as the GNU General Public License.

=pod SCRIPT CATEGORIES

CPAN/Administrative
Fun/Educational

=cut