Skip to content
Snippets Groups Projects
cleanfile 3.41 KiB
#!/usr/bin/perl -w
#
# Clean a text file -- or directory of text files -- of stealth whitespace.
# WARNING: this can be a highly destructive operation.  Use with caution.
#

use bytes;
use File::Basename;

# Default options
$max_width = 79;

# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
sub clean_space_tabs($)
{
    no bytes;			# Tab alignment depends on characters

    my($li) = @_;
    my($lo) = '';
    my $pos = 0;
    my $nsp = 0;
    my($i, $c);

    for ($i = 0; $i < length($li); $i++) {
	$c = substr($li, $i, 1);
	if ($c eq "\t") {
	    my $npos = ($pos+$nsp+8) & ~7;
	    my $ntab = ($npos >> 3) - ($pos >> 3);
	    $lo .= "\t" x $ntab;
	    $pos = $npos;
	    $nsp = 0;
	} elsif ($c eq "\n" || $c eq "\r") {
	    $lo .= " " x $nsp;
	    $pos += $nsp;
	    $nsp = 0;
	    $lo .= $c;
	    $pos = 0;
	} elsif ($c eq " ") {
	    $nsp++;
	} else {
	    $lo .= " " x $nsp;
	    $pos += $nsp;
	    $nsp = 0;
	    $lo .= $c;
	    $pos++;
	}
    }
    $lo .= " " x $nsp;
    return $lo;
}

# Compute the visual width of a string
sub strwidth($) {
    no bytes;			# Tab alignment depends on characters

    my($li) = @_;
    my($c, $i);
    my $pos = 0;
    my $mlen = 0;

    for ($i = 0; $i < length($li); $i++) {
	$c = substr($li,$i,1);
	if ($c eq "\t") {
	    $pos = ($pos+8) & ~7;
	} elsif ($c eq "\n") {
	    $mlen = $pos if ($pos > $mlen);
	    $pos = 0;
	} else {
	    $pos++;