#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
#
# Clean a text file -- or directory of text files -- of stealth whitespace.
# WARNING: this can be a highly destructive operation.  Use with caution.
#
# Usage: cleanfile [-width #] files...
#
# For every regular, non-binary file named on the command line this script
#   * strips trailing spaces, tabs and carriage returns from each line,
#   * rewrites space-before-tab sequences as plain tabs,
#   * drops blank lines at the end of the file, and
#   * reports (on STDERR) lines wider than the -width limit (default 79;
#     a width of 0 disables the report).
# The file is rewritten in place, and only if something actually changed.

use strict;
use warnings;
use bytes;
use File::Basename;

# Default options
my $max_width = 79;

# Program name used as the prefix of every diagnostic.
my $name = basename($0);

# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Takes one string (normally a single line) and returns the cleaned copy.
# Spaces are buffered in $nsp until the next character is seen: if that
# character is a tab, the spaces are replaced by however many tabs are
# needed to reach the same tab stop; otherwise they are emitted verbatim.
#
# The ($) prototype is kept so existing call sites parse exactly as before.
sub clean_space_tabs($)
{
    no bytes;                   # Tab alignment depends on characters

    my ($li) = @_;
    my $lo  = '';               # Output accumulated so far
    my $pos = 0;                # Column of the end of $lo on this line
    my $nsp = 0;                # Spaces seen but not yet emitted

    for (my $i = 0; $i < length($li); $i++) {
        my $c = substr($li, $i, 1);
        if ($c eq "\t") {
            # Next tab stop past the pending spaces, and the number of
            # tab stops crossed to get there from the emitted column.
            my $npos = ($pos + $nsp + 8) & ~7;
            my $ntab = ($npos >> 3) - ($pos >> 3);
            $lo .= "\t" x $ntab;
            $pos = $npos;
            $nsp = 0;
        } elsif ($c eq "\n" || $c eq "\r") {
            # Line terminator: flush pending spaces, restart the column.
            $lo .= " " x $nsp;
            $nsp = 0;
            $lo .= $c;
            $pos = 0;
        } elsif ($c eq " ") {
            $nsp++;
        } else {
            $lo .= " " x $nsp;
            $pos += $nsp;
            $nsp = 0;
            $lo .= $c;
            $pos++;
        }
    }
    $lo .= " " x $nsp;
    return $lo;
}

# Compute the visual width of a string
#
# Tabs advance to the next multiple of 8; a newline ends the current line.
# Returns the width of the widest line contained in the string.
sub strwidth($) {
    no bytes;                   # Tab alignment depends on characters

    my ($li) = @_;
    my $pos  = 0;               # Current column
    my $mlen = 0;               # Widest line seen so far

    for (my $i = 0; $i < length($li); $i++) {
        my $c = substr($li, $i, 1);
        if ($c eq "\t") {
            $pos = ($pos + 8) & ~7;
        } elsif ($c eq "\n") {
            $mlen = $pos if ($pos > $mlen);
            $pos = 0;
        } else {
            $pos++;
        }
    }

    $mlen = $pos if ($pos > $mlen);
    return $mlen;
}

# Print the usage message and exit with a failure status.
sub usage {
    print STDERR "Usage: $name [-width #] files...\n";
    exit 1;
}

# Clean one file in place.  Problems that only affect this file (not a
# regular file, cannot open/read, binary content) are reported on STDERR
# and the file is skipped; a failure while rewriting the file is fatal
# because the file may have been left half-written.
sub clean_file {
    my ($f) = @_;

    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        return;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        return;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    while (1) {
        my $chunk;
        my $got = read($fh, $chunk, 65536);
        if (!defined($got)) {
            print STDERR "$name: Cannot read file: $f: $!\n";
            close($fh);
            return;
        }
        last if $got == 0;
        if ($chunk =~ /\0/) {
            print STDERR "$name: $f: binary file\n";
            close($fh);
            return;
        }
    }

    if (!seek($fh, 0, 0)) {
        print STDERR "$name: Cannot rewind file: $f: $!\n";
        close($fh);
        return;
    }

    my $in_bytes    = 0;        # Bytes read from the file
    my $out_bytes   = 0;        # Bytes that would be written back
    my $blank_bytes = 0;        # Bytes held in @blanks
    my @blanks      = ();       # Blank lines not yet known to be interior
    my @lines       = ();       # Cleaned output
    my $lineno      = 0;

    while (defined(my $line = <$fh>)) {
        $lineno++;
        $in_bytes += length($line);
        $line =~ s/[ \t\r]*$//;         # Remove trailing spaces
        $line = clean_space_tabs($line);

        if ($line eq "\n") {
            # Hold blank lines back until a non-blank line follows.
            push(@blanks, $line);
            $blank_bytes += length($line);
        } else {
            push(@lines, @blanks);
            $out_bytes += $blank_bytes;
            push(@lines, $line);
            $out_bytes += length($line);
            @blanks = ();
            $blank_bytes = 0;
        }

        my $l_width = strwidth($line);
        if ($max_width && $l_width > $max_width) {
            print STDERR
                "$f:$lineno: line exceeds $max_width characters ($l_width)\n";
        }
    }

    # Any blanks at the end of the file are discarded

    # Every cleanup step only ever removes bytes, so comparing the byte
    # counts is enough to tell whether anything changed.
    if ($in_bytes == $out_bytes) {
        close($fh);
        return;
    }

    # Only write to the file if changed
    seek($fh, 0, 0)
        or die "$name: Failed to rewind modified file: $f: $!\n";
    print {$fh} @lines
        or die "$name: Failed to write modified file: $f: $!\n";

    # tell() reports failure as -1, not undef.
    my $where = tell($fh);
    if (!defined($where) || $where < 0 || !truncate($fh, $where)) {
        die "$name: Failed to truncate modified file: $f: $!\n";
    }

    # Buffered write errors only surface at close time.
    close($fh)
        or die "$name: Failed to write modified file: $f: $!\n";
    return;
}

# Parse the command line: "-width N" / "-w N" sets the width limit, any
# other dash-option is an error, everything else is a file to clean.
my @files = ();

while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-/) {
        if ($arg eq '-width' || $arg eq '-w') {
            my $width = shift(@ARGV);
            # A missing or non-numeric value would otherwise silently
            # become 0 and could swallow a file name.
            usage() unless defined($width) && $width =~ /\A\d+\z/;
            $max_width = $width + 0;
        } else {
            usage();
        }
    } else {
        push(@files, $arg);
    }
}

foreach my $f (@files) {
    clean_file($f);
}
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.