#!/usr/bin/perl
#
# [ Miguel Thu Feb 17 15:23:32 CET 2005 (@colinux) ]
#
# Finds BOM signatures and strips them from files.
#
# More info regarding BOM at http://www.unicode.org/faq/utf_bom.html
#
# Usage: removebom.pl [path] [pattern]
#
# find(1) is run on [path] with -iname "*[pattern]"; every regular file it
# reports is examined, a leading Byte Order Mark is removed in place, and the
# outcome for each file is appended to /tmp/removebom.log.
#

use strict;
use warnings;

# Log file that receives one line per processed file (opened in append mode).
use constant LOG_FILE => "/tmp/removebom.log";

sub remove_bom($);

##############################################################################
# Script entry point.
#
# Parameters: the command line arguments (target directory, file pattern).
# Returns:    the process exit status (1 when arguments are missing, else 0).
# Dies:       when find(1) cannot be started or the log file cannot be
#             opened or closed.
##############################################################################
sub main {
    my @args = @_;

    if (@args < 2) {
        print "removebom.pl: removes UTF Byte Order Marks from files.\n\n";
        print " Usage: removebom.pl [path] [pattern]\n\n";
        print " You must specify both a target directory to be traversed and a file pattern.\n";
        return 1;
    }

    my ($dir, $patt) = @args[0, 1];

    # List-form pipe open: the arguments reach find(1) verbatim, without a
    # shell, so spaces or shell metacharacters in them cannot be interpreted
    # as commands.
    open(my $find, '-|', 'find', $dir, '-iname', "*$patt")
        or die "Unable to run find: $!\n";
    chomp(my @list = <$find>);
    # find exits non-zero on e.g. unreadable sub-directories; the paths it
    # did report are still processed, so the close status is not checked.
    close $find;

    open(my $log, '>>', LOG_FILE)
        or die "Unable to open log file " . LOG_FILE . ": $!\n";

    print {$log} "\n\n\n-------------- removebom.pl $dir $patt --------------\n\n";

    foreach my $file (@list) {
        # find also reports directories whose name matches the pattern.
        next unless -f $file;
        print {$log} remove_bom($file);
    }

    close $log
        or die "Unable to close log file " . LOG_FILE . ": $!\n";

    return 0;
}

##############################################################################
# Detects a single leading BOM in a byte string.
#
# Parameters: the raw file content.
# Returns:    (encoding name, content without the BOM) when the content
#             starts with a known BOM, or an empty list otherwise.
##############################################################################
sub _strip_bom {
    my ($data) = @_;

    # Longest signatures first: the UTF-32LE BOM (FF FE 00 00) begins with
    # the UTF-16LE BOM (FF FE), so testing UTF-16LE first would misdetect
    # UTF-32LE input and leave two stray NUL bytes behind.
    # NOTE(review): FF FE 00 00 could also be UTF-16LE text whose first
    # character is U+0000; it is treated as UTF-32LE here.
    my @signatures = (
        [ "UTF-32BE" => "\x00\x00\xFE\xFF" ],
        [ "UTF-32LE" => "\xFF\xFE\x00\x00" ],
        [ "UTF-8"    => "\xEF\xBB\xBF"     ],
        [ "UTF-16BE" => "\xFE\xFF"         ],
        [ "UTF-16LE" => "\xFF\xFE"         ],
    );

    foreach my $signature (@signatures) {
        my ($enc, $bom) = @{$signature};
        next unless substr($data, 0, length $bom) eq $bom;

        # Only the first match is removed, so payload bytes that happen to
        # look like another BOM are left alone.
        return ($enc, substr($data, length $bom));
    }

    return;
}

##############################################################################
# Removes BOMs
#
# Parameters: the path of the file to process.
# Returns:    a one-line, newline-terminated status message for the log:
#             "<encoding> BOM removed from <file>", "No BOM removed from
#             <file>", or an "Unable to ..." message on I/O failure.
#             The file is rewritten only when a BOM was actually found.
##############################################################################
sub remove_bom($) {
    my $fname = shift;

    # :raw keeps the bytes untouched regardless of platform or default layers.
    open(my $in, '<:raw', $fname)
        or return "Unable to open file $fname for reading.\n";
    # local confines slurp mode to this block.
    my $str = do { local $/; <$in> };
    close $in;
    $str = '' unless defined $str;

    my ($enc, $stripped) = _strip_bom($str);
    return "No BOM removed from $fname\n" unless defined $enc;

    #
    # Reopen and truncate file.
    #
    open(my $out, '>:raw', $fname)
        or return "Unable to open file $fname for writing.\n";
    # Buffered write errors may only surface at close, so check both.
    unless ((print {$out} $stripped) and close $out) {
        return "Unable to write file $fname: $!\n";
    }

    return "$enc BOM removed from $fname\n";
}

# Run only when executed as a script, so the file can also be loaded with
# require without triggering the directory traversal.
exit main(@ARGV) unless caller;

1;